diff --git a/cpp/src/arrow/compute/type_fwd.h b/cpp/src/arrow/compute/type_fwd.h
index 99fb60ef634..4b05ceccb5a 100644
--- a/cpp/src/arrow/compute/type_fwd.h
+++ b/cpp/src/arrow/compute/type_fwd.h
@@ -23,6 +23,8 @@ struct Datum;
 
 namespace compute {
 
+struct CastOptions;
+
 class ExecContext;
 class KernelContext;
diff --git a/cpp/src/arrow/csv/options.h b/cpp/src/arrow/csv/options.h
index e94f5fc9653..82153ed466a 100644
--- a/cpp/src/arrow/csv/options.h
+++ b/cpp/src/arrow/csv/options.h
@@ -23,6 +23,7 @@
 #include <unordered_map>
 #include <vector>
 
+#include "arrow/csv/type_fwd.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
diff --git a/cpp/src/arrow/csv/type_fwd.h b/cpp/src/arrow/csv/type_fwd.h
new file mode 100644
index 00000000000..17fcdbdcc56
--- /dev/null
+++ b/cpp/src/arrow/csv/type_fwd.h
@@ -0,0 +1,27 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+namespace arrow {
+namespace csv {
+
+class TableReader;
+struct ConvertOptions;
+struct ReadOptions;
+struct ParseOptions;
+
+}  // namespace csv
+}  // namespace arrow
diff --git a/cpp/src/arrow/dataset/type_fwd.h b/cpp/src/arrow/dataset/type_fwd.h
index 73cfdff2b42..0ff77de0102 100644
--- a/cpp/src/arrow/dataset/type_fwd.h
+++ b/cpp/src/arrow/dataset/type_fwd.h
@@ -36,10 +36,11 @@ class ExecContext;
 namespace dataset {
 
 class Dataset;
+class DatasetFactory;
 using DatasetVector = std::vector<std::shared_ptr<Dataset>>;
 
 class UnionDataset;
-class DatasetFactory;
+class UnionDatasetFactory;
 
 class Fragment;
 using FragmentIterator = Iterator<std::shared_ptr<Fragment>>;
@@ -51,6 +52,7 @@ class FileFragment;
 class FileWriter;
 class FileWriteOptions;
 class FileSystemDataset;
+class FileSystemDatasetFactory;
 struct FileSystemDatasetWriteOptions;
 
 class InMemoryDataset;
@@ -77,6 +79,8 @@ const std::shared_ptr<ScalarExpression>& scalar(bool);
 class Partitioning;
 class PartitioningFactory;
 class PartitioningOrFactory;
+class DirectoryPartitioning;
+class HivePartitioning;
 
 struct ScanContext;
diff --git a/cpp/src/arrow/ipc/feather.h b/cpp/src/arrow/ipc/feather.h
index b40893c408f..a32ff6d0a5a 100644
--- a/cpp/src/arrow/ipc/feather.h
+++ b/cpp/src/arrow/ipc/feather.h
@@ -128,7 +128,7 @@ struct ARROW_EXPORT WriteProperties {
   Compression::type compression = Compression::UNCOMPRESSED;
 
   /// Compressor-specific compression level
-  int compression_level = Compression::kUseDefaultCompressionLevel;
+  int compression_level = ::arrow::util::kUseDefaultCompressionLevel;
 };
 
 ARROW_EXPORT
diff --git a/cpp/src/arrow/json/options.h b/cpp/src/arrow/json/options.h
index 03d46ad8430..d7edab9cedd 100644
--- a/cpp/src/arrow/json/options.h
+++ b/cpp/src/arrow/json/options.h
@@ -20,6 +20,7 @@
 #include <cstdint>
 #include <memory>
 
+#include "arrow/json/type_fwd.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
diff --git a/cpp/src/arrow/json/type_fwd.h b/cpp/src/arrow/json/type_fwd.h
new file mode 100644
index 00000000000..67e2e1bb406
--- /dev/null
+++ b/cpp/src/arrow/json/type_fwd.h
@@ -0,0 +1,26 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+namespace arrow {
+namespace json {
+
+class TableReader;
+struct ReadOptions;
+struct ParseOptions;
+
+}  // namespace json
+}  // namespace arrow
diff --git a/cpp/src/arrow/util/compression.h b/cpp/src/arrow/util/compression.h
index d3e8e1e62f1..6c9a74c6d21 100644
--- a/cpp/src/arrow/util/compression.h
+++ b/cpp/src/arrow/util/compression.h
@@ -24,31 +24,13 @@
 
 #include "arrow/result.h"
 #include "arrow/status.h"
+#include "arrow/util/type_fwd.h"
 #include "arrow/util/visibility.h"
 
 namespace arrow {
-
-struct Compression {
-  /// \brief Compression algorithm
-  enum type {
-    UNCOMPRESSED,
-    SNAPPY,
-    GZIP,
-    BROTLI,
-    ZSTD,
-    LZ4,
-    LZ4_FRAME,
-    LZO,
-    BZ2,
-    LZ4_HADOOP
-  };
-
-  static constexpr int kUseDefaultCompressionLevel = std::numeric_limits<int>::min();
-};
-
 namespace util {
 
-constexpr int kUseDefaultCompressionLevel = Compression::kUseDefaultCompressionLevel;
+constexpr int kUseDefaultCompressionLevel = std::numeric_limits<int>::min();
 
 /// \brief Streaming compressor interface
 ///
diff --git a/cpp/src/arrow/util/type_fwd.h b/cpp/src/arrow/util/type_fwd.h
index 3eabd0d2553..cda77d52c10 100644
--- a/cpp/src/arrow/util/type_fwd.h
+++ b/cpp/src/arrow/util/type_fwd.h
@@ -22,6 +22,8 @@ namespace arrow {
 template <typename T>
 class Future;
 
+class TimestampParser;
+
 namespace internal {
 
 class Executor;
@@ -29,4 +31,27 @@ class TaskGroup;
 class ThreadPool;
 
 }  // namespace internal
+
+struct Compression {
+  /// \brief Compression algorithm
+  enum type {
+    UNCOMPRESSED,
+    SNAPPY,
+    GZIP,
+    BROTLI,
+    ZSTD,
+    LZ4,
+    LZ4_FRAME,
+    LZO,
+    BZ2,
+    LZ4_HADOOP
+  };
+};
+
+namespace util {
+class Compressor;
+class Decompressor;
+class Codec;
+}  // namespace util
+
 }  // namespace arrow
diff --git a/cpp/src/parquet/arrow/generate_fuzz_corpus.cc b/cpp/src/parquet/arrow/generate_fuzz_corpus.cc
index ec5accdeea0..33c3a1461b6 100644
--- a/cpp/src/parquet/arrow/generate_fuzz_corpus.cc
+++ b/cpp/src/parquet/arrow/generate_fuzz_corpus.cc
@@ -195,4 +195,4 @@ int Main(int argc, char** argv) {
 
 }  // namespace arrow
 
-int main(int argc, char** argv) { return arrow::Main(argc, argv); }
+int main(int argc, char** argv) { return ::arrow::Main(argc, argv); }
diff --git a/cpp/src/parquet/arrow/schema_internal.cc b/cpp/src/parquet/arrow/schema_internal.cc
index 4d37519d39e..3d83a382d3f 100644
--- a/cpp/src/parquet/arrow/schema_internal.cc
+++ b/cpp/src/parquet/arrow/schema_internal.cc
@@ -19,8 +19,8 @@
 
 #include "arrow/type.h"
 
-using ArrowType = arrow::DataType;
-using ArrowTypeId = arrow::Type;
+using ArrowType = ::arrow::DataType;
+using ArrowTypeId = ::arrow::Type;
 using ParquetType = parquet::Type;
 
 namespace parquet {
diff --git a/cpp/src/parquet/column_writer.cc b/cpp/src/parquet/column_writer.cc
index b68ad4537c4..db3f427572c 100644
--- a/cpp/src/parquet/column_writer.cc
+++ b/cpp/src/parquet/column_writer.cc
@@ -1129,7 +1129,7 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter<DType> {
     if (maybe_parent_nulls) {
       ARROW_ASSIGN_OR_RAISE(
           bits_buffer_,
-          arrow::AllocateResizableBuffer(
+          ::arrow::AllocateResizableBuffer(
               BitUtil::BytesForBits(properties_->write_batch_size()), ctx->memory_pool));
       bits_buffer_->ZeroPadding();
     }
@@ -1304,7 +1304,7 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter<DType> {
   Result<std::shared_ptr<Array>> MaybeReplaceValidity(std::shared_ptr<Array> array,
                                                       int64_t new_null_count,
-                                                      arrow::MemoryPool* memory_pool) {
+                                                      ::arrow::MemoryPool* memory_pool) {
     if (bits_buffer_ == nullptr) {
       return array;
     }
@@ -1320,7 +1320,7 @@ class TypedColumnWriterImpl : public ColumnWriterImpl, public TypedColumnWriter<DType> {
       RETURN_NOT_OK(::arrow::VisitArrayInline(*array, &slicer));
       buffers[1] = slicer.buffer_;
     }
-    return arrow::MakeArray(std::make_shared<ArrayData>(
+    return ::arrow::MakeArray(std::make_shared<ArrayData>(
         array->type(), array->length(), std::move(buffers), new_null_count));
   }
@@ -1462,7 +1462,7 @@ Status TypedColumnWriterImpl<DType>::WriteArrowDictionary(
   auto WriteIndicesChunk = [&](int64_t offset, int64_t batch_size) {
     int64_t batch_num_values = 0;
     int64_t batch_num_spaced_values = 0;
-    int64_t null_count = arrow::kUnknownNullCount;
+    int64_t null_count = ::arrow::kUnknownNullCount;
     // Bits is not null for nullable values. At this point in the code we can't determine
     // if the leaf array has the same null values as any parents it might have had so we
     // need to recompute it from def levels.
diff --git a/cpp/src/parquet/encoding.cc b/cpp/src/parquet/encoding.cc
index 109f48ea63c..d0b58471c64 100644
--- a/cpp/src/parquet/encoding.cc
+++ b/cpp/src/parquet/encoding.cc
@@ -99,14 +99,14 @@ class PlainEncoder : public EncoderImpl, virtual public TypedEncoder<DType> {
 
   void Put(const T* buffer, int num_values) override;
 
-  void Put(const arrow::Array& values) override;
+  void Put(const ::arrow::Array& values) override;
 
   void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
                  int64_t valid_bits_offset) override {
-    PARQUET_ASSIGN_OR_THROW(
-        auto buffer, arrow::AllocateBuffer(num_values * sizeof(T), this->memory_pool()));
+    PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T),
+                                                                 this->memory_pool()));
     T* data = reinterpret_cast<T*>(buffer->mutable_data());
-    int num_valid_values = arrow::util::internal::SpacedCompress<T>(
+    int num_valid_values = ::arrow::util::internal::SpacedCompress<T>(
         src, num_values, valid_bits, valid_bits_offset, data);
     Put(data, num_valid_values);
   }
@@ -127,7 +127,7 @@ class PlainEncoder : public EncoderImpl, virtual public TypedEncoder<DType> {
   }
 
  protected:
-  arrow::BufferBuilder sink_;
+  ::arrow::BufferBuilder sink_;
 };
 
 template <typename DType>
@@ -145,7 +145,7 @@ inline void PlainEncoder<ByteArrayType>::Put(const ByteArray* src, int num_values) {
 }
 
 template <typename ArrayType>
-void DirectPutImpl(const arrow::Array& values, arrow::BufferBuilder* sink) {
+void DirectPutImpl(const ::arrow::Array& values, ::arrow::BufferBuilder* sink) {
   if (values.type_id() != ArrayType::TypeClass::type_id) {
     std::string type_name = ArrayType::TypeClass::type_name();
     throw ParquetException("direct put to " + type_name + " from " +
@@ -172,46 +172,46 @@ void DirectPutImpl(const arrow::Array& values, arrow::BufferBuilder* sink) {
 }
 
 template <>
-void PlainEncoder<Int32Type>::Put(const arrow::Array& values) {
-  DirectPutImpl<arrow::Int32Array>(values, &sink_);
+void PlainEncoder<Int32Type>::Put(const ::arrow::Array& values) {
+  DirectPutImpl<::arrow::Int32Array>(values, &sink_);
 }
 
 template <>
-void PlainEncoder<Int64Type>::Put(const arrow::Array& values) {
-  DirectPutImpl<arrow::Int64Array>(values, &sink_);
+void PlainEncoder<Int64Type>::Put(const ::arrow::Array& values) {
+  DirectPutImpl<::arrow::Int64Array>(values, &sink_);
 }
 
 template <>
-void PlainEncoder<Int96Type>::Put(const arrow::Array& values) {
+void PlainEncoder<Int96Type>::Put(const ::arrow::Array& values) {
   ParquetException::NYI("direct put to Int96");
 }
 
 template <>
-void PlainEncoder<FloatType>::Put(const arrow::Array& values) {
-  DirectPutImpl<arrow::FloatArray>(values, &sink_);
+void PlainEncoder<FloatType>::Put(const ::arrow::Array& values) {
+  DirectPutImpl<::arrow::FloatArray>(values, &sink_);
 }
 
 template <>
-void PlainEncoder<DoubleType>::Put(const arrow::Array& values) {
-  DirectPutImpl<arrow::DoubleArray>(values, &sink_);
+void PlainEncoder<DoubleType>::Put(const ::arrow::Array& values) {
+  DirectPutImpl<::arrow::DoubleArray>(values, &sink_);
 }
 
 template <typename DType>
-void PlainEncoder<DType>::Put(const arrow::Array& values) {
+void PlainEncoder<DType>::Put(const ::arrow::Array& values) {
   ParquetException::NYI("direct put of " + values.type()->ToString());
 }
 
-void AssertBinary(const arrow::Array& values) {
-  if (values.type_id() != arrow::Type::BINARY &&
-      values.type_id() != arrow::Type::STRING) {
+void AssertBinary(const ::arrow::Array& values) {
+  if (values.type_id() != ::arrow::Type::BINARY &&
+      values.type_id() != ::arrow::Type::STRING) {
     throw ParquetException("Only BinaryArray and subclasses supported");
   }
 }
 
 template <>
-inline void PlainEncoder<ByteArrayType>::Put(const arrow::Array& values) {
+inline void PlainEncoder<ByteArrayType>::Put(const ::arrow::Array& values) {
   AssertBinary(values);
-  const auto& data = checked_cast<const arrow::BinaryArray&>(values);
+  const auto& data = checked_cast<const ::arrow::BinaryArray&>(values);
   const int64_t total_bytes = data.value_offset(data.length()) - data.value_offset(0);
   PARQUET_THROW_NOT_OK(sink_.Reserve(total_bytes + data.length() * sizeof(uint32_t)));
@@ -231,12 +231,12 @@ inline void PlainEncoder<ByteArrayType>::Put(const arrow::Array& values) {
   }
 }
 
-void AssertFixedSizeBinary(const arrow::Array& values, int type_length) {
-  if (values.type_id() != arrow::Type::FIXED_SIZE_BINARY &&
-      values.type_id() != arrow::Type::DECIMAL) {
+void AssertFixedSizeBinary(const ::arrow::Array& values, int type_length) {
+  if (values.type_id() != ::arrow::Type::FIXED_SIZE_BINARY &&
+      values.type_id() != ::arrow::Type::DECIMAL) {
     throw ParquetException("Only FixedSizeBinaryArray and subclasses supported");
   }
-  if (checked_cast<const arrow::FixedSizeBinaryType&>(*values.type()).byte_width() !=
+  if (checked_cast<const ::arrow::FixedSizeBinaryType&>(*values.type()).byte_width() !=
       type_length) {
     throw ParquetException("Size mismatch: " + values.type()->ToString() +
                            " should have been " + std::to_string(type_length) + " wide");
@@ -244,9 +244,9 @@ void AssertFixedSizeBinary(const arrow::Array& values, int type_length) {
 }
 
 template <>
-inline void PlainEncoder<FLBAType>::Put(const arrow::Array& values) {
+inline void PlainEncoder<FLBAType>::Put(const ::arrow::Array& values) {
   AssertFixedSizeBinary(values, descr_->type_length());
-  const auto& data = checked_cast<const arrow::FixedSizeBinaryArray&>(values);
+  const auto& data = checked_cast<const ::arrow::FixedSizeBinaryArray&>(values);
 
   if (data.null_count() == 0) {
     // no nulls, just dump the data
@@ -296,32 +296,32 @@ class PlainEncoder<BooleanType> : public EncoderImpl, virtual public BooleanEncoder {
 
   void PutSpaced(const bool* src, int num_values, const uint8_t* valid_bits,
                  int64_t valid_bits_offset) override {
-    PARQUET_ASSIGN_OR_THROW(
-        auto buffer, arrow::AllocateBuffer(num_values * sizeof(T), this->memory_pool()));
+    PARQUET_ASSIGN_OR_THROW(auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T),
+                                                                 this->memory_pool()));
     T* data = reinterpret_cast<T*>(buffer->mutable_data());
-    int num_valid_values = arrow::util::internal::SpacedCompress<T>(
+    int num_valid_values = ::arrow::util::internal::SpacedCompress<T>(
         src, num_values, valid_bits, valid_bits_offset, data);
     Put(data, num_valid_values);
   }
 
-  void Put(const arrow::Array& values) override {
-    if (values.type_id() != arrow::Type::BOOL) {
+  void Put(const ::arrow::Array& values) override {
+    if (values.type_id() != ::arrow::Type::BOOL) {
       throw ParquetException("direct put to boolean from " + values.type()->ToString() +
                              " not supported");
     }
 
-    const auto& data = checked_cast<const arrow::BooleanArray&>(values);
+    const auto& data = checked_cast<const ::arrow::BooleanArray&>(values);
     if (data.null_count() == 0) {
       PARQUET_THROW_NOT_OK(sink_.Reserve(BitUtil::BytesForBits(data.length())));
       // no nulls, just dump the data
-      arrow::internal::CopyBitmap(data.data()->GetValues<uint8_t>(1), data.offset(),
-                                  data.length(), sink_.mutable_data(), sink_.length());
+      ::arrow::internal::CopyBitmap(data.data()->GetValues<uint8_t>(1), data.offset(),
+                                    data.length(), sink_.mutable_data(), sink_.length());
       sink_.UnsafeAdvance(data.length());
     } else {
       auto n_valid = BitUtil::BytesForBits(data.length() - data.null_count());
       PARQUET_THROW_NOT_OK(sink_.Reserve(n_valid));
-      arrow::internal::FirstTimeBitmapWriter writer(sink_.mutable_data(), sink_.length(),
-                                                    n_valid);
+      ::arrow::internal::FirstTimeBitmapWriter writer(sink_.mutable_data(),
+                                                      sink_.length(), n_valid);
 
       for (int64_t i = 0; i < data.length(); i++) {
         if (data.IsValid(i)) {
@@ -340,8 +340,8 @@ class PlainEncoder<BooleanType> : public EncoderImpl, virtual public BooleanEncoder {
  private:
   int bits_available_;
   std::shared_ptr<ResizableBuffer> bits_buffer_;
-  arrow::BufferBuilder sink_;
-  arrow::BitUtil::BitWriter bit_writer_;
+  ::arrow::BufferBuilder sink_;
+  ::arrow::BitUtil::BitWriter bit_writer_;
 
   template <typename SequenceType>
   void PutImpl(const SequenceType& src, int num_values);
@@ -419,17 +419,17 @@ void PlainEncoder<BooleanType>::Put(const std::vector<bool>& src, int num_values) {
 
 template <typename DType>
 struct DictEncoderTraits {
   using c_type = typename DType::c_type;
-  using MemoTableType = arrow::internal::ScalarMemoTable<c_type>;
+  using MemoTableType = ::arrow::internal::ScalarMemoTable<c_type>;
 };
 
 template <>
 struct DictEncoderTraits<ByteArrayType> {
-  using MemoTableType = arrow::internal::BinaryMemoTable<arrow::BinaryBuilder>;
+  using MemoTableType = ::arrow::internal::BinaryMemoTable<::arrow::BinaryBuilder>;
 };
 
 template <>
 struct DictEncoderTraits<FLBAType> {
-  using MemoTableType = arrow::internal::BinaryMemoTable<arrow::BinaryBuilder>;
+  using MemoTableType = ::arrow::internal::BinaryMemoTable<::arrow::BinaryBuilder>;
 };
 
 // Initially 1024 elements
@@ -465,7 +465,7 @@ class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder<DType> {
     ++buffer;
     --buffer_len;
 
-    arrow::util::RleEncoder encoder(buffer, buffer_len, bit_width());
+    ::arrow::util::RleEncoder encoder(buffer, buffer_len, bit_width());
 
     for (int32_t index : buffered_indices_) {
       if (!encoder.Put(index)) return -1;
@@ -486,9 +486,9 @@ class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder<DType> {
     // an extra "RleEncoder::MinBufferSize" bytes. These extra bytes won't be used
     // but not reserving them would cause the encoder to fail.
     return 1 +
-           arrow::util::RleEncoder::MaxBufferSize(
+           ::arrow::util::RleEncoder::MaxBufferSize(
                bit_width(), static_cast<int>(buffered_indices_.size())) +
-           arrow::util::RleEncoder::MinBufferSize(bit_width());
+           ::arrow::util::RleEncoder::MinBufferSize(bit_width());
   }
 
   /// The minimum bit width required to encode the currently buffered indices.
@@ -513,8 +513,8 @@ class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder<DType> {
 
   void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
                  int64_t valid_bits_offset) override {
-    arrow::internal::BitmapReader valid_bits_reader(valid_bits, valid_bits_offset,
-                                                    num_values);
+    ::arrow::internal::BitmapReader valid_bits_reader(valid_bits, valid_bits_offset,
+                                                      num_values);
     for (int32_t i = 0; i < num_values; i++) {
       if (valid_bits_reader.IsSet()) {
         Put(src[i]);
@@ -525,18 +525,18 @@ class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder<DType> {
 
   using TypedEncoder<DType>::Put;
 
-  void Put(const arrow::Array& values) override;
-  void PutDictionary(const arrow::Array& values) override;
+  void Put(const ::arrow::Array& values) override;
+  void PutDictionary(const ::arrow::Array& values) override;
 
   template <typename ArrowType>
-  void PutIndicesTyped(const arrow::Array& data) {
+  void PutIndicesTyped(const ::arrow::Array& data) {
     auto values = data.data()->GetValues<typename ArrowType::c_type>(1);
     size_t buffer_position = buffered_indices_.size();
     buffered_indices_.resize(buffer_position +
                              static_cast<size_t>(data.length() - data.null_count()));
     if (data.null_count() > 0) {
-      arrow::internal::BitmapReader valid_bits_reader(data.null_bitmap_data(),
-                                                      data.offset(), data.length());
+      ::arrow::internal::BitmapReader valid_bits_reader(data.null_bitmap_data(),
+                                                        data.offset(), data.length());
       for (int64_t i = 0; i < data.length(); ++i) {
         if (valid_bits_reader.IsSet()) {
           buffered_indices_[buffer_position++] = static_cast<int32_t>(values[i]);
@@ -550,20 +550,20 @@ class DictEncoderImpl : public EncoderImpl, virtual public DictEncoder<DType> {
     }
   }
 
-  void PutIndices(const arrow::Array& data) override {
+  void PutIndices(const ::arrow::Array& data) override {
     switch (data.type()->id()) {
-      case arrow::Type::UINT8:
-      case arrow::Type::INT8:
-        return PutIndicesTyped<arrow::UInt8Type>(data);
-      case arrow::Type::UINT16:
-      case arrow::Type::INT16:
-        return PutIndicesTyped<arrow::UInt16Type>(data);
-      case arrow::Type::UINT32:
-      case arrow::Type::INT32:
-        return PutIndicesTyped<arrow::UInt32Type>(data);
-      case arrow::Type::UINT64:
-      case arrow::Type::INT64:
-        return PutIndicesTyped<arrow::UInt64Type>(data);
+      case ::arrow::Type::UINT8:
+      case ::arrow::Type::INT8:
+        return PutIndicesTyped<::arrow::UInt8Type>(data);
+      case ::arrow::Type::UINT16:
+      case ::arrow::Type::INT16:
+        return PutIndicesTyped<::arrow::UInt16Type>(data);
+      case ::arrow::Type::UINT32:
+      case ::arrow::Type::INT32:
+        return PutIndicesTyped<::arrow::UInt32Type>(data);
+      case ::arrow::Type::UINT64:
+      case ::arrow::Type::INT64:
+        return PutIndicesTyped<::arrow::UInt64Type>(data);
       default:
         throw ParquetException("Passed non-integer array to PutIndices");
     }
@@ -608,7 +608,7 @@ void DictEncoderImpl<DType>::WriteDict(uint8_t* buffer) {
 // ByteArray and FLBA already have the dictionary encoded in their data heaps
 template <>
 void DictEncoderImpl<ByteArrayType>::WriteDict(uint8_t* buffer) {
-  memo_table_.VisitValues(0, [&buffer](const arrow::util::string_view& v) {
+  memo_table_.VisitValues(0, [&buffer](const ::arrow::util::string_view& v) {
     uint32_t len = static_cast<uint32_t>(v.length());
     memcpy(buffer, &len, sizeof(len));
     buffer += sizeof(len);
@@ -619,7 +619,7 @@ void DictEncoderImpl<ByteArrayType>::WriteDict(uint8_t* buffer) {
 
 template <>
 void DictEncoderImpl<FLBAType>::WriteDict(uint8_t* buffer) {
-  memo_table_.VisitValues(0, [&](const arrow::util::string_view& v) {
+  memo_table_.VisitValues(0, [&](const ::arrow::util::string_view& v) {
     DCHECK_EQ(v.length(), static_cast<size_t>(type_length_));
     memcpy(buffer, v.data(), type_length_);
    buffer += type_length_;
@@ -683,18 +683,18 @@ inline void DictEncoderImpl<FLBAType>::Put(const FixedLenByteArray& v) {
 }
 
 template <>
-void DictEncoderImpl<Int96Type>::Put(const arrow::Array& values) {
+void DictEncoderImpl<Int96Type>::Put(const ::arrow::Array& values) {
   ParquetException::NYI("Direct put to Int96");
 }
 
 template <>
-void DictEncoderImpl<Int96Type>::PutDictionary(const arrow::Array& values) {
+void DictEncoderImpl<Int96Type>::PutDictionary(const ::arrow::Array& values) {
   ParquetException::NYI("Direct put to Int96");
 }
 
 template <typename DType>
-void DictEncoderImpl<DType>::Put(const arrow::Array& values) {
-  using ArrayType = typename arrow::CTypeTraits<typename DType::c_type>::ArrayType;
+void DictEncoderImpl<DType>::Put(const ::arrow::Array& values) {
+  using ArrayType = typename ::arrow::CTypeTraits<typename DType::c_type>::ArrayType;
   const auto& data = checked_cast<const ArrayType&>(values);
   if (data.null_count() == 0) {
     // no nulls, just dump the data
@@ -711,9 +711,9 @@ void DictEncoderImpl<DType>::Put(const arrow::Array& values) {
 }
 
 template <>
-void DictEncoderImpl<FLBAType>::Put(const arrow::Array& values) {
+void DictEncoderImpl<FLBAType>::Put(const ::arrow::Array& values) {
   AssertFixedSizeBinary(values, type_length_);
-  const auto& data = checked_cast<const arrow::FixedSizeBinaryArray&>(values);
+  const auto& data = checked_cast<const ::arrow::FixedSizeBinaryArray&>(values);
   if (data.null_count() == 0) {
     // no nulls, just dump the data
     for (int64_t i = 0; i < data.length(); i++) {
@@ -730,9 +730,9 @@ void DictEncoderImpl<FLBAType>::Put(const arrow::Array& values) {
 }
 
 template <>
-void DictEncoderImpl<ByteArrayType>::Put(const arrow::Array& values) {
+void DictEncoderImpl<ByteArrayType>::Put(const ::arrow::Array& values) {
   AssertBinary(values);
-  const auto& data = checked_cast<const arrow::BinaryArray&>(values);
+  const auto& data = checked_cast<const ::arrow::BinaryArray&>(values);
   if (data.null_count() == 0) {
     // no nulls, just dump the data
     for (int64_t i = 0; i < data.length(); i++) {
@@ -750,7 +750,7 @@ void DictEncoderImpl<ByteArrayType>::Put(const arrow::Array& values) {
 }
 
 template <typename DType>
-void AssertCanPutDictionary(DictEncoderImpl<DType>* encoder, const arrow::Array& dict) {
+void AssertCanPutDictionary(DictEncoderImpl<DType>* encoder, const ::arrow::Array& dict) {
   if (dict.null_count() > 0) {
     throw ParquetException("Inserted dictionary cannot cannot contain nulls");
   }
@@ -761,10 +761,10 @@ void AssertCanPutDictionary(DictEncoderImpl<DType>* encoder, const arrow::Array& dict) {
 }
 
 template <typename DType>
-void DictEncoderImpl<DType>::PutDictionary(const arrow::Array& values) {
+void DictEncoderImpl<DType>::PutDictionary(const ::arrow::Array& values) {
   AssertCanPutDictionary(this, values);
 
-  using ArrayType = typename arrow::CTypeTraits<typename DType::c_type>::ArrayType;
+  using ArrayType = typename ::arrow::CTypeTraits<typename DType::c_type>::ArrayType;
   const auto& data = checked_cast<const ArrayType&>(values);
 
   dict_encoded_size_ += static_cast<int>(sizeof(typename DType::c_type) * data.length());
@@ -775,11 +775,11 @@ void DictEncoderImpl<DType>::PutDictionary(const arrow::Array& values) {
 }
 
 template <>
-void DictEncoderImpl<FLBAType>::PutDictionary(const arrow::Array& values) {
+void DictEncoderImpl<FLBAType>::PutDictionary(const ::arrow::Array& values) {
   AssertFixedSizeBinary(values, type_length_);
   AssertCanPutDictionary(this, values);
 
-  const auto& data = checked_cast<const arrow::FixedSizeBinaryArray&>(values);
+  const auto& data = checked_cast<const ::arrow::FixedSizeBinaryArray&>(values);
 
   dict_encoded_size_ += static_cast<int>(type_length_ * data.length());
   for (int64_t i = 0; i < data.length(); i++) {
@@ -790,11 +790,11 @@ void DictEncoderImpl<FLBAType>::PutDictionary(const arrow::Array& values) {
 }
 
 template <>
-void DictEncoderImpl<ByteArrayType>::PutDictionary(const arrow::Array& values) {
+void DictEncoderImpl<ByteArrayType>::PutDictionary(const ::arrow::Array& values) {
   AssertBinary(values);
   AssertCanPutDictionary(this, values);
 
-  const auto& data = checked_cast<const arrow::BinaryArray&>(values);
+  const auto& data = checked_cast<const ::arrow::BinaryArray&>(values);
 
   for (int64_t i = 0; i < data.length(); i++) {
     auto v = data.GetView(i);
@@ -822,15 +822,15 @@ class ByteStreamSplitEncoder : public EncoderImpl, virtual public TypedEncoder<DType> {
   std::shared_ptr<Buffer> FlushValues() override;
 
   void Put(const T* buffer, int num_values) override;
-  void Put(const arrow::Array& values) override;
+  void Put(const ::arrow::Array& values) override;
   void PutSpaced(const T* src, int num_values, const uint8_t* valid_bits,
                  int64_t valid_bits_offset) override;
 
  protected:
-  arrow::TypedBufferBuilder<T> values_;
+  ::arrow::TypedBufferBuilder<T> values_;
 
  private:
-  void PutArrowArray(const arrow::Array& values);
+  void PutArrowArray(const ::arrow::Array& values);
 };
 
 template <typename DType>
@@ -850,8 +850,8 @@ std::shared_ptr<Buffer> ByteStreamSplitEncoder<DType>::FlushValues() {
   uint8_t* output_buffer_raw = output_buffer->mutable_data();
   const size_t num_values = values_.length();
   const uint8_t* raw_values = reinterpret_cast<const uint8_t*>(values_.data());
-  arrow::util::internal::ByteStreamSplitEncode<T>(raw_values, num_values,
-                                                  output_buffer_raw);
+  ::arrow::util::internal::ByteStreamSplitEncode<T>(raw_values, num_values,
+                                                    output_buffer_raw);
   values_.Reset();
   return std::move(output_buffer);
 }
@@ -868,14 +868,14 @@ void ByteStreamSplitEncoder<DType>::Put(const ::arrow::Array& values) {
 
 template <>
 void ByteStreamSplitEncoder<FloatType>::PutArrowArray(const ::arrow::Array& values) {
-  DirectPutImpl<arrow::FloatArray>(values,
-                                   reinterpret_cast<arrow::BufferBuilder*>(&values_));
+  DirectPutImpl<::arrow::FloatArray>(values,
+                                     reinterpret_cast<::arrow::BufferBuilder*>(&values_));
 }
 
 template <>
 void ByteStreamSplitEncoder<DoubleType>::PutArrowArray(const ::arrow::Array& values) {
-  DirectPutImpl<arrow::DoubleArray>(values,
-                                    reinterpret_cast<arrow::BufferBuilder*>(&values_));
+  DirectPutImpl<::arrow::DoubleArray>(
+      values, reinterpret_cast<::arrow::BufferBuilder*>(&values_));
 }
 
 template <typename DType>
@@ -883,9 +883,9 @@ void ByteStreamSplitEncoder<DType>::PutSpaced(const T* src, int num_values,
                                               const uint8_t* valid_bits,
                                               int64_t valid_bits_offset) {
   PARQUET_ASSIGN_OR_THROW(
-      auto buffer, arrow::AllocateBuffer(num_values * sizeof(T), this->memory_pool()));
+      auto buffer, ::arrow::AllocateBuffer(num_values * sizeof(T), this->memory_pool()));
   T* data = reinterpret_cast<T*>(buffer->mutable_data());
-  int num_valid_values = arrow::util::internal::SpacedCompress<T>(
+  int num_valid_values = ::arrow::util::internal::SpacedCompress<T>(
       src, num_values, valid_bits, valid_bits_offset, data);
   Put(data, num_valid_values);
 }
@@ -970,7 +970,7 @@ int PlainDecoder<DType>::DecodeArrow(
   VisitNullBitmapInline(
       valid_bits, valid_bits_offset, num_values, null_count,
       [&]() {
-        builder->UnsafeAppend(arrow::util::SafeLoadAs<value_type>(data_));
+        builder->UnsafeAppend(::arrow::util::SafeLoadAs<value_type>(data_));
         data_ += sizeof(value_type);
       },
       [&]() { builder->UnsafeAppendNull(); });
@@ -997,7 +997,8 @@ int PlainDecoder<DType>::DecodeArrow(
   VisitNullBitmapInline(
       valid_bits, valid_bits_offset, num_values, null_count,
       [&]() {
-        PARQUET_THROW_NOT_OK(builder->Append(arrow::util::SafeLoadAs<value_type>(data_)));
+        PARQUET_THROW_NOT_OK(
+            builder->Append(::arrow::util::SafeLoadAs<value_type>(data_)));
         data_ += sizeof(value_type);
       },
       [&]() { PARQUET_THROW_NOT_OK(builder->AppendNull()); });
@@ -1040,7 +1041,7 @@ static inline int64_t ReadByteArray(const uint8_t* data, int64_t data_size,
   if (ARROW_PREDICT_FALSE(data_size < 4)) {
     ParquetException::EofException();
   }
-  const int32_t len = arrow::util::SafeLoadAs<int32_t>(data);
+  const int32_t len = ::arrow::util::SafeLoadAs<int32_t>(data);
   if (len < 0) {
     throw ParquetException("Invalid BYTE_ARRAY value");
   }
@@ -1115,7 +1116,7 @@ class PlainBooleanDecoder : public DecoderImpl,
                             typename EncodingTraits<BooleanType>::DictAccumulator* out) override;
 
  private:
-  std::unique_ptr<arrow::BitUtil::BitReader> bit_reader_;
+  std::unique_ptr<::arrow::BitUtil::BitReader> bit_reader_;
 };
 
 PlainBooleanDecoder::PlainBooleanDecoder(const ColumnDescriptor* descr)
@@ -1158,7 +1159,7 @@ inline int PlainBooleanDecoder::DecodeArrow(
 int PlainBooleanDecoder::Decode(uint8_t* buffer, int max_values) {
   max_values = std::min(max_values, num_values_);
   bool val;
-  arrow::internal::BitmapWriter bit_writer(buffer, 0, max_values);
+  ::arrow::internal::BitmapWriter bit_writer(buffer, 0, max_values);
   for (int i = 0; i < max_values; ++i) {
     if (!bit_reader_->GetValue(1, &val)) {
       ParquetException::EofException();
@@ -1215,7 +1216,7 @@ struct ArrowBinaryHelper {
   Status AppendNull() { return builder->AppendNull(); }
 
   typename EncodingTraits<ByteArrayType>::Accumulator* out;
-  arrow::BinaryBuilder* builder;
+  ::arrow::BinaryBuilder* builder;
   int64_t chunk_space_remaining;
 };
 
@@ -1293,7 +1294,7 @@ class PlainByteArrayDecoder : public PlainDecoder<ByteArrayType>,
 
   int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
                   int64_t valid_bits_offset,
-                  arrow::BinaryDictionary32Builder* builder) override {
+                  ::arrow::BinaryDictionary32Builder* builder) override {
     int result = 0;
     PARQUET_THROW_NOT_OK(DecodeArrow(num_values, null_count, valid_bits,
                                      valid_bits_offset, builder, &result));
@@ -1331,7 +1332,7 @@ class PlainByteArrayDecoder : public PlainDecoder<ByteArrayType>,
           if (ARROW_PREDICT_FALSE(len_ < 4)) {
             ParquetException::EofException();
          }
-          auto value_len = arrow::util::SafeLoadAs<int32_t>(data_);
+          auto value_len = ::arrow::util::SafeLoadAs<int32_t>(data_);
           if (ARROW_PREDICT_FALSE(value_len < 0 || value_len > INT32_MAX - 4)) {
             return Status::Invalid("Invalid or corrupted value_len '", value_len, "'");
           }
@@ -1377,7 +1378,7 @@ class PlainByteArrayDecoder : public PlainDecoder<ByteArrayType>,
           if (ARROW_PREDICT_FALSE(len_ < 4)) {
             ParquetException::EofException();
           }
-          auto value_len = arrow::util::SafeLoadAs<int32_t>(data_);
+          auto value_len = ::arrow::util::SafeLoadAs<int32_t>(data_);
           if (ARROW_PREDICT_FALSE(value_len < 0 || value_len > INT32_MAX - 4)) {
             return Status::Invalid("Invalid or corrupted value_len '", value_len, "'");
           }
@@ -1417,7 +1418,7 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder<Type> {
   // dictionary is not guaranteed to persist in memory after this call so the
   // dictionary decoder needs to copy the data out if necessary.
   explicit DictDecoderImpl(const ColumnDescriptor* descr,
-                           MemoryPool* pool = arrow::default_memory_pool())
+                           MemoryPool* pool = ::arrow::default_memory_pool())
       : DecoderImpl(descr, Encoding::RLE_DICTIONARY),
         dictionary_(AllocateBuffer(pool, 0)),
         dictionary_length_(0),
@@ -1432,14 +1433,14 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder<Type> {
     num_values_ = num_values;
     if (len == 0) {
       // Initialize dummy decoder to avoid crashes later on
-      idx_decoder_ = arrow::util::RleDecoder(data, len, /*bit_width=*/1);
+      idx_decoder_ = ::arrow::util::RleDecoder(data, len, /*bit_width=*/1);
       return;
     }
     uint8_t bit_width = *data;
     if (ARROW_PREDICT_FALSE(bit_width >= 64)) {
       throw ParquetException("Invalid or corrupted bit_width");
     }
-    idx_decoder_ = arrow::util::RleDecoder(++data, --len, bit_width);
+    idx_decoder_ = ::arrow::util::RleDecoder(++data, --len, bit_width);
   }
 
   int Decode(T* buffer, int num_values) override {
@@ -1475,11 +1476,11 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder<Type> {
                   int64_t valid_bits_offset,
                   typename EncodingTraits<Type>::DictAccumulator* out) override;
 
-  void InsertDictionary(arrow::ArrayBuilder* builder) override;
+  void InsertDictionary(::arrow::ArrayBuilder* builder) override;
 
   int DecodeIndicesSpaced(int num_values, int null_count, const uint8_t* valid_bits,
                           int64_t valid_bits_offset,
-                          arrow::ArrayBuilder* builder) override {
+                          ::arrow::ArrayBuilder* builder) override {
     if (num_values > 0) {
       // TODO(wesm): Refactor to batch reads for improved memory use. It is not
       // trivial because the null_count is relative to the entire bitmap
@@ -1497,20 +1498,20 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder<Type> {
     /// XXX(wesm): Cannot append "valid bits" directly to the builder
     std::vector<uint8_t> valid_bytes(num_values);
-    arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, num_values);
+    ::arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, num_values);
     for (int64_t i = 0; i < num_values; ++i) {
       valid_bytes[i] = static_cast<uint8_t>(bit_reader.IsSet());
       bit_reader.Next();
     }
 
-    auto binary_builder = checked_cast<arrow::BinaryDictionary32Builder*>(builder);
+    auto binary_builder = checked_cast<::arrow::BinaryDictionary32Builder*>(builder);
     PARQUET_THROW_NOT_OK(
         binary_builder->AppendIndices(indices_buffer, num_values, valid_bytes.data()));
     num_values_ -= num_values - null_count;
     return num_values - null_count;
   }
 
-  int DecodeIndices(int num_values, arrow::ArrayBuilder* builder) override {
+  int DecodeIndices(int num_values, ::arrow::ArrayBuilder* builder) override {
     num_values = std::min(num_values, num_values_);
     if (num_values > 0) {
       // TODO(wesm): Refactor to batch reads for improved memory use. This is
@@ -1524,7 +1525,7 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder<Type> {
       if (num_values != idx_decoder_.GetBatch(indices_buffer, num_values)) {
         ParquetException::EofException();
       }
-      auto binary_builder = checked_cast<arrow::BinaryDictionary32Builder*>(builder);
+      auto binary_builder = checked_cast<::arrow::BinaryDictionary32Builder*>(builder);
       PARQUET_THROW_NOT_OK(binary_builder->AppendIndices(indices_buffer, num_values));
       num_values_ -= num_values;
       return num_values;
@@ -1566,7 +1567,7 @@ class DictDecoderImpl : public DecoderImpl, virtual public DictDecoder<Type> {
   // BinaryDictionary32Builder
   std::shared_ptr<ResizableBuffer> indices_scratch_space_;
 
-  arrow::util::RleDecoder idx_decoder_;
+  ::arrow::util::RleDecoder idx_decoder_;
 };
 
 template <typename Type>
@@ -1718,9 +1719,9 @@ int DictDecoderImpl<FLBAType>::DecodeArrow(
     int num_values, int null_count, const uint8_t* valid_bits, int64_t valid_bits_offset,
     typename EncodingTraits<FLBAType>::DictAccumulator* builder) {
   auto value_type =
-      checked_cast<const arrow::DictionaryType&>(*builder->type()).value_type();
+      checked_cast<const ::arrow::DictionaryType&>(*builder->type()).value_type();
   auto byte_width =
-      checked_cast<const arrow::FixedSizeBinaryType&>(*value_type).byte_width();
+      checked_cast<const ::arrow::FixedSizeBinaryType&>(*value_type).byte_width();
   if (byte_width != descr_->type_length()) {
     throw ParquetException("Byte width mismatch: builder was " +
                            std::to_string(byte_width) + " but decoder was " +
@@ -1771,17 +1772,17 @@ int DictDecoderImpl<ByteArrayType>::DecodeArrow(
 }
 
 template <typename Type>
-void DictDecoderImpl<Type>::InsertDictionary(arrow::ArrayBuilder* builder) {
+void DictDecoderImpl<Type>::InsertDictionary(::arrow::ArrayBuilder* builder) {
   ParquetException::NYI("InsertDictionary only implemented for BYTE_ARRAY types");
 }
 
 template <>
-void DictDecoderImpl<ByteArrayType>::InsertDictionary(arrow::ArrayBuilder* builder) {
-  auto binary_builder = checked_cast<arrow::BinaryDictionary32Builder*>(builder);
+void DictDecoderImpl<ByteArrayType>::InsertDictionary(::arrow::ArrayBuilder* builder) {
+  auto binary_builder = checked_cast<::arrow::BinaryDictionary32Builder*>(builder);
 
   // Make a BinaryArray referencing the internal dictionary data
-  auto arr = std::make_shared<arrow::BinaryArray>(dictionary_length_, byte_array_offsets_,
-                                                  byte_array_data_);
+  auto arr = std::make_shared<::arrow::BinaryArray>(
+      dictionary_length_, byte_array_offsets_, byte_array_data_);
   PARQUET_THROW_NOT_OK(binary_builder->InsertMemoValues(*arr));
 }
@@ -1793,7 +1794,7 @@ class DictByteArrayDecoderImpl : public DictDecoderImpl<ByteArrayType>,
 
   int DecodeArrow(int num_values, int null_count, const uint8_t* valid_bits,
                   int64_t valid_bits_offset,
-                  arrow::BinaryDictionary32Builder* builder) override {
+                  ::arrow::BinaryDictionary32Builder* builder) override {
     int result = 0;
     if (null_count == 0) {
       PARQUET_THROW_NOT_OK(DecodeArrowNonNull(num_values, builder, &result));
@@ -1827,7 +1828,7 @@ class DictByteArrayDecoderImpl : public DictDecoderImpl<ByteArrayType>,
 
     ArrowBinaryHelper helper(out);
 
-    arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, num_values);
+    ::arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, num_values);
 
     auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());
     int values_decoded = 0;
@@ -1918,7 +1919,7 @@ class DictByteArrayDecoderImpl : public DictDecoderImpl<ByteArrayType>,
     int32_t indices[kBufferSize];
 
     RETURN_NOT_OK(builder->Reserve(num_values));
-    arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, num_values);
+    ::arrow::internal::BitmapReader bit_reader(valid_bits, valid_bits_offset, num_values);
 
     auto dict_values = reinterpret_cast<const ByteArray*>(dictionary_->data());
 
@@ -2002,7 +2003,7 @@ class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder<DType> {
     this->num_values_ = num_values;
-    decoder_ = arrow::BitUtil::BitReader(data, len);
+    decoder_ = ::arrow::BitUtil::BitReader(data, len);
     values_current_block_ = 0;
     values_current_mini_block_ = 0;
   }
@@ -2103,7 +2104,7 @@ class DeltaBitPackDecoder : public DecoderImpl, virtual public TypedDecoder<DType> {
  public:
  explicit DeltaLengthByteArrayDecoder(const ColumnDescriptor* descr,
-                                      MemoryPool* pool = arrow::default_memory_pool())
+                                      MemoryPool* pool = ::arrow::default_memory_pool())
      : DecoderImpl(descr, Encoding::DELTA_LENGTH_BYTE_ARRAY),
        len_decoder_(nullptr, pool),
        pool_(pool) {}
@@ -2132,7 +2133,7 @@ class DeltaLengthByteArrayDecoder : public DecoderImpl,
   void SetData(int num_values, const uint8_t* data, int len) override {
     num_values_ = num_values;
     if (len == 0) return;
-    int total_lengths_len = arrow::util::SafeLoadAs<int32_t>(data);
+    int total_lengths_len = ::arrow::util::SafeLoadAs<int32_t>(data);
     data += 4;
     this->len_decoder_.SetData(num_values, data, total_lengths_len);
     data_ = data + total_lengths_len;
@@ -2178,7 +2179,7 @@ class DeltaByteArrayDecoder : public DecoderImpl,
                               virtual public TypedDecoder<ByteArrayType> {
  public:
   explicit DeltaByteArrayDecoder(const ColumnDescriptor* descr,
-                                 MemoryPool* pool = arrow::default_memory_pool())
+                                 MemoryPool* pool = ::arrow::default_memory_pool())
       : DecoderImpl(descr, Encoding::DELTA_BYTE_ARRAY),
         prefix_len_decoder_(nullptr, pool),
         suffix_decoder_(nullptr, pool),
@@ -2187,7 +2188,7 @@ class DeltaByteArrayDecoder : public DecoderImpl,
   virtual void SetData(int num_values, const uint8_t* data, int len) {
     num_values_ = num_values;
     if (len == 0) return;
-    int prefix_len_length = arrow::util::SafeLoadAs<int32_t>(data);
+    int prefix_len_length = ::arrow::util::SafeLoadAs<int32_t>(data);
     data += 4;
     len -= 4;
     prefix_len_decoder_.SetData(num_values, data, prefix_len_length);
@@ -2280,8 +2281,8 @@ int ByteStreamSplitDecoder<DType>::Decode(T* buffer, int max_values) {
   const int num_decoded_previously = num_values_in_buffer_ - num_values_;
   const uint8_t* data = data_ + num_decoded_previously;
 
-  arrow::util::internal::ByteStreamSplitDecode<T>(data, values_to_decode,
-                                                  num_values_in_buffer_, buffer);
+  ::arrow::util::internal::ByteStreamSplitDecode<T>(data, values_to_decode,
+                                                    num_values_in_buffer_, buffer);
   num_values_ -= values_to_decode;
   len_ -= sizeof(T) * values_to_decode;
   return values_to_decode;
@@ -2307,8 +2308,8 @@ int ByteStreamSplitDecoder<DType>::DecodeArrow(
     // Use fast decoding into intermediate buffer.  This will also decode
     // some null values, but it's fast enough that we don't care.
     T* decode_out = EnsureDecodeBuffer(values_decoded);
-    arrow::util::internal::ByteStreamSplitDecode<T>(data, values_decoded,
-                                                    num_values_in_buffer_, decode_out);
+    ::arrow::util::internal::ByteStreamSplitDecode<T>(data, values_decoded,
+                                                      num_values_in_buffer_, decode_out);
 
     // XXX If null_count is 0, we could even append in bulk or decode directly into
     // builder
@@ -2329,7 +2330,7 @@ int ByteStreamSplitDecoder<DType>::DecodeArrow(
           const size_t byte_index = b * num_values_in_buffer_ + offset;
           gathered_byte_data[b] = data[byte_index];
         }
-        builder->UnsafeAppend(arrow::util::SafeLoadAs<T>(&gathered_byte_data[0]));
+        builder->UnsafeAppend(::arrow::util::SafeLoadAs<T>(&gathered_byte_data[0]));
         ++offset;
       },
       [&]() { builder->UnsafeAppendNull(); });
diff --git a/cpp/src/parquet/encoding_benchmark.cc b/cpp/src/parquet/encoding_benchmark.cc
index c12ce98a639..8e409c5e429 100644
--- a/cpp/src/parquet/encoding_benchmark.cc
+++ b/cpp/src/parquet/encoding_benchmark.cc
@@ -202,7 +202,7 @@ BENCHMARK(BM_PlainDecodingFloat)->Range(MIN_RANGE, MAX_RANGE);
 
 template <typename ParquetType>
 struct BM_SpacedEncodingTraits {
   using ArrowType = typename EncodingTraits<ParquetType>::ArrowType;
-  using ArrayType = typename arrow::TypeTraits<ArrowType>::ArrayType;
+  using ArrayType = typename ::arrow::TypeTraits<ArrowType>::ArrayType;
   using CType = typename ParquetType::c_type;
 };
 
@@ -236,7 +236,7 @@ static void BM_PlainEncodingSpaced(benchmark::State& state) {
   auto rand = ::arrow::random::RandomArrayGenerator(1923);
   const auto array = rand.Numeric<ArrowType>(num_values, -100, 100, null_percent);
   const auto valid_bits = array->null_bitmap_data();
-  const auto array_actual = arrow::internal::checked_pointer_cast<ArrayType>(array);
+  const auto array_actual = ::arrow::internal::checked_pointer_cast<ArrayType>(array);
   const auto raw_values = array_actual->raw_values();
   // Guarantee the type cast between raw_values and input of PutSpaced.
   static_assert(sizeof(CType) == sizeof(*raw_values), "Type mismatch");
@@ -281,7 +281,7 @@ static void BM_PlainDecodingSpaced(benchmark::State& state) {
   const auto array = rand.Numeric<ArrowType>(num_values, -100, 100, null_percent);
   const auto valid_bits = array->null_bitmap_data();
   const int null_count = static_cast<int>(array->null_count());
-  const auto array_actual = arrow::internal::checked_pointer_cast<ArrayType>(array);
+  const auto array_actual = ::arrow::internal::checked_pointer_cast<ArrayType>(array);
   const auto raw_values = array_actual->raw_values();
   // Guarantee the type cast between raw_values and input of PutSpaced.
   static_assert(sizeof(CType) == sizeof(*raw_values), "Type mismatch");
@@ -348,22 +348,22 @@ static void BM_ByteStreamSplitEncode(benchmark::State& state, EncodeFunc&& encode_func) {
 
 static void BM_ByteStreamSplitDecode_Float_Scalar(benchmark::State& state) {
   BM_ByteStreamSplitDecode<float>(
-      state, arrow::util::internal::ByteStreamSplitDecodeScalar<float>);
+      state, ::arrow::util::internal::ByteStreamSplitDecodeScalar<float>);
 }
 
 static void BM_ByteStreamSplitDecode_Double_Scalar(benchmark::State& state) {
   BM_ByteStreamSplitDecode<double>(
-      state, arrow::util::internal::ByteStreamSplitDecodeScalar<double>);
+      state, ::arrow::util::internal::ByteStreamSplitDecodeScalar<double>);
 }
 
 static void BM_ByteStreamSplitEncode_Float_Scalar(benchmark::State& state) {
   BM_ByteStreamSplitEncode<float>(
-      state, arrow::util::internal::ByteStreamSplitEncodeScalar<float>);
+      state, ::arrow::util::internal::ByteStreamSplitEncodeScalar<float>);
 }
 
 static void BM_ByteStreamSplitEncode_Double_Scalar(benchmark::State& state) {
   BM_ByteStreamSplitEncode<double>(
-      state, arrow::util::internal::ByteStreamSplitEncodeScalar<double>);
+      state, ::arrow::util::internal::ByteStreamSplitEncodeScalar<double>);
 }
 
 BENCHMARK(BM_ByteStreamSplitDecode_Float_Scalar)->Range(MIN_RANGE, MAX_RANGE);
@@ -374,22 +374,22 @@ BENCHMARK(BM_ByteStreamSplitEncode_Double_Scalar)->Range(MIN_RANGE, MAX_RANGE);
 
 #if defined(ARROW_HAVE_SSE4_2)
 static void BM_ByteStreamSplitDecode_Float_Sse2(benchmark::State& state) {
   BM_ByteStreamSplitDecode<float>(
-      state, arrow::util::internal::ByteStreamSplitDecodeSse2<float>);
+      state, ::arrow::util::internal::ByteStreamSplitDecodeSse2<float>);
 }
 
 static void BM_ByteStreamSplitDecode_Double_Sse2(benchmark::State& state) {
   BM_ByteStreamSplitDecode<double>(
-      state, arrow::util::internal::ByteStreamSplitDecodeSse2<double>);
+      state, ::arrow::util::internal::ByteStreamSplitDecodeSse2<double>);
 }
 
 static void BM_ByteStreamSplitEncode_Float_Sse2(benchmark::State& state) {
   BM_ByteStreamSplitEncode<float>(
-      state, arrow::util::internal::ByteStreamSplitEncodeSse2<float>);
+      state, ::arrow::util::internal::ByteStreamSplitEncodeSse2<float>);
 }
 
 static void BM_ByteStreamSplitEncode_Double_Sse2(benchmark::State& state) {
   BM_ByteStreamSplitEncode<double>(
-      state, arrow::util::internal::ByteStreamSplitEncodeSse2<double>);
+      state, ::arrow::util::internal::ByteStreamSplitEncodeSse2<double>);
 }
 
 BENCHMARK(BM_ByteStreamSplitDecode_Float_Sse2)->Range(MIN_RANGE, MAX_RANGE);
@@ -401,22 +401,22 @@ BENCHMARK(BM_ByteStreamSplitEncode_Double_Sse2)->Range(MIN_RANGE, MAX_RANGE);
 
 #if defined(ARROW_HAVE_AVX2)
 static void BM_ByteStreamSplitDecode_Float_Avx2(benchmark::State& state) {
   BM_ByteStreamSplitDecode<float>(
-      state, arrow::util::internal::ByteStreamSplitDecodeAvx2<float>);
+      state, ::arrow::util::internal::ByteStreamSplitDecodeAvx2<float>);
 }
 
 static void BM_ByteStreamSplitDecode_Double_Avx2(benchmark::State& state) {
   BM_ByteStreamSplitDecode<double>(
-      state, arrow::util::internal::ByteStreamSplitDecodeAvx2<double>);
+      state, ::arrow::util::internal::ByteStreamSplitDecodeAvx2<double>);
 }
 
 static void BM_ByteStreamSplitEncode_Float_Avx2(benchmark::State& state) {
   BM_ByteStreamSplitEncode<float>(
-      state, arrow::util::internal::ByteStreamSplitEncodeAvx2<float>);
+      state, ::arrow::util::internal::ByteStreamSplitEncodeAvx2<float>);
 }
 
 static void BM_ByteStreamSplitEncode_Double_Avx2(benchmark::State& state) {
   BM_ByteStreamSplitEncode<double>(
-      state, arrow::util::internal::ByteStreamSplitEncodeAvx2<double>);
+      state, ::arrow::util::internal::ByteStreamSplitEncodeAvx2<double>);
 }
 
 BENCHMARK(BM_ByteStreamSplitDecode_Float_Avx2)->Range(MIN_RANGE, MAX_RANGE);
@@ -428,22 +428,22 @@ BENCHMARK(BM_ByteStreamSplitEncode_Double_Avx2)->Range(MIN_RANGE, MAX_RANGE);
 
 #if defined(ARROW_HAVE_AVX512)
 static void BM_ByteStreamSplitDecode_Float_Avx512(benchmark::State& state) {
   BM_ByteStreamSplitDecode<float>(
-      state, arrow::util::internal::ByteStreamSplitDecodeAvx512<float>);
+      state, ::arrow::util::internal::ByteStreamSplitDecodeAvx512<float>);
 }
 
 static void BM_ByteStreamSplitDecode_Double_Avx512(benchmark::State& state) {
   BM_ByteStreamSplitDecode<double>(
-      state, arrow::util::internal::ByteStreamSplitDecodeAvx512<double>);
+      state, ::arrow::util::internal::ByteStreamSplitDecodeAvx512<double>);
 }
 
 static void BM_ByteStreamSplitEncode_Float_Avx512(benchmark::State& state) {
   BM_ByteStreamSplitEncode<float>(
-      state, arrow::util::internal::ByteStreamSplitEncodeAvx512<float>);
+      state, ::arrow::util::internal::ByteStreamSplitEncodeAvx512<float>);
 }
 
 static void BM_ByteStreamSplitEncode_Double_Avx512(benchmark::State& state) {
   BM_ByteStreamSplitEncode<double>(
-      state, arrow::util::internal::ByteStreamSplitEncodeAvx512<double>);
+      state, ::arrow::util::internal::ByteStreamSplitEncodeAvx512<double>);
 }
 
 BENCHMARK(BM_ByteStreamSplitDecode_Float_Avx512)->Range(MIN_RANGE, MAX_RANGE);
diff --git a/cpp/src/parquet/encoding_test.cc b/cpp/src/parquet/encoding_test.cc
index 6766c95dcdd..7e440fa9a03 100644
--- a/cpp/src/parquet/encoding_test.cc
+++ b/cpp/src/parquet/encoding_test.cc
@@ -60,7 +60,7 @@ TEST(VectorBooleanTest, TestEncodeDecode) {
   int nbytes = static_cast<int>(BitUtil::BytesForBits(nvalues));
 
   std::vector<bool> draws;
-  arrow::random_is_valid(nvalues, 0.5 /* null prob */, &draws, 0 /* seed */);
+  ::arrow::random_is_valid(nvalues, 0.5 /* null prob */, &draws, 0 /* seed */);
 
   std::unique_ptr<BooleanEncoder> encoder =
       MakeTypedEncoder<BooleanType>(Encoding::PLAIN);
@@ -81,7 +81,7 @@ TEST(VectorBooleanTest, TestEncodeDecode) {
   ASSERT_EQ(nvalues, values_decoded);
 
   for (int i = 0; i < nvalues; ++i) {
-    ASSERT_EQ(draws[i], arrow::BitUtil::GetBit(decode_data, i)) << i;
+    ASSERT_EQ(draws[i], ::arrow::BitUtil::GetBit(decode_data, i)) << i;
   }
 }
 
@@ -341,7 +341,7 @@ class TestDictionaryEncoding : public TestEncodingBase<Type> {
   static constexpr int TYPE = Type::type_num;
 
   void CheckRoundtrip() {
-    std::vector<uint8_t> valid_bits(arrow::BitUtil::BytesForBits(num_values_) + 1, 255);
+    std::vector<uint8_t> valid_bits(::arrow::BitUtil::BytesForBits(num_values_) + 1, 255);
     auto base_encoder = MakeEncoder(Type::type_num, Encoding::PLAIN, true, descr_.get());
     auto encoder =
@@ -408,8 +408,8 @@ TEST(TestDictionaryEncoding, CannotDictDecodeBoolean) {
 
 class TestArrowBuilderDecoding : public ::testing::Test {
  public:
-  using DenseBuilder = arrow::internal::ChunkedBinaryBuilder;
-  using DictBuilder = arrow::BinaryDictionary32Builder;
+  using DenseBuilder = ::arrow::internal::ChunkedBinaryBuilder;
+  using DictBuilder = ::arrow::BinaryDictionary32Builder;
 
   void SetUp() override { null_probabilities_ = {0.0, 0.5, 1.0}; }
   void TearDown() override {}
@@ -424,7 +424,7 @@ class TestArrowBuilderDecoding : public ::testing::Test {
     constexpr int repeat = 100;
     constexpr int64_t min_length = 2;
     constexpr int64_t max_length = 10;
-    arrow::random::RandomArrayGenerator rag(0);
+    ::arrow::random::RandomArrayGenerator rag(0);
     expected_dense_ = rag.BinaryWithRepeats(repeat * num_unique, num_unique, min_length,
                                             max_length, null_probability);
@@ -437,7 +437,7 @@ class TestArrowBuilderDecoding : public ::testing::Test {
     ASSERT_OK(builder->Finish(&expected_dict_));
 
     // Initialize input_data_ for the encoder from the expected_array_ values
-    const auto& binary_array = static_cast<const arrow::BinaryArray&>(*expected_dense_);
+    const auto& binary_array = static_cast<const ::arrow::BinaryArray&>(*expected_dense_);
     input_data_.resize(binary_array.length());
 
     for (int64_t i = 0; i < binary_array.length(); ++i) {
@@ -454,7 +454,7 @@ class TestArrowBuilderDecoding : public ::testing::Test {
   // Setup encoder/decoder pair for testing with
   virtual void SetupEncoderDecoder() = 0;
 
-  void CheckDense(int actual_num_values, const arrow::Array& chunk) {
+  void CheckDense(int actual_num_values, const ::arrow::Array& chunk) {
     ASSERT_EQ(actual_num_values, num_values_ - null_count_);
     ASSERT_ARRAYS_EQUAL(chunk, *expected_dense_);
   }
@@ -462,7 +462,7 @@ class TestArrowBuilderDecoding : public ::testing::Test {
   template <typename Builder>
   void CheckDict(int actual_num_values, Builder& builder) {
     ASSERT_EQ(actual_num_values, num_values_ - null_count_);
-    std::shared_ptr<arrow::Array> actual;
+    std::shared_ptr<::arrow::Array> actual;
     ASSERT_OK(builder.Finish(&actual));
     ASSERT_ARRAYS_EQUAL(*actual, *expected_dict_);
   }
@@ -517,8 +517,8 @@ class TestArrowBuilderDecoding : public ::testing::Test {
 
  protected:
   std::vector<double> null_probabilities_;
-  std::shared_ptr<arrow::Array> expected_dict_;
-  std::shared_ptr<arrow::Array> expected_dense_;
+  std::shared_ptr<::arrow::Array> expected_dict_;
+  std::shared_ptr<::arrow::Array> expected_dense_;
   int num_values_;
   int null_count_;
   std::vector<ByteArray> input_data_;
@@ -572,7 +572,7 @@ TEST(PlainEncodingAdHoc, ArrowBinaryDirectPut) {
   const double null_probability = 0.25;
 
   auto CheckSeed = [&](int seed) {
-    arrow::random::RandomArrayGenerator rag(seed);
+    ::arrow::random::RandomArrayGenerator rag(seed);
     auto values = rag.String(size, min_length, max_length, null_probability);
 
     auto encoder = MakeTypedEncoder<ByteArrayType>(Encoding::PLAIN);
@@ -585,7 +585,7 @@ TEST(PlainEncodingAdHoc, ArrowBinaryDirectPut) {
     decoder->SetData(num_values, buf->data(), static_cast<int>(buf->size()));
 
     typename EncodingTraits<ByteArrayType>::Accumulator acc;
-    acc.builder.reset(new arrow::StringBuilder);
+    acc.builder.reset(new ::arrow::StringBuilder);
 
     ASSERT_EQ(num_values,
               decoder->DecodeArrow(static_cast<int>(values->length()),
                                    static_cast<int>(values->null_count()),
@@ -594,7 +594,7 @@ TEST(PlainEncodingAdHoc, ArrowBinaryDirectPut) {
     std::shared_ptr<::arrow::Array> result;
     ASSERT_OK(acc.builder->Finish(&result));
     ASSERT_EQ(50, result->length());
-    arrow::AssertArraysEqual(*values, *result);
+    ::arrow::AssertArraysEqual(*values, *result);
   };
 
   for (auto seed : {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}) {
@@ -642,9 +642,9 @@ class EncodingAdHocTyped : public ::testing::Test {
     return column_descr.get();
   }
 
-  std::shared_ptr<arrow::Array> GetValues(int seed);
+  std::shared_ptr<::arrow::Array> GetValues(int seed);
 
-  static std::shared_ptr<arrow::DataType> arrow_type();
+  static std::shared_ptr<::arrow::DataType> arrow_type();
 
   void Plain(int seed) {
     auto values = GetValues(seed);
@@ -658,7 +658,7 @@ class EncodingAdHocTyped : public ::testing::Test {
     int num_values = static_cast<int>(values->length() - values->null_count());
     decoder->SetData(num_values, buf->data(), static_cast<int>(buf->size()));
 
-    BuilderType acc(arrow_type(), arrow::default_memory_pool());
+    BuilderType acc(arrow_type(), ::arrow::default_memory_pool());
     ASSERT_EQ(num_values,
               decoder->DecodeArrow(static_cast<int>(values->length()),
                                    static_cast<int>(values->null_count()),
@@ -667,7 +667,7 @@ class EncodingAdHocTyped : public ::testing::Test {
     std::shared_ptr<::arrow::Array> result;
     ASSERT_OK(acc.Finish(&result));
     ASSERT_EQ(50, result->length());
-    arrow::AssertArraysEqual(*values, *result);
+    ::arrow::AssertArraysEqual(*values, *result);
   }
 
   void ByteStreamSplit(int seed) {
@@ -687,7 +687,7 @@ class EncodingAdHocTyped : public ::testing::Test {
     int num_values = static_cast<int>(values->length() - values->null_count());
     decoder->SetData(num_values, buf->data(), static_cast<int>(buf->size()));
 
-    BuilderType acc(arrow_type(), arrow::default_memory_pool());
+    BuilderType acc(arrow_type(), ::arrow::default_memory_pool());
     ASSERT_EQ(num_values,
               decoder->DecodeArrow(static_cast<int>(values->length()),
                                    static_cast<int>(values->null_count()),
@@ -696,7 +696,7 @@ class EncodingAdHocTyped : public ::testing::Test {
     std::shared_ptr<::arrow::Array> result;
     ASSERT_OK(acc.Finish(&result));
     ASSERT_EQ(50, result->length());
-    arrow::AssertArraysEqual(*values, *result);
+    ::arrow::AssertArraysEqual(*values, *result);
   }
 
   void Dict(int seed) {
@@ -719,7 +719,7 @@ class EncodingAdHocTyped : public ::testing::Test {
     std::unique_ptr<TypedDecoder<ParquetType>> decoder;
     GetDictDecoder(encoder, num_values, &buf, &dict_buf, column_descr(), &decoder);
 
-    BuilderType acc(arrow_type(), arrow::default_memory_pool());
+    BuilderType acc(arrow_type(), ::arrow::default_memory_pool());
     ASSERT_EQ(num_values,
               decoder->DecodeArrow(static_cast<int>(values->length()),
                                    static_cast<int>(values->null_count()),
@@ -727,7 +727,7 @@ class EncodingAdHocTyped : public ::testing::Test {
 
     std::shared_ptr<::arrow::Array> result;
     ASSERT_OK(acc.Finish(&result));
-    arrow::AssertArraysEqual(*values, *result);
+    ::arrow::AssertArraysEqual(*values, *result);
   }
 
   void DictPutIndices() {
@@ -735,14 +735,15 @@ class EncodingAdHocTyped : public ::testing::Test {
       return;
     }
 
-    auto dict_values =
-        arrow::ArrayFromJSON(arrow_type(), std::is_same<ParquetType, ByteArrayType>::value
-                                               ? R"(["abcdefgh", "ijklmnop", "qrstuvwx"])"
-                                               : "[120, -37, 47]");
-    auto indices = arrow::ArrayFromJSON(arrow::int32(), "[0, 1, 2]");
-    auto indices_nulls = arrow::ArrayFromJSON(arrow::int32(), "[null, 0, 1, null, 2]");
+    auto dict_values = ::arrow::ArrayFromJSON(
+        arrow_type(), std::is_same<ParquetType, ByteArrayType>::value
+                          ? R"(["abcdefgh", "ijklmnop", "qrstuvwx"])"
+                          : "[120, -37, 47]");
+    auto indices = ::arrow::ArrayFromJSON(::arrow::int32(), "[0, 1, 2]");
+    auto indices_nulls =
+        ::arrow::ArrayFromJSON(::arrow::int32(), "[null, 0, 1, null, 2]");
 
-    auto expected = arrow::ArrayFromJSON(
+    auto expected = ::arrow::ArrayFromJSON(
         arrow_type(), std::is_same<ParquetType, ByteArrayType>::value
R"(["abcdefgh", "ijklmnop", "qrstuvwx", null, "abcdefgh", "ijklmnop", null, "qrstuvwx"])" @@ -770,7 +771,7 @@ class EncodingAdHocTyped : public ::testing::Test { std::unique_ptr> decoder; GetDictDecoder(encoder, num_values, &buf, &dict_buf, column_descr(), &decoder); - BuilderType acc(arrow_type(), arrow::default_memory_pool()); + BuilderType acc(arrow_type(), ::arrow::default_memory_pool()); ASSERT_EQ(num_values, decoder->DecodeArrow(static_cast(expected->length()), static_cast(expected->null_count()), expected->null_bitmap_data(), @@ -778,7 +779,7 @@ class EncodingAdHocTyped : public ::testing::Test { std::shared_ptr<::arrow::Array> result; ASSERT_OK(acc.Finish(&result)); - arrow::AssertArraysEqual(*expected, *result); + ::arrow::AssertArraysEqual(*expected, *result); } protected: @@ -787,31 +788,31 @@ class EncodingAdHocTyped : public ::testing::Test { }; template -std::shared_ptr EncodingAdHocTyped::arrow_type() { - return arrow::TypeTraits::type_singleton(); +std::shared_ptr<::arrow::DataType> EncodingAdHocTyped::arrow_type() { + return ::arrow::TypeTraits::type_singleton(); } template <> -std::shared_ptr EncodingAdHocTyped::arrow_type() { - return arrow::fixed_size_binary(sizeof(uint64_t)); +std::shared_ptr<::arrow::DataType> EncodingAdHocTyped::arrow_type() { + return ::arrow::fixed_size_binary(sizeof(uint64_t)); } template -std::shared_ptr EncodingAdHocTyped::GetValues(int seed) { - arrow::random::RandomArrayGenerator rag(seed); +std::shared_ptr<::arrow::Array> EncodingAdHocTyped::GetValues(int seed) { + ::arrow::random::RandomArrayGenerator rag(seed); return rag.Numeric(size_, 0, 10, null_probability_); } template <> -std::shared_ptr EncodingAdHocTyped::GetValues(int seed) { - arrow::random::RandomArrayGenerator rag(seed); +std::shared_ptr<::arrow::Array> EncodingAdHocTyped::GetValues(int seed) { + ::arrow::random::RandomArrayGenerator rag(seed); return rag.Boolean(size_, 0.1, null_probability_); } template <> -std::shared_ptr EncodingAdHocTyped::GetValues(int seed) { - arrow::random::RandomArrayGenerator rag(seed); - std::shared_ptr values; +std::shared_ptr<::arrow::Array> EncodingAdHocTyped::GetValues(int seed) { + ::arrow::random::RandomArrayGenerator rag(seed); + std::shared_ptr<::arrow::Array> values; ARROW_EXPECT_OK( rag.UInt64(size_, 0, std::numeric_limits::max(), null_probability_) ->View(arrow_type()) @@ -842,7 +843,7 @@ TEST(DictEncodingAdHoc, ArrowBinaryDirectPut) { const int64_t min_length = 0; const int64_t max_length = 10; const double null_probability = 0.1; - arrow::random::RandomArrayGenerator rag(0); + ::arrow::random::RandomArrayGenerator rag(0); auto values = rag.String(size, min_length, max_length, null_probability); auto owned_encoder = MakeTypedEncoder(Encoding::PLAIN, @@ -858,7 +859,7 @@ TEST(DictEncodingAdHoc, ArrowBinaryDirectPut) { GetDictDecoder(encoder, num_values, &buf, &dict_buf, nullptr, &decoder); typename EncodingTraits::Accumulator acc; - acc.builder.reset(new arrow::StringBuilder); + acc.builder.reset(new ::arrow::StringBuilder); ASSERT_EQ(num_values, decoder->DecodeArrow(static_cast(values->length()), static_cast(values->null_count()), @@ -866,22 +867,23 @@ TEST(DictEncodingAdHoc, ArrowBinaryDirectPut) { std::shared_ptr<::arrow::Array> result; ASSERT_OK(acc.builder->Finish(&result)); - arrow::AssertArraysEqual(*values, *result); + ::arrow::AssertArraysEqual(*values, *result); } TYPED_TEST(EncodingAdHocTyped, DictArrowDirectPut) { this->Dict(0); } TEST(DictEncodingAdHoc, PutDictionaryPutIndices) { // Part of ARROW-3246 - auto dict_values = 
arrow::ArrayFromJSON(arrow::binary(), "[\"foo\", \"bar\", \"baz\"]"); + auto dict_values = + ::arrow::ArrayFromJSON(::arrow::binary(), "[\"foo\", \"bar\", \"baz\"]"); - auto CheckIndexType = [&](const std::shared_ptr& index_ty) { - auto indices = arrow::ArrayFromJSON(index_ty, "[0, 1, 2]"); - auto indices_nulls = arrow::ArrayFromJSON(index_ty, "[null, 0, 1, null, 2]"); + auto CheckIndexType = [&](const std::shared_ptr<::arrow::DataType>& index_ty) { + auto indices = ::arrow::ArrayFromJSON(index_ty, "[0, 1, 2]"); + auto indices_nulls = ::arrow::ArrayFromJSON(index_ty, "[null, 0, 1, null, 2]"); - auto expected = arrow::ArrayFromJSON(arrow::binary(), - "[\"foo\", \"bar\", \"baz\", null, " - "\"foo\", \"bar\", null, \"baz\"]"); + auto expected = ::arrow::ArrayFromJSON(::arrow::binary(), + "[\"foo\", \"bar\", \"baz\", null, " + "\"foo\", \"bar\", null, \"baz\"]"); auto owned_encoder = MakeTypedEncoder(Encoding::PLAIN, /*use_dictionary=*/true); @@ -903,7 +905,7 @@ TEST(DictEncodingAdHoc, PutDictionaryPutIndices) { GetDictDecoder(encoder, num_values, &buf, &dict_buf, nullptr, &decoder); typename EncodingTraits::Accumulator acc; - acc.builder.reset(new arrow::BinaryBuilder); + acc.builder.reset(new ::arrow::BinaryBuilder); ASSERT_EQ(num_values, decoder->DecodeArrow(static_cast(expected->length()), static_cast(expected->null_count()), expected->null_bitmap_data(), @@ -911,7 +913,7 @@ TEST(DictEncodingAdHoc, PutDictionaryPutIndices) { std::shared_ptr<::arrow::Array> result; ASSERT_OK(acc.builder->Finish(&result)); - arrow::AssertArraysEqual(*expected, *result); + ::arrow::AssertArraysEqual(*expected, *result); }; for (auto ty : ::arrow::all_dictionary_index_types()) { @@ -988,13 +990,14 @@ TEST_F(DictEncoding, CheckDecodeIndicesSpaced) { num_values_, null_count_, valid_bits_, 0, builder.get()); } ASSERT_EQ(actual_num_values, num_values_ - null_count_); - std::shared_ptr actual; + std::shared_ptr<::arrow::Array> actual; ASSERT_OK(builder->Finish(&actual)); ASSERT_ARRAYS_EQUAL(*actual, *expected_dict_); // Check that null indices are zero-initialized - const auto& dict_actual = checked_cast(*actual); - const auto& indices = checked_cast(*dict_actual.indices()); + const auto& dict_actual = checked_cast(*actual); + const auto& indices = + checked_cast(*dict_actual.indices()); auto raw_values = indices.raw_values(); for (int64_t i = 0; i < indices.length(); ++i) { @@ -1052,11 +1055,11 @@ class TestByteStreamSplitEncoding : public TestEncodingBase { } { - std::vector valid_bits(arrow::BitUtil::BytesForBits(num_values_), 0); + std::vector valid_bits(::arrow::BitUtil::BytesForBits(num_values_), 0); std::vector expected_filtered_output; const int every_nth = 5; expected_filtered_output.reserve((num_values_ + every_nth - 1) / every_nth); - arrow::internal::BitmapWriter writer{valid_bits.data(), 0, num_values_}; + ::arrow::internal::BitmapWriter writer{valid_bits.data(), 0, num_values_}; // Set every fifth bit. for (int i = 0; i < num_values_; ++i) { if (i % every_nth == 0) { diff --git a/cpp/src/parquet/file_reader.cc b/cpp/src/parquet/file_reader.cc index 67f211b29c7..332493cf230 100644 --- a/cpp/src/parquet/file_reader.cc +++ b/cpp/src/parquet/file_reader.cc @@ -88,9 +88,9 @@ const RowGroupMetaData* RowGroupReader::metadata() const { return contents_->met /// Compute the section of the file that should be read for the given /// row group and column chunk. 
-arrow::io::ReadRange ComputeColumnChunkRange(FileMetaData* file_metadata,
-                                             int64_t source_size, int row_group_index,
-                                             int column_index) {
+::arrow::io::ReadRange ComputeColumnChunkRange(FileMetaData* file_metadata,
+                                               int64_t source_size, int row_group_index,
+                                               int column_index) {
   auto row_group_metadata = file_metadata->RowGroup(row_group_index);
   auto column_metadata = row_group_metadata->ColumnChunk(column_index);
@@ -142,7 +142,7 @@ class SerializedRowGroup : public RowGroupReader::Contents {
     // Read column chunk from the file
     auto col = row_group_metadata_->ColumnChunk(i);

-    arrow::io::ReadRange col_range =
+    ::arrow::io::ReadRange col_range =
         ComputeColumnChunkRange(file_metadata_, source_size_, row_group_ordinal_, i);
     std::shared_ptr<ArrowInputStream> stream;
     if (cached_source_) {
@@ -254,8 +254,8 @@ class SerializedFile : public ParquetFileReader::Contents {
                        const ::arrow::io::AsyncContext& ctx,
                        const ::arrow::io::CacheOptions& options) {
     cached_source_ =
-        std::make_shared<arrow::io::internal::ReadRangeCache>(source_, ctx, options);
-    std::vector<arrow::io::ReadRange> ranges;
+        std::make_shared<::arrow::io::internal::ReadRangeCache>(source_, ctx, options);
+    std::vector<::arrow::io::ReadRange> ranges;
     for (int row : row_groups) {
       for (int col : column_indices) {
         ranges.push_back(
@@ -316,7 +316,7 @@ class SerializedFile : public ParquetFileReader::Contents {
  private:
   std::shared_ptr<ArrowInputFile> source_;
-  std::shared_ptr<arrow::io::internal::ReadRangeCache> cached_source_;
+  std::shared_ptr<::arrow::io::internal::ReadRangeCache> cached_source_;
   int64_t source_size_;
   std::shared_ptr<FileMetaData> file_metadata_;
   ReaderProperties properties_;
@@ -344,7 +344,7 @@ void SerializedFile::ParseUnencryptedFileMetadata(
     const std::shared_ptr<Buffer>& footer_buffer, int64_t footer_read_size,
     std::shared_ptr<Buffer>* metadata_buffer, uint32_t* metadata_len,
     uint32_t* read_metadata_len) {
-  *metadata_len = arrow::util::SafeLoadAs<uint32_t>(
+  *metadata_len = ::arrow::util::SafeLoadAs<uint32_t>(
       reinterpret_cast<const uint8_t*>(footer_buffer->data()) + footer_read_size -
       kFooterSize);
   int64_t metadata_start = source_size_ - kFooterSize - *metadata_len;
@@ -376,7 +376,7 @@ void SerializedFile::ParseMetaDataOfEncryptedFileWithEncryptedFooter(
     const std::shared_ptr<Buffer>& footer_buffer, int64_t footer_read_size) {
   // encryption with encrypted footer
   // both metadata & crypto metadata length
-  uint32_t footer_len = arrow::util::SafeLoadAs<uint32_t>(
+  uint32_t footer_len = ::arrow::util::SafeLoadAs<uint32_t>(
       reinterpret_cast<const uint8_t*>(footer_buffer->data()) + footer_read_size -
       kFooterSize);
   int64_t crypto_metadata_start = source_size_ - kFooterSize - footer_len;
diff --git a/cpp/src/parquet/level_conversion_test.cc b/cpp/src/parquet/level_conversion_test.cc
index a3036758a0a..bfce74ae3a8 100644
--- a/cpp/src/parquet/level_conversion_test.cc
+++ b/cpp/src/parquet/level_conversion_test.cc
@@ -38,7 +38,7 @@ using ::arrow::internal::Bitmap;
 using ::testing::ElementsAreArray;

 std::string BitmapToString(const uint8_t* bitmap, int64_t bit_count) {
-  return arrow::internal::Bitmap(bitmap, /*offset*/ 0, /*length=*/bit_count).ToString();
+  return ::arrow::internal::Bitmap(bitmap, /*offset*/ 0, /*length=*/bit_count).ToString();
 }

 std::string BitmapToString(const std::vector<uint8_t>& bitmap, int64_t bit_count) {
diff --git a/cpp/src/parquet/metadata.cc b/cpp/src/parquet/metadata.cc
index 5407922ffa9..c2002efce07 100644
--- a/cpp/src/parquet/metadata.cc
+++ b/cpp/src/parquet/metadata.cc
@@ -936,7 +936,7 @@ void FileCryptoMetaData::WriteTo(::arrow::io::OutputStream* dst) const {

 std::string FileMetaData::SerializeToString() const {
   // We need to pass in an initial size. Since it will automatically
   // increase the buffer size to hold the metadata, we just leave it 0.
-  PARQUET_ASSIGN_OR_THROW(auto serializer, arrow::io::BufferOutputStream::Create(0));
+  PARQUET_ASSIGN_OR_THROW(auto serializer, ::arrow::io::BufferOutputStream::Create(0));
   WriteTo(serializer.get());
   PARQUET_ASSIGN_OR_THROW(auto metadata_buffer, serializer->Finish());
   return metadata_buffer->ToString();
diff --git a/cpp/src/parquet/platform.cc b/cpp/src/parquet/platform.cc
index f1eabe0290f..5c355c28be1 100644
--- a/cpp/src/parquet/platform.cc
+++ b/cpp/src/parquet/platform.cc
@@ -34,7 +34,7 @@ std::shared_ptr<::arrow::io::BufferOutputStream> CreateOutputStream(MemoryPool*
 }

 std::shared_ptr<ResizableBuffer> AllocateBuffer(MemoryPool* pool, int64_t size) {
-  PARQUET_ASSIGN_OR_THROW(auto result, arrow::AllocateResizableBuffer(size, pool));
+  PARQUET_ASSIGN_OR_THROW(auto result, ::arrow::AllocateResizableBuffer(size, pool));
   return std::move(result);
 }
diff --git a/cpp/src/parquet/printer.cc b/cpp/src/parquet/printer.cc
index 224a19dda6b..164e76f1df4 100644
--- a/cpp/src/parquet/printer.cc
+++ b/cpp/src/parquet/printer.cc
@@ -123,7 +123,7 @@ void ParquetFilePrinter::DebugPrint(std::ostream& stream, std::list<int> selecte
     }
     stream << std::endl
            << "  Compression: "
-           << arrow::internal::AsciiToUpper(
+           << ::arrow::internal::AsciiToUpper(
                   Codec::GetCodecAsString(column_chunk->compression()))
            << ", Encodings:";
     for (auto encoding : column_chunk->encodings()) {
@@ -259,7 +259,7 @@ void ParquetFilePrinter::JSONPrint(std::ostream& stream, std::list<int> selected
       stream << "\"False\",";
     }
     stream << "\n     \"Compression\": \""
-           << arrow::internal::AsciiToUpper(
+           << ::arrow::internal::AsciiToUpper(
                   Codec::GetCodecAsString(column_chunk->compression()))
            << "\", \"Encodings\": \"";
     for (auto encoding : column_chunk->encodings()) {
diff --git a/cpp/src/parquet/properties.h b/cpp/src/parquet/properties.h
index 2d9725c2b49..f0422012122 100644
--- a/cpp/src/parquet/properties.h
+++ b/cpp/src/parquet/properties.h
@@ -31,6 +31,7 @@
 #include "parquet/parquet_version.h"
 #include "parquet/platform.h"
 #include "parquet/schema.h"
+#include "parquet/type_fwd.h"
 #include "parquet/types.h"

 namespace parquet {
@@ -45,9 +46,7 @@ namespace parquet {
 /// Note that the 2.x format version series also introduced new serialized
 /// data page metadata and on disk data page layout. To enable this, use
 /// ParquetDataPageVersion.
-struct ParquetVersion {
-  enum type { PARQUET_1_0, PARQUET_2_0 };
-};
+struct ParquetVersion;

 /// Controls serialization format of data pages. parquet-format v2.0.0
 /// introduced a new data page metadata type DataPageV2 and serialized page
diff --git a/cpp/src/parquet/properties_test.cc b/cpp/src/parquet/properties_test.cc
index 39dbb7f0556..7ce96e4a7d7 100644
--- a/cpp/src/parquet/properties_test.cc
+++ b/cpp/src/parquet/properties_test.cc
@@ -72,7 +72,7 @@ TEST(TestReaderProperties, GetStreamInsufficientData) {
   // ARROW-6058
   std::string data = "shorter than expected";
   auto buf = std::make_shared<Buffer>(data);
-  auto reader = std::make_shared<arrow::io::BufferReader>(buf);
+  auto reader = std::make_shared<::arrow::io::BufferReader>(buf);

   ReaderProperties props;
   try {
diff --git a/cpp/src/parquet/reader_test.cc b/cpp/src/parquet/reader_test.cc
index 5fda439e4a1..8dbcde8e5f2 100644
--- a/cpp/src/parquet/reader_test.cc
+++ b/cpp/src/parquet/reader_test.cc
@@ -450,22 +450,22 @@ TEST(TestFileReader, BufferedReads) {
   std::shared_ptr<WriterProperties> writer_props =
       WriterProperties::Builder().write_batch_size(64)->data_pagesize(128)->build();

-  ASSERT_OK_AND_ASSIGN(auto out_file, arrow::io::BufferOutputStream::Create());
+  ASSERT_OK_AND_ASSIGN(auto out_file, ::arrow::io::BufferOutputStream::Create());
   std::shared_ptr<ParquetFileWriter> file_writer =
       ParquetFileWriter::Open(out_file, schema, writer_props);
   RowGroupWriter* rg_writer = file_writer->AppendRowGroup();

-  std::vector<std::shared_ptr<arrow::Array>> column_data;
+  ::arrow::ArrayVector column_data;
   ::arrow::random::RandomArrayGenerator rag(0);

   // Scratch space for reads
-  std::vector<std::shared_ptr<arrow::Buffer>> scratch_space;
+  ::arrow::BufferVector scratch_space;

   // write columns
   for (int col_index = 0; col_index < num_columns; ++col_index) {
     DoubleWriter* writer = static_cast<DoubleWriter*>(rg_writer->NextColumn());
-    std::shared_ptr<arrow::Array> col = rag.Float64(num_rows, 0, 100);
+    std::shared_ptr<::arrow::Array> col = rag.Float64(num_rows, 0, 100);
     const auto& col_typed = static_cast<const ::arrow::DoubleArray&>(*col);
     writer->WriteBatch(num_rows, nullptr, nullptr, col_typed.raw_values());
     column_data.push_back(col);
@@ -479,7 +479,7 @@
   // Open the reader
   ASSERT_OK_AND_ASSIGN(auto file_buf, out_file->Finish());
-  auto in_file = std::make_shared<arrow::io::BufferReader>(file_buf);
+  auto in_file = std::make_shared<::arrow::io::BufferReader>(file_buf);

   ReaderProperties reader_props;
   reader_props.enable_buffered_stream();
diff --git a/cpp/src/parquet/statistics.cc b/cpp/src/parquet/statistics.cc
index bb970367f20..dd019c360cc 100644
--- a/cpp/src/parquet/statistics.cc
+++ b/cpp/src/parquet/statistics.cc
@@ -56,14 +56,14 @@ struct CompareHelper {
   // MSVC17 fix, isnan is not overloaded for IntegralType as per C++11
   // standard requirements.
   template <typename T>
-  static arrow::enable_if_t<std::is_floating_point<T>::value, T> Coalesce(T val,
-                                                                          T fallback) {
+  static ::arrow::enable_if_t<std::is_floating_point<T>::value, T> Coalesce(T val,
+                                                                            T fallback) {
     return std::isnan(val) ? fallback : val;
   }

   template <typename T>
-  static arrow::enable_if_t<std::is_integral<T>::value, T> Coalesce(T val,
-                                                                    T fallback) {
+  static ::arrow::enable_if_t<std::is_integral<T>::value, T> Coalesce(
+      T val, T fallback) {
     return val;
   }
@@ -83,7 +83,7 @@ struct UnsignedCompareHelperBase {
   static T Coalesce(T val, T fallback) { return val; }

   static inline bool Compare(int type_length, T a, T b) {
-    return arrow::util::SafeCopy<UCType>(a) < arrow::util::SafeCopy<UCType>(b);
+    return ::arrow::util::SafeCopy<UCType>(a) < ::arrow::util::SafeCopy<UCType>(b);
   }

   static T Min(int type_length, T a, T b) { return Compare(type_length, a, b) ? a : b; }
@@ -117,8 +117,8 @@ struct CompareHelper {
     if (a.value[2] != b.value[2]) {
       // Only the MSB bit is by Signed comparison. For little-endian, this is the
       // last bit of Int96 type.
-      return arrow::util::SafeCopy<int32_t>(a.value[2]) <
-             arrow::util::SafeCopy<int32_t>(b.value[2]);
+      return ::arrow::util::SafeCopy<int32_t>(a.value[2]) <
+             ::arrow::util::SafeCopy<int32_t>(b.value[2]);
     } else if (a.value[1] != b.value[1]) {
       return (a.value[1] < b.value[1]);
     }
@@ -223,11 +223,11 @@ CleanStatistic(std::pair<T, T> min_max) {
   // Ignore if one of the value is nan.
   if (std::isnan(min) || std::isnan(max)) {
-    return arrow::util::nullopt;
+    return ::arrow::util::nullopt;
   }

   if (min == std::numeric_limits<T>::max() && max == std::numeric_limits<T>::lowest()) {
-    return arrow::util::nullopt;
+    return ::arrow::util::nullopt;
   }

   T zero{};
@@ -245,7 +245,7 @@ CleanStatistic(std::pair<T, T> min_max) {
 optional<std::pair<FLBA, FLBA>> CleanStatistic(std::pair<FLBA, FLBA> min_max) {
   if (min_max.first.ptr == nullptr || min_max.second.ptr == nullptr) {
-    return arrow::util::nullopt;
+    return ::arrow::util::nullopt;
   }
   return min_max;
 }
@@ -253,7 +253,7 @@ optional<std::pair<FLBA, FLBA>> CleanStatistic(std::pair<FLBA, FLBA> min_max) {
 optional<std::pair<ByteArray, ByteArray>> CleanStatistic(
     std::pair<ByteArray, ByteArray> min_max) {
   if (min_max.first.ptr == nullptr || min_max.second.ptr == nullptr) {
-    return arrow::util::nullopt;
+    return ::arrow::util::nullopt;
   }
   return min_max;
 }
diff --git a/cpp/src/parquet/statistics_test.cc b/cpp/src/parquet/statistics_test.cc
index 8edc5ee984f..7a86e3338af 100644
--- a/cpp/src/parquet/statistics_test.cc
+++ b/cpp/src/parquet/statistics_test.cc
@@ -654,7 +654,7 @@ class TestStatisticsSortOrder : public ::testing::Test {
   // Create a ParquetReader instance
   std::unique_ptr<parquet::ParquetFileReader> parquet_reader =
       parquet::ParquetFileReader::Open(
-          std::make_shared<arrow::io::BufferReader>(pbuffer));
+          std::make_shared<::arrow::io::BufferReader>(pbuffer));

   // Get the File MetaData
   std::shared_ptr<parquet::FileMetaData> file_metadata = parquet_reader->metadata();
@@ -867,7 +867,7 @@ TEST_F(TestStatisticsSortOrderFLBA, UnknownSortOrder) {
   // Create a ParquetReader instance
   std::unique_ptr<parquet::ParquetFileReader> parquet_reader =
       parquet::ParquetFileReader::Open(
-          std::make_shared<arrow::io::BufferReader>(pbuffer));
+          std::make_shared<::arrow::io::BufferReader>(pbuffer));

   // Get the File MetaData
   std::shared_ptr<parquet::FileMetaData> file_metadata = parquet_reader->metadata();
   std::shared_ptr<parquet::RowGroupMetaData> rg_metadata = file_metadata->RowGroup(0);
diff --git a/cpp/src/parquet/stream_reader.h b/cpp/src/parquet/stream_reader.h
index 5cac4cd0c85..806b0e8ad9a 100644
--- a/cpp/src/parquet/stream_reader.h
+++ b/cpp/src/parquet/stream_reader.h
@@ -58,7 +58,7 @@ namespace parquet {
 class PARQUET_EXPORT StreamReader {
  public:
   template <typename T>
-  using optional = arrow::util::optional<T>;
+  using optional = ::arrow::util::optional<T>;

   // N.B. Default constructed objects are not usable.
This // constructor is provided so that the object may be move diff --git a/cpp/src/parquet/stream_reader_test.cc b/cpp/src/parquet/stream_reader_test.cc index f6db4dd1969..eb7b133740e 100644 --- a/cpp/src/parquet/stream_reader_test.cc +++ b/cpp/src/parquet/stream_reader_test.cc @@ -34,7 +34,7 @@ namespace test { template using optional = StreamReader::optional; -using arrow::util::nullopt; +using ::arrow::util::nullopt; struct TestData { static void init() { std::time(&ts_offset_); } @@ -150,7 +150,7 @@ class TestStreamReader : public ::testing::Test { const char* GetDataFile() const { return "stream_reader_test.parquet"; } void SetUp() { - PARQUET_ASSIGN_OR_THROW(auto infile, arrow::io::ReadableFile::Open(GetDataFile())); + PARQUET_ASSIGN_OR_THROW(auto infile, ::arrow::io::ReadableFile::Open(GetDataFile())); auto file_reader = parquet::ParquetFileReader::Open(infile); reader_ = StreamReader{std::move(file_reader)}; } @@ -202,7 +202,7 @@ class TestStreamReader : public ::testing::Test { void createTestFile() { PARQUET_ASSIGN_OR_THROW(auto outfile, - arrow::io::FileOutputStream::Open(GetDataFile())); + ::arrow::io::FileOutputStream::Open(GetDataFile())); auto file_writer = ParquetFileWriter::Open(outfile, GetSchema()); @@ -591,7 +591,7 @@ class TestOptionalFields : public ::testing::Test { const char* GetDataFile() const { return "stream_reader_test_optional_fields.parquet"; } void SetUp() { - PARQUET_ASSIGN_OR_THROW(auto infile, arrow::io::ReadableFile::Open(GetDataFile())); + PARQUET_ASSIGN_OR_THROW(auto infile, ::arrow::io::ReadableFile::Open(GetDataFile())); auto file_reader = ParquetFileReader::Open(infile); @@ -645,7 +645,7 @@ class TestOptionalFields : public ::testing::Test { void createTestFile() { PARQUET_ASSIGN_OR_THROW(auto outfile, - arrow::io::FileOutputStream::Open(GetDataFile())); + ::arrow::io::FileOutputStream::Open(GetDataFile())); StreamWriter os{ParquetFileWriter::Open(outfile, GetSchema())}; @@ -835,8 +835,8 @@ class TestReadingDataFiles : public ::testing::Test { }; TEST_F(TestReadingDataFiles, AllTypesPlain) { - PARQUET_ASSIGN_OR_THROW( - auto infile, arrow::io::ReadableFile::Open(GetDataFile("alltypes_plain.parquet"))); + PARQUET_ASSIGN_OR_THROW(auto infile, ::arrow::io::ReadableFile::Open( + GetDataFile("alltypes_plain.parquet"))); auto file_reader = ParquetFileReader::Open(infile); auto reader = StreamReader{std::move(file_reader)}; @@ -880,7 +880,7 @@ TEST_F(TestReadingDataFiles, AllTypesPlain) { TEST_F(TestReadingDataFiles, Int32Decimal) { PARQUET_ASSIGN_OR_THROW( - auto infile, arrow::io::ReadableFile::Open(GetDataFile("int32_decimal.parquet"))); + auto infile, ::arrow::io::ReadableFile::Open(GetDataFile("int32_decimal.parquet"))); auto file_reader = ParquetFileReader::Open(infile); auto reader = StreamReader{std::move(file_reader)}; @@ -897,7 +897,7 @@ TEST_F(TestReadingDataFiles, Int32Decimal) { TEST_F(TestReadingDataFiles, Int64Decimal) { PARQUET_ASSIGN_OR_THROW( - auto infile, arrow::io::ReadableFile::Open(GetDataFile("int64_decimal.parquet"))); + auto infile, ::arrow::io::ReadableFile::Open(GetDataFile("int64_decimal.parquet"))); auto file_reader = ParquetFileReader::Open(infile); auto reader = StreamReader{std::move(file_reader)}; diff --git a/cpp/src/parquet/stream_writer.cc b/cpp/src/parquet/stream_writer.cc index ef03629d879..253ebf1bc91 100644 --- a/cpp/src/parquet/stream_writer.cc +++ b/cpp/src/parquet/stream_writer.cc @@ -136,7 +136,7 @@ StreamWriter& StreamWriter::operator<<(const std::string& v) { return WriteVariableLength(v.data(), v.size()); } 
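The pattern running through all of the parquet C++ hunks above is the same mechanical fix: every unqualified `arrow::` becomes `::arrow::`. This is not cosmetic. Parquet defines its own nested `parquet::arrow` namespace (the Arrow bridge layer whose `FileWriter` is forward-declared in parquet/type_fwd.h below), so inside `namespace parquet` the unqualified name `arrow` resolves to `parquet::arrow` rather than the top-level Arrow namespace, and any symbol that exists only at the top level fails to look up. The leading `::` anchors the lookup at the global scope. A minimal, self-contained sketch of the lookup problem (the class bodies here are illustrative placeholders, not code from the patch):

namespace arrow {
class Array {};
}  // namespace arrow

namespace parquet {
namespace arrow {  // Parquet's own Arrow-bridge namespace
class FileWriter {};
}  // namespace arrow

inline void Demo() {
  // arrow::Array a;   // would not compile: inside parquet, "arrow" names
  //                   // parquet::arrow, and parquet::arrow has no Array
  ::arrow::Array a;    // OK: "::" forces lookup to start at the global scope
  (void)a;             // suppress unused-variable warning
}
}  // namespace parquet

int main() { parquet::Demo(); }

Qualifying from the global scope also keeps these headers and tests robust if code is later moved into (or included from) another namespace context.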
-StreamWriter& StreamWriter::operator<<(arrow::util::string_view v) { +StreamWriter& StreamWriter::operator<<(::arrow::util::string_view v) { return WriteVariableLength(v.data(), v.size()); } diff --git a/cpp/src/parquet/stream_writer.h b/cpp/src/parquet/stream_writer.h index 4c7945960ea..d0db850c341 100644 --- a/cpp/src/parquet/stream_writer.h +++ b/cpp/src/parquet/stream_writer.h @@ -62,7 +62,7 @@ namespace parquet { class PARQUET_EXPORT StreamWriter { public: template - using optional = arrow::util::optional; + using optional = ::arrow::util::optional; // N.B. Default constructed objects are not usable. This // constructor is provided so that the object may be move @@ -149,7 +149,7 @@ class PARQUET_EXPORT StreamWriter { /// \brief Output operators for variable length strings. StreamWriter& operator<<(const char* v); StreamWriter& operator<<(const std::string& v); - StreamWriter& operator<<(arrow::util::string_view v); + StreamWriter& operator<<(::arrow::util::string_view v); /// \brief Output operator for optional fields. template diff --git a/cpp/src/parquet/stream_writer_test.cc b/cpp/src/parquet/stream_writer_test.cc index 62e714c4ec2..d427f51b6ab 100644 --- a/cpp/src/parquet/stream_writer_test.cc +++ b/cpp/src/parquet/stream_writer_test.cc @@ -297,7 +297,8 @@ TEST_F(TestStreamWriter, SkipColumns) { } TEST_F(TestStreamWriter, AppendNotImplemented) { - PARQUET_ASSIGN_OR_THROW(auto outfile, arrow::io::FileOutputStream::Open(GetDataFile())); + PARQUET_ASSIGN_OR_THROW(auto outfile, + ::arrow::io::FileOutputStream::Open(GetDataFile())); writer_ = StreamWriter{ParquetFileWriter::Open(outfile, GetSchema())}; writer_ << false << std::string("Just one row") << 'x' @@ -308,7 +309,7 @@ TEST_F(TestStreamWriter, AppendNotImplemented) { // Re-open file in append mode. PARQUET_ASSIGN_OR_THROW(outfile, - arrow::io::FileOutputStream::Open(GetDataFile(), true)); + ::arrow::io::FileOutputStream::Open(GetDataFile(), true)); EXPECT_THROW(ParquetFileWriter::Open(outfile, GetSchema()), ParquetException); } // namespace test diff --git a/cpp/src/parquet/test_encryption_util.h b/cpp/src/parquet/test_encryption_util.h index 39911e62d7a..4a33534f8fb 100644 --- a/cpp/src/parquet/test_encryption_util.h +++ b/cpp/src/parquet/test_encryption_util.h @@ -46,7 +46,7 @@ namespace parquet { namespace test { -using arrow::internal::TemporaryDir; +using ::arrow::internal::TemporaryDir; using parquet::ConvertedType; using parquet::Repetition; @@ -72,8 +72,8 @@ inline std::string data_file(const char* file) { // A temporary directory that contains the encrypted files generated in the tests. 
extern std::unique_ptr temp_dir; -inline arrow::Result> temp_data_dir() { - arrow::Result> dir; +inline ::arrow::Result> temp_data_dir() { + ::arrow::Result> dir; ARROW_ASSIGN_OR_RAISE(dir, TemporaryDir::Make("parquet-encryption-test-")); return dir; } diff --git a/cpp/src/parquet/type_fwd.h b/cpp/src/parquet/type_fwd.h index 2f5337b2b84..a427f5a9591 100644 --- a/cpp/src/parquet/type_fwd.h +++ b/cpp/src/parquet/type_fwd.h @@ -19,9 +19,21 @@ namespace parquet { +struct ParquetVersion { + enum type { PARQUET_1_0, PARQUET_2_0 }; +}; + class FileMetaData; class SchemaDescriptor; +class ReaderProperties; +class ArrowReaderProperties; + +class WriterProperties; +class WriterPropertiesBuilder; +class ArrowWriterProperties; +class ArrowWriterPropertiesBuilder; + namespace arrow { class FileWriter; diff --git a/cpp/src/parquet/types.h b/cpp/src/parquet/types.h index 51fb13b2332..21cef0f4da8 100644 --- a/cpp/src/parquet/types.h +++ b/cpp/src/parquet/types.h @@ -27,6 +27,7 @@ #include "arrow/util/string_view.h" #include "parquet/platform.h" +#include "parquet/type_fwd.h" namespace arrow { namespace util { diff --git a/r/NAMESPACE b/r/NAMESPACE index 9bb3e150179..ea611c8b88a 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -161,10 +161,10 @@ export(MemoryMappedFile) export(MessageReader) export(MessageType) export(MetadataVersion) +export(ParquetArrowReaderProperties) export(ParquetFileFormat) export(ParquetFileReader) export(ParquetFileWriter) -export(ParquetReaderProperties) export(ParquetVersionType) export(ParquetWriterProperties) export(Partitioning) diff --git a/r/R/array-data.R b/r/R/array-data.R index 988e2a71b2f..08b09133361 100644 --- a/r/R/array-data.R +++ b/r/R/array-data.R @@ -44,10 +44,10 @@ ArrayData <- R6Class("ArrayData", inherit = ArrowObject, active = list( - type = function() DataType$create(ArrayData__get_type(self)), + type = function() ArrayData__get_type(self), length = function() ArrayData__get_length(self), null_count = function() ArrayData__get_null_count(self), offset = function() ArrayData__get_offset(self), - buffers = function() map(ArrayData__buffers(self), shared_ptr, class = Buffer) + buffers = function() ArrayData__buffers(self) ) ) diff --git a/r/R/array.R b/r/R/array.R index 8c9a29b6680..021417d55a4 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -85,22 +85,6 @@ Array <- R6Class("Array", inherit = ArrowObject, public = list( - ..dispatch = function() { - type_id <- self$type_id() - if (type_id == Type$DICTIONARY){ - shared_ptr(DictionaryArray, self$pointer()) - } else if (type_id == Type$STRUCT) { - shared_ptr(StructArray, self$pointer()) - } else if (type_id == Type$LIST) { - shared_ptr(ListArray, self$pointer()) - } else if (type_id == Type$LARGE_LIST){ - shared_ptr(LargeListArray, self$pointer()) - } else if (type_id == Type$FIXED_SIZE_LIST){ - shared_ptr(FixedSizeListArray, self$pointer()) - } else { - self - } - }, IsNull = function(i) Array__IsNull(self, i), IsValid = function(i) Array__IsValid(self, i), length = function() Array__length(self), @@ -111,7 +95,7 @@ Array <- R6Class("Array", ApproxEquals = function(other) { inherits(other, "Array") && Array__ApproxEquals(self, other) }, - data = function() shared_ptr(ArrayData, Array__data(self)), + data = function() Array__data(self), as_vector = function() Array__as_vector(self), ToString = function() { typ <- paste0("<", self$type$ToString(), ">") @@ -119,9 +103,9 @@ Array <- R6Class("Array", }, Slice = function(offset, length = NULL) { if (is.null(length)) { - Array$create(Array__Slice1(self, offset)) + 
Array__Slice1(self, offset)
       } else {
-        Array$create(Array__Slice2(self, offset, length))
+        Array__Slice2(self, offset, length)
       }
     },
     Take = function(i) {
@@ -131,20 +115,14 @@ Array <- R6Class("Array",
       if (is.integer(i)) {
         i <- Array$create(i)
       }
-      # ARROW-9001: autoboxing in call_function
-      result <- call_function("take", self, i)
-      if (inherits(i, "ChunkedArray")) {
-        return(shared_ptr(ChunkedArray, result))
-      } else {
-        Array$create(result)
-      }
+      call_function("take", self, i)
     },
     Filter = function(i, keep_na = TRUE) {
       if (is.logical(i)) {
         i <- Array$create(i)
       }
       assert_is(i, "Array")
-      Array$create(call_function("filter", self, i, options = list(keep_na = keep_na)))
+      call_function("filter", self, i, options = list(keep_na = keep_na))
     },
     RangeEquals = function(other, start_idx, end_idx, other_start_idx = 0L) {
       assert_is(other, "Array")
@@ -162,17 +140,14 @@ Array <- R6Class("Array",
   active = list(
     null_count = function() Array__null_count(self),
     offset = function() Array__offset(self),
-    type = function() DataType$create(Array__type(self))
+    type = function() Array__type(self)
   )
 )

 Array$create <- function(x, type = NULL) {
-  if (!inherits(x, "externalptr")) {
-    if (!is.null(type)) {
-      type <- as_type(type)
-    }
-    x <- Array__from_vector(x, type)
+  if (!is.null(type)) {
+    type <- as_type(type)
   }
-  shared_ptr(Array, x)$..dispatch()
+  Array__from_vector(x, type)
 }

 #' @rdname array
@@ -181,8 +156,8 @@ Array$create <- function(x, type = NULL) {
 #' @export
 DictionaryArray <- R6Class("DictionaryArray", inherit = Array,
   public = list(
-    indices = function() Array$create(DictionaryArray__indices(self)),
-    dictionary = function() Array$create(DictionaryArray__dictionary(self))
+    indices = function() DictionaryArray__indices(self),
+    dictionary = function() DictionaryArray__dictionary(self)
   ),
   active = list(
     ordered = function() self$type$ordered
@@ -203,7 +178,7 @@ DictionaryArray$create <- function(x, dict = NULL) {
     dict <- Array$create(dict)
   }
   type <- DictionaryType$create(x$type, dict$type)
-  shared_ptr(DictionaryArray, DictionaryArray__FromArrays(type, x, dict))
+  DictionaryArray__FromArrays(type, x, dict)
 }

 #' @rdname array
@@ -212,9 +187,9 @@ DictionaryArray$create <- function(x, dict = NULL) {
 #' @export
 StructArray <- R6Class("StructArray", inherit = Array,
   public = list(
-    field = function(i) Array$create(StructArray__field(self, i)),
-    GetFieldByName = function(name) Array$create(StructArray__GetFieldByName(self, name)),
-    Flatten = function() map(StructArray__Flatten(self), ~ Array$create(.x))
+    field = function(i) StructArray__field(self, i),
+    GetFieldByName = function(name) StructArray__GetFieldByName(self, name),
+    Flatten = function() StructArray__Flatten(self)
   )
 )

@@ -224,13 +199,13 @@ StructArray <- R6Class("StructArray", inherit = Array,
 #' @export
 ListArray <- R6Class("ListArray", inherit = Array,
   public = list(
-    values = function() Array$create(ListArray__values(self)),
+    values = function() ListArray__values(self),
     value_length = function(i) ListArray__value_length(self, i),
     value_offset = function(i) ListArray__value_offset(self, i),
     raw_value_offsets = function() ListArray__raw_value_offsets(self)
   ),
   active = list(
-    value_type = function() DataType$create(ListArray__value_type(self))
+    value_type = function() ListArray__value_type(self)
   )
 )

@@ -240,13 +215,13 @@ ListArray <- R6Class("ListArray", inherit = Array,
 #' @export
 LargeListArray <- R6Class("LargeListArray", inherit = Array,
   public = list(
-    values = function() Array$create(LargeListArray__values(self)),
+    values = function() LargeListArray__values(self),
     value_length = function(i) LargeListArray__value_length(self, i),
     value_offset = function(i) LargeListArray__value_offset(self, i),
     raw_value_offsets = function() LargeListArray__raw_value_offsets(self)
   ),
   active = list(
-    value_type = function() DataType$create(LargeListArray__value_type(self))
+    value_type = function() LargeListArray__value_type(self)
   )
 )

@@ -256,12 +231,12 @@ LargeListArray <- R6Class("LargeListArray", inherit = Array,
 #' @export
 FixedSizeListArray <- R6Class("FixedSizeListArray", inherit = Array,
   public = list(
-    values = function() Array$create(FixedSizeListArray__values(self)),
+    values = function() FixedSizeListArray__values(self),
     value_length = function(i) FixedSizeListArray__value_length(self, i),
     value_offset = function(i) FixedSizeListArray__value_offset(self, i)
   ),
   active = list(
-    value_type = function() DataType$create(FixedSizeListArray__value_type(self)),
+    value_type = function() FixedSizeListArray__value_type(self),
     list_size = function() self$type$list_size
   )
 )

@@ -270,7 +245,7 @@ FixedSizeListArray <- R6Class("FixedSizeListArray", inherit = Array,
 length.Array <- function(x) x$length()

 #' @export
-is.na.Array <- function(x) shared_ptr(Array, call_function("is_null", x))
+is.na.Array <- function(x) call_function("is_null", x)

 #' @export
 as.vector.Array <- function(x, mode) x$as_vector()
diff --git a/r/R/arrow-package.R b/r/R/arrow-package.R
index bb32efd33d8..5d1dbbebdd3 100644
--- a/r/R/arrow-package.R
+++ b/r/R/arrow-package.R
@@ -108,6 +108,10 @@ ArrowObject <- R6Class("ArrowObject",
       cat(self$ToString(), "\n", sep = "")
     }
     invisible(self)
+  },
+
+  invalidate = function() {
+    assign(".:xp:.", NULL, envir = self)
   }
  )
)

@@ -125,10 +129,3 @@ all.equal.ArrowObject <- function(target, current, ..., check.attributes = TRUE)
   target$Equals(current, check_metadata = check.attributes)
 }

-shared_ptr <- function(class, xp) {
-  if (!shared_ptr_is_null(xp)) class$new(xp)
-}
-
-unique_ptr <- function(class, xp) {
-  if (!unique_ptr_is_null(xp)) class$new(xp)
-}
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index cf83d695a67..991925d09fc 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -512,14 +512,6 @@ dataset___Dataset__Write <- function(file_write_options, filesystem, base_dir, p
   invisible(.Call(`_arrow_dataset___Dataset__Write` , file_write_options, filesystem, base_dir, partitioning, basename_template, scanner))
 }

-shared_ptr_is_null <- function(xp){
-  .Call(`_arrow_shared_ptr_is_null` , xp)
-}
-
-unique_ptr_is_null <- function(xp){
-  .Call(`_arrow_unique_ptr_is_null` , xp)
-}
-
 Int8__initialize <- function(){
   .Call(`_arrow_Int8__initialize` )
 }
@@ -644,12 +636,12 @@ DataType__Equals <- function(lhs, rhs){
   .Call(`_arrow_DataType__Equals` , lhs, rhs)
 }

-DataType__num_children <- function(type){
-  .Call(`_arrow_DataType__num_children` , type)
+DataType__num_fields <- function(type){
+  .Call(`_arrow_DataType__num_fields` , type)
 }

-DataType__children_pointer <- function(type){
-  .Call(`_arrow_DataType__children_pointer` , type)
+DataType__fields <- function(type){
+  .Call(`_arrow_DataType__fields` , type)
 }

 DataType__id <- function(type){
diff --git a/r/R/buffer.R b/r/R/buffer.R
index 7829e231cf5..db61ed36d78 100644
--- a/r/R/buffer.R
+++ b/r/R/buffer.R
@@ -55,7 +55,7 @@ Buffer$create <- function(x) {
   if (inherits(x, "Buffer")) {
     x
   } else if (inherits(x, c("raw", "numeric", "integer", "complex"))) {
-    shared_ptr(Buffer, r___RBuffer__initialize(x))
+    r___RBuffer__initialize(x)
   } else if (inherits(x,
"BufferOutputStream")) { x$finish() } else { diff --git a/r/R/chunked-array.R b/r/R/chunked-array.R index d2475eb9a76..89cd6e4f01a 100644 --- a/r/R/chunked-array.R +++ b/r/R/chunked-array.R @@ -63,9 +63,9 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = ArrowObject, as_vector = function() ChunkedArray__as_vector(self), Slice = function(offset, length = NULL){ if (is.null(length)) { - shared_ptr(ChunkedArray, ChunkedArray__Slice1(self, offset)) + ChunkedArray__Slice1(self, offset) } else { - shared_ptr(ChunkedArray, ChunkedArray__Slice2(self, offset, length)) + ChunkedArray__Slice2(self, offset, length) } }, Take = function(i) { @@ -75,20 +75,20 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = ArrowObject, if (is.integer(i)) { i <- Array$create(i) } - shared_ptr(ChunkedArray, call_function("take", self, i)) + call_function("take", self, i) }, Filter = function(i, keep_na = TRUE) { if (is.logical(i)) { i <- Array$create(i) } - shared_ptr(ChunkedArray, call_function("filter", self, i, options = list(keep_na = keep_na))) + call_function("filter", self, i, options = list(keep_na = keep_na)) }, cast = function(target_type, safe = TRUE, options = cast_options(safe)) { assert_is(options, "CastOptions") - shared_ptr(ChunkedArray, ChunkedArray__cast(self, as_type(target_type), options)) + ChunkedArray__cast(self, as_type(target_type), options) }, View = function(type) { - shared_ptr(ChunkedArray, ChunkedArray__View(self, as_type(type))) + ChunkedArray__View(self, as_type(type)) }, Validate = function() { ChunkedArray__Validate(self) @@ -104,7 +104,7 @@ ChunkedArray <- R6Class("ChunkedArray", inherit = ArrowObject, null_count = function() ChunkedArray__null_count(self), num_chunks = function() ChunkedArray__num_chunks(self), chunks = function() map(ChunkedArray__chunks(self), Array$create), - type = function() DataType$create(ChunkedArray__type(self)) + type = function() ChunkedArray__type(self) ) ) @@ -112,7 +112,7 @@ ChunkedArray$create <- function(..., type = NULL) { if (!is.null(type)) { type <- as_type(type) } - shared_ptr(ChunkedArray, ChunkedArray__from_list(list2(...), type)) + ChunkedArray__from_list(list2(...), type) } #' @param \dots Vectors to coerce @@ -128,7 +128,7 @@ length.ChunkedArray <- function(x) x$length() as.vector.ChunkedArray <- function(x, mode) x$as_vector() #' @export -is.na.ChunkedArray <- function(x) shared_ptr(ChunkedArray, call_function("is_null", x)) +is.na.ChunkedArray <- function(x) call_function("is_null", x) #' @export `[.ChunkedArray` <- filter_rows diff --git a/r/R/compression.R b/r/R/compression.R index 2318f1b841b..ebd4c54cd82 100644 --- a/r/R/compression.R +++ b/r/R/compression.R @@ -46,9 +46,9 @@ Codec <- R6Class("Codec", inherit = ArrowObject, ) Codec$create <- function(type = "gzip", compression_level = NA) { if (is.string(type)) { - type <- shared_ptr(Codec, util___Codec__Create( + type <- util___Codec__Create( compression_from_name(type), compression_level - )) + ) } assert_is(type, "Codec") type @@ -103,7 +103,7 @@ CompressedOutputStream$create <- function(stream, codec = "gzip", compression_le stream <- FileOutputStream$create(stream) } assert_is(stream, "OutputStream") - shared_ptr(CompressedOutputStream, io___CompressedOutputStream__Make(codec, stream)) + io___CompressedOutputStream__Make(codec, stream) } #' @rdname compression @@ -117,5 +117,5 @@ CompressedInputStream$create <- function(stream, codec = "gzip", compression_lev stream <- ReadableFile$create(stream) } assert_is(stream, "InputStream") - shared_ptr(CompressedInputStream, 
io___CompressedInputStream__Make(codec, stream)) + io___CompressedInputStream__Make(codec, stream) } diff --git a/r/R/compute.R b/r/R/compute.R index 691d59e3666..2c067c238ab 100644 --- a/r/R/compute.R +++ b/r/R/compute.R @@ -76,7 +76,7 @@ scalar_aggregate <- function(FUN, ..., na.rm = FALSE) { return(Scalar$create(NA_real_)) } - Scalar$create(call_function(FUN, a, options = list(na.rm = na.rm))) + call_function(FUN, a, options = list(na.rm = na.rm)) } collect_arrays_from_dots <- function(dots) { @@ -100,7 +100,7 @@ collect_arrays_from_dots <- function(dots) { #' @export unique.Array <- function(x, incomparables = FALSE, ...) { - Array$create(call_function("unique", x)) + call_function("unique", x) } #' @export @@ -127,16 +127,11 @@ match_arrow.Array <- function(x, table, ...) { if (!inherits(table, c("Array", "ChunkedArray"))) { table <- Array$create(table) } - Array$create(call_function("index_in_meta_binary", x, table)) + call_function("index_in_meta_binary", x, table) } #' @export -match_arrow.ChunkedArray <- function(x, table, ...) { - if (!inherits(table, c("Array", "ChunkedArray"))) { - table <- Array$create(table) - } - shared_ptr(ChunkedArray, call_function("index_in_meta_binary", x, table)) -} +match_arrow.ChunkedArray <- match_arrow.Array CastOptions <- R6Class("CastOptions", inherit = ArrowObject) @@ -152,7 +147,5 @@ cast_options <- function(safe = TRUE, allow_int_overflow = !safe, allow_time_truncate = !safe, allow_float_truncate = !safe) { - shared_ptr(CastOptions, - compute___CastOptions__initialize(allow_int_overflow, allow_time_truncate, allow_float_truncate) - ) + compute___CastOptions__initialize(allow_int_overflow, allow_time_truncate, allow_float_truncate) } diff --git a/r/R/csv.R b/r/R/csv.R index 24a0f11312f..160c46e4753 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -289,7 +289,7 @@ read_tsv_arrow <- function(file, #' @export CsvTableReader <- R6Class("CsvTableReader", inherit = ArrowObject, public = list( - Read = function() shared_ptr(Table, csv___TableReader__Read(self)) + Read = function() csv___TableReader__Read(self) ) ) CsvTableReader$create <- function(file, @@ -298,10 +298,7 @@ CsvTableReader$create <- function(file, convert_options = CsvConvertOptions$create(), ...) 
{ assert_is(file, "InputStream") - shared_ptr( - CsvTableReader, - csv___TableReader__Make(file, read_options, parse_options, convert_options) - ) + csv___TableReader__Make(file, read_options, parse_options, convert_options) } #' @title File reader options @@ -400,8 +397,7 @@ CsvReadOptions$create <- function(use_threads = option_use_threads(), skip_rows = 0L, column_names = character(0), autogenerate_column_names = FALSE) { - - shared_ptr(CsvReadOptions, csv___ReadOptions__initialize( + csv___ReadOptions__initialize( list( use_threads = use_threads, block_size = block_size, @@ -409,7 +405,7 @@ CsvReadOptions$create <- function(use_threads = option_use_threads(), column_names = column_names, autogenerate_column_names = autogenerate_column_names ) - )) + ) } readr_to_csv_read_options <- function(skip, col_names, col_types) { @@ -439,7 +435,7 @@ CsvParseOptions$create <- function(delimiter = ",", newlines_in_values = FALSE, ignore_empty_lines = TRUE) { - shared_ptr(CsvParseOptions, csv___ParseOptions__initialize( + csv___ParseOptions__initialize( list( delimiter = delimiter, quoting = quoting, @@ -450,7 +446,7 @@ CsvParseOptions$create <- function(delimiter = ",", newlines_in_values = newlines_in_values, ignore_empty_lines = ignore_empty_lines ) - )) + ) } readr_to_csv_parse_options <- function(delim = ",", @@ -485,9 +481,9 @@ TimestampParser <- R6Class("TimestampParser", inherit = ArrowObject, ) TimestampParser$create <- function(format = NULL) { if (is.null(format)) { - shared_ptr(TimestampParser, TimestampParser__MakeISO8601()) + TimestampParser__MakeISO8601() } else { - shared_ptr(TimestampParser, TimestampParser__MakeStrptime(format)) + TimestampParser__MakeStrptime(format) } } @@ -516,7 +512,7 @@ CsvConvertOptions$create <- function(check_utf8 = TRUE, )) } - shared_ptr(CsvConvertOptions, csv___ConvertOptions__initialize( + csv___ConvertOptions__initialize( list( check_utf8 = check_utf8, null_values = null_values, @@ -530,7 +526,7 @@ CsvConvertOptions$create <- function(check_utf8 = TRUE, include_missing_columns = include_missing_columns, timestamp_parsers = timestamp_parsers ) - )) + ) } readr_to_csv_convert_options <- function(na, diff --git a/r/R/dataset-factory.R b/r/R/dataset-factory.R index 8b3439c419d..c289cf0c8a6 100644 --- a/r/R/dataset-factory.R +++ b/r/R/dataset-factory.R @@ -25,14 +25,13 @@ DatasetFactory <- R6Class("DatasetFactory", inherit = ArrowObject, public = list( Finish = function(schema = NULL, unify_schemas = FALSE) { if (is.null(schema)) { - ptr <- dataset___DatasetFactory__Finish1(self, unify_schemas) + dataset___DatasetFactory__Finish1(self, unify_schemas) } else { - ptr <- dataset___DatasetFactory__Finish2(self, schema) + dataset___DatasetFactory__Finish2(self, schema) } - shared_ptr(Dataset, ptr)$..dispatch() }, Inspect = function(unify_schemas = FALSE) { - shared_ptr(Schema, dataset___DatasetFactory__Inspect(self, unify_schemas)) + dataset___DatasetFactory__Inspect(self, unify_schemas) } ) ) @@ -42,7 +41,7 @@ DatasetFactory$create <- function(x, partitioning = NULL, ...) 
{ if (is_list_of(x, "DatasetFactory")) { - return(shared_ptr(DatasetFactory, dataset___UnionDatasetFactory__Make(x))) + return(dataset___UnionDatasetFactory__Make(x)) } path_and_fs <- get_path_and_filesystem(x, filesystem) @@ -143,5 +142,5 @@ FileSystemDatasetFactory$create <- function(filesystem, ) } - shared_ptr(FileSystemDatasetFactory, ptr) + ptr } diff --git a/r/R/dataset-format.R b/r/R/dataset-format.R index 8300e415e2c..eb57b893e0c 100644 --- a/r/R/dataset-format.R +++ b/r/R/dataset-format.R @@ -49,20 +49,6 @@ #' @name FileFormat #' @export FileFormat <- R6Class("FileFormat", inherit = ArrowObject, - public = list( - ..dispatch = function() { - type <- self$type - if (type == "parquet") { - shared_ptr(ParquetFileFormat, self$pointer()) - } else if (type == "ipc") { - shared_ptr(IpcFileFormat, self$pointer()) - } else if (type == "csv") { - shared_ptr(CsvFileFormat, self$pointer()) - } else { - self - } - } - ), active = list( # @description # Return the `FileFormat`'s type @@ -78,7 +64,7 @@ FileFormat$create <- function(format, ...) { } else if (format == "parquet") { ParquetFileFormat$create(...) } else if (format %in% c("ipc", "arrow", "feather")) { # These are aliases for the same thing - shared_ptr(IpcFileFormat, dataset___IpcFileFormat__Make()) + dataset___IpcFileFormat__Make() } else { stop("Unsupported file format: ", format, call. = FALSE) } @@ -99,8 +85,7 @@ ParquetFileFormat <- R6Class("ParquetFileFormat", inherit = FileFormat) ParquetFileFormat$create <- function(use_buffered_stream = FALSE, buffer_size = 8196, dict_columns = character(0)) { - shared_ptr(ParquetFileFormat, dataset___ParquetFileFormat__Make( - use_buffered_stream, buffer_size, dict_columns)) + dataset___ParquetFileFormat__Make(use_buffered_stream, buffer_size, dict_columns) } #' @usage NULL @@ -115,7 +100,7 @@ IpcFileFormat <- R6Class("IpcFileFormat", inherit = FileFormat) #' @export CsvFileFormat <- R6Class("CsvFileFormat", inherit = FileFormat) CsvFileFormat$create <- function(..., opts = csv_file_format_parse_options(...)) { - shared_ptr(CsvFileFormat, dataset___CsvFileFormat__Make(opts)) + dataset___CsvFileFormat__Make(opts) } csv_file_format_parse_options <- function(...) { @@ -163,6 +148,6 @@ FileWriteOptions$create <- function(format, ...) { if (!inherits(format, "FileFormat")) { format <- FileFormat$create(format) } - options <- shared_ptr(FileWriteOptions, dataset___FileFormat__DefaultWriteOptions(format)) + options <- dataset___FileFormat__DefaultWriteOptions(format) options$update(...) 
} diff --git a/r/R/dataset-partition.R b/r/R/dataset-partition.R index be47406a6d1..700a57c69a0 100644 --- a/r/R/dataset-partition.R +++ b/r/R/dataset-partition.R @@ -55,18 +55,14 @@ Partitioning <- R6Class("Partitioning", inherit = ArrowObject) #' @rdname Partitioning #' @export DirectoryPartitioning <- R6Class("DirectoryPartitioning", inherit = Partitioning) -DirectoryPartitioning$create <- function(schema) { - shared_ptr(DirectoryPartitioning, dataset___DirectoryPartitioning(schema)) -} +DirectoryPartitioning$create <- dataset___DirectoryPartitioning #' @usage NULL #' @format NULL #' @rdname Partitioning #' @export HivePartitioning <- R6Class("HivePartitioning", inherit = Partitioning) -HivePartitioning$create <- function(schema) { - shared_ptr(HivePartitioning, dataset___HivePartitioning(schema)) -} +HivePartitioning$create <- dataset___HivePartitioning #' Construct Hive partitioning #' @@ -99,15 +95,11 @@ PartitioningFactory <- R6Class("PartitioningFactory", inherit = ArrowObject) #' @rdname Partitioning #' @export DirectoryPartitioningFactory <- R6Class("DirectoryPartitioningFactory ", inherit = PartitioningFactory) -DirectoryPartitioningFactory$create <- function(x) { - shared_ptr(DirectoryPartitioningFactory, dataset___DirectoryPartitioning__MakeFactory(x)) -} +DirectoryPartitioningFactory$create <- dataset___DirectoryPartitioning__MakeFactory #' @usage NULL #' @format NULL #' @rdname Partitioning #' @export HivePartitioningFactory <- R6Class("HivePartitioningFactory", inherit = PartitioningFactory) -HivePartitioningFactory$create <- function() { - shared_ptr(HivePartitioningFactory, dataset___HivePartitioning__MakeFactory()) -} +HivePartitioningFactory$create <- dataset___HivePartitioning__MakeFactory diff --git a/r/R/dataset-scan.R b/r/R/dataset-scan.R index e9017825782..45fc968ed08 100644 --- a/r/R/dataset-scan.R +++ b/r/R/dataset-scan.R @@ -55,11 +55,11 @@ #' @export Scanner <- R6Class("Scanner", inherit = ArrowObject, public = list( - ToTable = function() shared_ptr(Table, dataset___Scanner__ToTable(self)), - Scan = function() map(dataset___Scanner__Scan(self), shared_ptr, class = ScanTask) + ToTable = function() dataset___Scanner__ToTable(self), + Scan = function() dataset___Scanner__Scan(self) ), active = list( - schema = function() shared_ptr(Schema, dataset___Scanner__schema(self)) + schema = function() dataset___Scanner__schema(self) ) ) Scanner$create <- function(dataset, @@ -103,7 +103,7 @@ names.Scanner <- function(x) names(x$schema) ScanTask <- R6Class("ScanTask", inherit = ArrowObject, public = list( - Execute = function() map(dataset___ScanTask__get_batches(self), shared_ptr, class = RecordBatch) + Execute = function() dataset___ScanTask__get_batches(self) ) ) @@ -169,10 +169,10 @@ ScannerBuilder <- R6Class("ScannerBuilder", inherit = ArrowObject, dataset___ScannerBuilder__BatchSize(self, batch_size) self }, - Finish = function() unique_ptr(Scanner, dataset___ScannerBuilder__Finish(self)) + Finish = function() dataset___ScannerBuilder__Finish(self) ), active = list( - schema = function() shared_ptr(Schema, dataset___ScannerBuilder__schema(self)) + schema = function() dataset___ScannerBuilder__schema(self) ) ) diff --git a/r/R/dataset.R b/r/R/dataset.R index 7b1d6609295..e990ff3cb86 100644 --- a/r/R/dataset.R +++ b/r/R/dataset.R @@ -79,7 +79,7 @@ open_dataset <- function(sources, x$schema <- schema x }) - return(shared_ptr(UnionDataset, dataset___UnionDataset__create(sources, schema))) + return(dataset___UnionDataset__create(sources, schema)) } factory <- 
DatasetFactory$create(sources, partitioning = partitioning, ...) # Default is _not_ to inspect/unify schemas @@ -145,29 +145,19 @@ open_dataset <- function(sources, #' @seealso [open_dataset()] for a simple interface to creating a `Dataset` Dataset <- R6Class("Dataset", inherit = ArrowObject, public = list( - ..dispatch = function() { - type <- self$type - if (type == "union") { - shared_ptr(UnionDataset, self$pointer()) - } else if (type == "filesystem") { - shared_ptr(FileSystemDataset, self$pointer()) - } else { - self - } - }, # @description # Start a new scan of the data # @return A [ScannerBuilder] - NewScan = function() unique_ptr(ScannerBuilder, dataset___Dataset__NewScan(self)), + NewScan = function() dataset___Dataset__NewScan(self), ToString = function() self$schema$ToString() ), active = list( schema = function(schema) { if (missing(schema)) { - shared_ptr(Schema, dataset___Dataset__schema(self)) + dataset___Dataset__schema(self) } else { assert_is(schema, "Schema") - invisible(shared_ptr(Dataset, dataset___Dataset__ReplaceSchema(self, schema))) + invisible(dataset___Dataset__ReplaceSchema(self, schema)) } }, metadata = function() self$schema$metadata, @@ -212,12 +202,12 @@ FileSystemDataset <- R6Class("FileSystemDataset", inherit = Dataset, # @description # Return the format of files in this `Dataset` format = function() { - shared_ptr(FileFormat, dataset___FileSystemDataset__format(self))$..dispatch() + dataset___FileSystemDataset__format(self) }, # @description # Return the filesystem of files in this `Dataset` filesystem = function() { - shared_ptr(FileSystem, dataset___FileSystemDataset__filesystem(self))$..dispatch() + dataset___FileSystemDataset__filesystem(self) }, num_rows = function() { if (inherits(self$format, "ParquetFileFormat")) { @@ -244,7 +234,7 @@ UnionDataset <- R6Class("UnionDataset", inherit = Dataset, # @description # Return the UnionDataset's child `Dataset`s children = function() { - map(dataset___UnionDataset__children(self), ~shared_ptr(Dataset, .)$..dispatch()) + dataset___UnionDataset__children(self) } ) ) @@ -257,7 +247,7 @@ InMemoryDataset$create <- function(x) { if (!inherits(x, "Table")) { x <- Table$create(x) } - shared_ptr(InMemoryDataset, dataset___InMemoryDataset__create(x)) + dataset___InMemoryDataset__create(x) } @@ -274,7 +264,7 @@ c.Dataset <- function(...) Dataset$create(list(...)) head.Dataset <- function(x, n = 6L, ...) { assert_that(n > 0) # For now scanner <- Scanner$create(ensure_group_vars(x)) - shared_ptr(Table, dataset___Scanner__head(scanner, n)) + dataset___Scanner__head(scanner, n) } #' @export @@ -284,7 +274,7 @@ tail.Dataset <- function(x, n = 6L, ...) 
{ batch_num <- 0 scanner <- Scanner$create(ensure_group_vars(x)) for (scan_task in rev(dataset___Scanner__Scan(scanner))) { - for (batch in rev(shared_ptr(ScanTask, scan_task)$Execute())) { + for (batch in rev(scan_task$Execute())) { batch_num <- batch_num + 1 result[[batch_num]] <- tail(batch, n) n <- n - nrow(batch) @@ -321,7 +311,7 @@ take_dataset_rows <- function(x, i) { i <- sort(i) - 1L scanner <- Scanner$create(ensure_group_vars(x)) for (scan_task in dataset___Scanner__Scan(scanner)) { - for (batch in shared_ptr(ScanTask, scan_task)$Execute()) { + for (batch in scan_task$Execute()) { # Take all rows that are in this batch this_batch_nrows <- batch$num_rows in_this_batch <- i > -1L & i < this_batch_nrows diff --git a/r/R/dictionary.R b/r/R/dictionary.R index 6273ffc2c87..b701768d661 100644 --- a/r/R/dictionary.R +++ b/r/R/dictionary.R @@ -37,8 +37,8 @@ DictionaryType <- R6Class("DictionaryType", } ), active = list( - index_type = function() DataType$create(DictionaryType__index_type(self)), - value_type = function() DataType$create(DictionaryType__value_type(self)), + index_type = function() DictionaryType__index_type(self), + value_type = function() DictionaryType__value_type(self), name = function() DictionaryType__name(self), ordered = function() DictionaryType__ordered(self) ) @@ -48,7 +48,7 @@ DictionaryType$create <- function(index_type = int32(), ordered = FALSE) { assert_is(index_type, "DataType") assert_is(value_type, "DataType") - shared_ptr(DictionaryType, DictionaryType__initialize(index_type, value_type, ordered)) + DictionaryType__initialize(index_type, value_type, ordered) } #' Create a dictionary type diff --git a/r/R/dplyr.R b/r/R/dplyr.R index fd69c852e68..f3dd078c952 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -285,15 +285,18 @@ collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) { # Pull only the selected rows and cols into R if (query_on_dataset(x)) { # See dataset.R for Dataset and Scanner(Builder) classes - df <- Scanner$create(x)$ToTable() + tab <- Scanner$create(x)$ToTable() } else { # This is a Table/RecordBatch. See record-batch.R for the [ method - df <- x$.data[x$filtered_rows, x$selected_columns, keep_na = FALSE] + tab <- x$.data[x$filtered_rows, x$selected_columns, keep_na = FALSE] } if (as_data_frame) { - df <- as.data.frame(df) + df <- as.data.frame(tab) + tab$invalidate() + restore_dplyr_features(df, x) + } else { + restore_dplyr_features(tab, x) } - restore_dplyr_features(df, x) } collect.Table <- as.data.frame.Table collect.RecordBatch <- as.data.frame.RecordBatch diff --git a/r/R/expression.R b/r/R/expression.R index 092c7caed80..d5623fb7786 100644 --- a/r/R/expression.R +++ b/r/R/expression.R @@ -20,14 +20,12 @@ array_expression <- function(FUN, ..., args = list(...), - options = empty_named_list(), - result_class = .guess_result_class(args[[1]])) { + options = empty_named_list()) { structure( list( fun = FUN, args = args, - options = options, - result_class = result_class + options = options ), class = "array_expression" ) @@ -36,29 +34,22 @@ array_expression <- function(FUN, #' @export Ops.Array <- function(e1, e2) { if (.Generic %in% names(.array_function_map)) { - expr <- build_array_expression(.Generic, e1, e2, result_class = "Array") + expr <- build_array_expression(.Generic, e1, e2) eval_array_expression(expr) } else { - stop("Unsupported operation on Array: ", .Generic, call. = FALSE) + stop(paste0("Unsupported operation on `", class(e1)[1L], "` : "), .Generic, call. 
= FALSE) } } #' @export -Ops.ChunkedArray <- function(e1, e2) { - if (.Generic %in% names(.array_function_map)) { - expr <- build_array_expression(.Generic, e1, e2, result_class = "ChunkedArray") - eval_array_expression(expr) - } else { - stop("Unsupported operation on ChunkedArray: ", .Generic, call. = FALSE) - } -} +Ops.ChunkedArray <- Ops.Array #' @export Ops.array_expression <- function(e1, e2) { if (.Generic == "!") { - build_array_expression(.Generic, e1, result_class = e1$result_class) + build_array_expression(.Generic, e1) } else { - build_array_expression(.Generic, e1, e2, result_class = e1$result_class) + build_array_expression(.Generic, e1, e2) } } @@ -105,17 +96,6 @@ build_array_expression <- function(.Generic, e1, e2, ...) { .array_function_map <- c(.unary_function_map, .binary_function_map) -.guess_result_class <- function(arg) { - # HACK HACK HACK delete this when call_function returns an ArrowObject itself - if (inherits(arg, "ArrowObject")) { - return(class(arg)[1]) - } else if (inherits(arg, "array_expression")) { - return(arg$result_class) - } else { - stop("Not implemented") - } -} - eval_array_expression <- function(x) { x$args <- lapply(x$args, function (a) { if (inherits(a, "array_expression")) { @@ -124,8 +104,7 @@ eval_array_expression <- function(x) { a } }) - ptr <- call_function(x$fun, args = x$args, options = x$options %||% empty_named_list()) - shared_ptr(get(x$result_class), ptr) + call_function(x$fun, args = x$args, options = x$options %||% empty_named_list()) } #' @export @@ -198,17 +177,17 @@ Expression <- R6Class("Expression", inherit = ArrowObject, Expression$field_ref <- function(name) { assert_is(name, "character") assert_that(length(name) == 1) - shared_ptr(Expression, dataset___expr__field_ref(name)) + dataset___expr__field_ref(name) } Expression$scalar <- function(x) { - shared_ptr(Expression, dataset___expr__scalar(Scalar$create(x))) + dataset___expr__scalar(Scalar$create(x)) } Expression$compare <- function(OP, e1, e2) { comp_func <- comparison_function_map[[OP]] if (is.null(comp_func)) { stop(OP, " is not a supported comparison function", call. = FALSE) } - shared_ptr(Expression, comp_func(e1, e2)) + comp_func(e1, e2) } comparison_function_map <- list( @@ -220,19 +199,19 @@ comparison_function_map <- list( "<=" = dataset___expr__less_equal ) Expression$in_ <- function(x, set) { - shared_ptr(Expression, dataset___expr__in(x, Array$create(set))) + dataset___expr__in(x, Array$create(set)) } Expression$and <- function(e1, e2) { - shared_ptr(Expression, dataset___expr__and(e1, e2)) + dataset___expr__and(e1, e2) } Expression$or <- function(e1, e2) { - shared_ptr(Expression, dataset___expr__or(e1, e2)) + dataset___expr__or(e1, e2) } Expression$not <- function(e1) { - shared_ptr(Expression, dataset___expr__not(e1)) + dataset___expr__not(e1) } Expression$is_valid <- function(e1) { - shared_ptr(Expression, dataset___expr__is_valid(e1)) + dataset___expr__is_valid(e1) } #' @export @@ -248,7 +227,7 @@ make_expression <- function(operator, e1, e2) { # In doesn't take Scalar, it takes Array return(Expression$in_(e1, e2)) } - + # Handle unary functions before touching e2 if (operator == "is.na") { return(is.na(e1)) diff --git a/r/R/feather.R b/r/R/feather.R index 52f8b59ece6..6d29b7d0b89 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -192,7 +192,7 @@ read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, ...) 
{ FeatherReader <- R6Class("FeatherReader", inherit = ArrowObject, public = list( Read = function(columns) { - shared_ptr(Table, ipc___feather___Reader__Read(self, columns)) + ipc___feather___Reader__Read(self, columns) } ), active = list( @@ -207,5 +207,5 @@ names.FeatherReader <- function(x) x$column_names FeatherReader$create <- function(file, mmap = TRUE, ...) { assert_is(file, "RandomAccessFile") - shared_ptr(FeatherReader, ipc___feather___Reader__Open(file)) + ipc___feather___Reader__Open(file) } diff --git a/r/R/field.R b/r/R/field.R index 4053a9fa401..33549d344c5 100644 --- a/r/R/field.R +++ b/r/R/field.R @@ -49,7 +49,7 @@ Field <- R6Class("Field", inherit = ArrowObject, Field__nullable(self) }, type = function() { - DataType$create(Field__type(self)) + Field__type(self) } ) ) @@ -57,7 +57,7 @@ Field$create <- function(name, type, metadata) { assert_that(inherits(name, "character"), length(name) == 1L) type <- as_type(type, name) assert_that(missing(metadata), msg = "metadata= is currently ignored") - shared_ptr(Field, Field__initialize(enc2utf8(name), type, TRUE)) + Field__initialize(enc2utf8(name), type, TRUE) } #' @param name field name diff --git a/r/R/filesystem.R b/r/R/filesystem.R index f8b32b8b312..ed0bc7adc7a 100644 --- a/r/R/filesystem.R +++ b/r/R/filesystem.R @@ -108,10 +108,7 @@ FileSelector <- R6Class("FileSelector", ) FileSelector$create <- function(base_dir, allow_not_found = FALSE, recursive = FALSE) { - shared_ptr( - FileSelector, - fs___FileSelector__create(clean_path_rel(base_dir), allow_not_found, recursive) - ) + fs___FileSelector__create(clean_path_rel(base_dir), allow_not_found, recursive) } #' @title FileSystem classes @@ -203,31 +200,11 @@ FileSelector$create <- function(base_dir, allow_not_found = FALSE, recursive = F #' @export FileSystem <- R6Class("FileSystem", inherit = ArrowObject, public = list( - ..dispatch = function() { - type_name <- self$type_name - if (type_name == "local") { - shared_ptr(LocalFileSystem, self$pointer()) - } else if (type_name == "s3") { - shared_ptr(S3FileSystem, self$pointer()) - } else if (type_name == "subtree") { - shared_ptr(SubTreeFileSystem, self$pointer()) - } else { - self - } - }, GetFileInfo = function(x) { if (inherits(x, "FileSelector")) { - map( - fs___FileSystem__GetTargetInfos_FileSelector(self, x), - shared_ptr, - class = FileInfo - ) + fs___FileSystem__GetTargetInfos_FileSelector(self, x) } else if (is.character(x)){ - map( - fs___FileSystem__GetTargetInfos_Paths(self, clean_path_rel(x)), - shared_ptr, - class = FileInfo - ) + fs___FileSystem__GetTargetInfos_Paths(self, clean_path_rel(x)) } else { abort("incompatible type for FileSystem$GetFileInfo()") } @@ -262,16 +239,16 @@ FileSystem <- R6Class("FileSystem", inherit = ArrowObject, }, OpenInputStream = function(path) { - shared_ptr(InputStream, fs___FileSystem__OpenInputStream(self, clean_path_rel(path))) + fs___FileSystem__OpenInputStream(self, clean_path_rel(path)) }, OpenInputFile = function(path) { - shared_ptr(RandomAccessFile, fs___FileSystem__OpenInputFile(self, clean_path_rel(path))) + fs___FileSystem__OpenInputFile(self, clean_path_rel(path)) }, OpenOutputStream = function(path) { - shared_ptr(OutputStream, fs___FileSystem__OpenOutputStream(self, clean_path_rel(path))) + fs___FileSystem__OpenOutputStream(self, clean_path_rel(path)) }, OpenAppendStream = function(path) { - shared_ptr(OutputStream, fs___FileSystem__OpenAppendStream(self, clean_path_rel(path))) + fs___FileSystem__OpenAppendStream(self, clean_path_rel(path)) }, # Friendlier R user 
interface @@ -293,9 +270,7 @@ FileSystem <- R6Class("FileSystem", inherit = ArrowObject, ) FileSystem$from_uri <- function(uri) { assert_that(is.string(uri)) - out <- fs___FileSystemFromUri(uri) - out$fs <- shared_ptr(FileSystem, out$fs)$..dispatch() - out + fs___FileSystemFromUri(uri) } get_path_and_filesystem <- function(x, filesystem = NULL) { @@ -326,7 +301,7 @@ is_url <- function(x) is.string(x) && grepl("://", x) #' @export LocalFileSystem <- R6Class("LocalFileSystem", inherit = FileSystem) LocalFileSystem$create <- function() { - shared_ptr(LocalFileSystem, fs___LocalFileSystem__create()) + fs___LocalFileSystem__create() } #' @usage NULL @@ -369,7 +344,7 @@ S3FileSystem$create <- function(anonymous = FALSE, ...) { } } args <- c(modifyList(default_s3_options, args), anonymous = anonymous) - shared_ptr(S3FileSystem, exec(fs___S3FileSystem__create, !!!args)) + exec(fs___S3FileSystem__create, !!!args) } default_s3_options <- list( @@ -429,17 +404,14 @@ s3_bucket <- function(bucket, ...) { SubTreeFileSystem <- R6Class("SubTreeFileSystem", inherit = FileSystem, active = list( base_fs = function() { - shared_ptr(FileSystem, fs___SubTreeFileSystem__base_fs(self))$..dispatch() + fs___SubTreeFileSystem__base_fs(self) }, base_path = function() fs___SubTreeFileSystem__base_path(self) ) ) SubTreeFileSystem$create <- function(base_path, base_fs = NULL) { fs_and_path <- get_path_and_filesystem(base_path, base_fs) - shared_ptr( - SubTreeFileSystem, - fs___SubTreeFileSystem__create(fs_and_path$path, fs_and_path$fs) - ) + fs___SubTreeFileSystem__create(fs_and_path$path, fs_and_path$fs) } #' @export diff --git a/r/R/io.R b/r/R/io.R index b4dbbeb6a5f..5f015ce3b06 100644 --- a/r/R/io.R +++ b/r/R/io.R @@ -68,7 +68,7 @@ OutputStream <- R6Class("OutputStream", inherit = Writable, #' @export FileOutputStream <- R6Class("FileOutputStream", inherit = OutputStream) FileOutputStream$create <- function(path) { - shared_ptr(FileOutputStream, io___FileOutputStream__Open(clean_path_abs(path))) + io___FileOutputStream__Open(clean_path_abs(path)) } #' @usage NULL @@ -78,13 +78,13 @@ FileOutputStream$create <- function(path) { BufferOutputStream <- R6Class("BufferOutputStream", inherit = OutputStream, public = list( capacity = function() io___BufferOutputStream__capacity(self), - finish = function() shared_ptr(Buffer, io___BufferOutputStream__Finish(self)), + finish = function() io___BufferOutputStream__Finish(self), write = function(bytes) io___BufferOutputStream__Write(self, bytes), tell = function() io___BufferOutputStream__Tell(self) ) ) BufferOutputStream$create <- function(initial_capacity = 0L) { - shared_ptr(BufferOutputStream, io___BufferOutputStream__Create(initial_capacity)) + io___BufferOutputStream__Create(initial_capacity) } # InputStream ------------------------------------------------------------- @@ -92,7 +92,7 @@ BufferOutputStream$create <- function(initial_capacity = 0L) { Readable <- R6Class("Readable", inherit = ArrowObject, public = list( - Read = function(nbytes) shared_ptr(Buffer, io___Readable__Read(self, nbytes)) + Read = function(nbytes) io___Readable__Read(self, nbytes) ) ) @@ -148,9 +148,9 @@ RandomAccessFile <- R6Class("RandomAccessFile", inherit = InputStream, Read = function(nbytes = NULL) { if (is.null(nbytes)) { - shared_ptr(Buffer, io___RandomAccessFile__Read0(self)) + io___RandomAccessFile__Read0(self) } else { - shared_ptr(Buffer, io___Readable__Read(self, nbytes)) + io___Readable__Read(self, nbytes) } }, @@ -158,7 +158,7 @@ RandomAccessFile <- R6Class("RandomAccessFile", inherit 
= InputStream, if (is.null(nbytes)) { nbytes <- self$GetSize() - position } - shared_ptr(Buffer, io___RandomAccessFile__ReadAt(self, position, nbytes)) + io___RandomAccessFile__ReadAt(self, position, nbytes) } ) ) @@ -179,7 +179,7 @@ MemoryMappedFile <- R6Class("MemoryMappedFile", inherit = RandomAccessFile, #' @export ReadableFile <- R6Class("ReadableFile", inherit = RandomAccessFile) ReadableFile$create <- function(path) { - shared_ptr(ReadableFile, io___ReadableFile__Open(clean_path_abs(path))) + io___ReadableFile__Open(clean_path_abs(path)) } #' @usage NULL @@ -189,7 +189,7 @@ ReadableFile$create <- function(path) { BufferReader <- R6Class("BufferReader", inherit = RandomAccessFile) BufferReader$create <- function(x) { x <- buffer(x) - shared_ptr(BufferReader, io___BufferReader__initialize(x)) + io___BufferReader__initialize(x) } #' Create a new read/write memory mapped file of a given size @@ -202,7 +202,7 @@ BufferReader$create <- function(x) { #' @export mmap_create <- function(path, size) { path <- clean_path_abs(path) - shared_ptr(MemoryMappedFile, io___MemoryMappedFile__Create(path, size)) + io___MemoryMappedFile__Create(path, size) } #' Open a memory mapped file @@ -214,7 +214,7 @@ mmap_create <- function(path, size) { mmap_open <- function(path, mode = c("read", "write", "readwrite")) { mode <- match(match.arg(mode), c("read", "write", "readwrite")) - 1L path <- clean_path_abs(path) - shared_ptr(MemoryMappedFile, io___MemoryMappedFile__Open(path, mode)) + io___MemoryMappedFile__Open(path, mode) } #' Handle a range of possible input sources diff --git a/r/R/json.R b/r/R/json.R index 1cc39fa42d0..7e1ac589dc3 100644 --- a/r/R/json.R +++ b/r/R/json.R @@ -64,7 +64,7 @@ read_json_arrow <- function(file, #' @export JsonTableReader <- R6Class("JsonTableReader", inherit = ArrowObject, public = list( - Read = function() shared_ptr(Table, json___TableReader__Read(self)) + Read = function() json___TableReader__Read(self) ) ) JsonTableReader$create <- function(file, @@ -72,10 +72,7 @@ JsonTableReader$create <- function(file, parse_options = JsonParseOptions$create(), ...) 
{ assert_is(file, "InputStream") - shared_ptr( - JsonTableReader, - json___TableReader__Make(file, read_options, parse_options) - ) + json___TableReader__Make(file, read_options, parse_options) } #' @rdname CsvReadOptions @@ -85,7 +82,7 @@ JsonTableReader$create <- function(file, #' @export JsonReadOptions <- R6Class("JsonReadOptions", inherit = ArrowObject) JsonReadOptions$create <- function(use_threads = option_use_threads(), block_size = 1048576L) { - shared_ptr(JsonReadOptions, json___ReadOptions__initialize(use_threads, block_size)) + json___ReadOptions__initialize(use_threads, block_size) } #' @rdname CsvReadOptions @@ -95,5 +92,5 @@ JsonReadOptions$create <- function(use_threads = option_use_threads(), block_siz #' @export JsonParseOptions <- R6Class("JsonParseOptions", inherit = ArrowObject) JsonParseOptions$create <- function(newlines_in_values = FALSE) { - shared_ptr(JsonParseOptions, json___ParseOptions__initialize(newlines_in_values)) + json___ParseOptions__initialize(newlines_in_values) } diff --git a/r/R/list.R b/r/R/list.R index f91fc3b1fdb..7db8d9a2ff1 100644 --- a/r/R/list.R +++ b/r/R/list.R @@ -20,38 +20,38 @@ ListType <- R6Class("ListType", inherit = NestedType, active = list( - value_field = function() shared_ptr(Field, ListType__value_field(self)), - value_type = function() DataType$create(ListType__value_type(self)) + value_field = function() ListType__value_field(self), + value_type = function() ListType__value_type(self) ) ) #' @rdname data-type #' @export -list_of <- function(type) shared_ptr(ListType, list__(type)) +list_of <- function(type) list__(type) LargeListType <- R6Class("LargeListType", inherit = NestedType, active = list( - value_field = function() shared_ptr(Field, LargeListType__value_field(self)), - value_type = function() DataType$create(LargeListType__value_type(self)) + value_field = function() LargeListType__value_field(self), + value_type = function() LargeListType__value_type(self) ) ) #' @rdname data-type #' @export -large_list_of <- function(type) shared_ptr(LargeListType, large_list__(type)) +large_list_of <- function(type) large_list__(type) #' @rdname data-type #' @export FixedSizeListType <- R6Class("FixedSizeListType", inherit = NestedType, active = list( - value_field = function() shared_ptr(Field, FixedSizeListType__value_field(self)), - value_type = function() DataType$create(FixedSizeListType__value_type(self)), + value_field = function() FixedSizeListType__value_field(self), + value_type = function() FixedSizeListType__value_type(self), list_size = function() FixedSizeListType__list_size(self) ) ) #' @rdname data-type #' @export -fixed_size_list_of <- function(type, list_size) shared_ptr(LargeListType, fixed_size_list__(type, list_size)) +fixed_size_list_of <- function(type, list_size) fixed_size_list__(type, list_size) diff --git a/r/R/memory-pool.R b/r/R/memory-pool.R index dfd3a48cb72..547294bb3be 100644 --- a/r/R/memory-pool.R +++ b/r/R/memory-pool.R @@ -48,5 +48,5 @@ MemoryPool <- R6Class("MemoryPool", #' @export #' @keywords internal default_memory_pool <- function() { - shared_ptr(MemoryPool, MemoryPool__default()) + MemoryPool__default() } diff --git a/r/R/message.R b/r/R/message.R index 1cab45915e7..6a374a2b24f 100644 --- a/r/R/message.R +++ b/r/R/message.R @@ -39,8 +39,8 @@ Message <- R6Class("Message", inherit = ArrowObject, ), active = list( type = function() ipc___Message__type(self), - metadata = function() shared_ptr(Buffer, ipc___Message__metadata(self)), - body = function() shared_ptr(Buffer, 
ipc___Message__body(self)) + metadata = function() ipc___Message__metadata(self), + body = function() ipc___Message__body(self) ) ) @@ -59,7 +59,7 @@ Message <- R6Class("Message", inherit = ArrowObject, #' @export MessageReader <- R6Class("MessageReader", inherit = ArrowObject, public = list( - ReadNextMessage = function() shared_ptr(Message, ipc___MessageReader__ReadNextMessage(self)) + ReadNextMessage = function() ipc___MessageReader__ReadNextMessage(self) ) ) @@ -67,7 +67,7 @@ MessageReader$create <- function(stream) { if (!inherits(stream, "InputStream")) { stream <- BufferReader$create(stream) } - shared_ptr(MessageReader, ipc___MessageReader__Open(stream)) + ipc___MessageReader__Open(stream) } #' Read a Message from a stream @@ -86,7 +86,7 @@ read_message.default <- function(stream) { #' @export read_message.InputStream <- function(stream) { - shared_ptr(Message, ipc___ReadMessage(stream) ) + ipc___ReadMessage(stream) } #' @export diff --git a/r/R/parquet.R b/r/R/parquet.R index 1bc67427b48..722d1d27070 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -21,7 +21,7 @@ #' This function enables you to read Parquet files into R. #' #' @inheritParams read_feather -#' @param props [ParquetReaderProperties] +#' @param props [ParquetArrowReaderProperties] #' @param ... Additional arguments passed to `ParquetFileReader$create()` #' #' @return A [arrow::Table][Table], or a `data.frame` if `as_data_frame` is @@ -38,7 +38,7 @@ read_parquet <- function(file, col_select = NULL, as_data_frame = TRUE, - props = ParquetReaderProperties$create(), + props = ParquetArrowReaderProperties$create(), ...) { if (is.string(file)) { file <- make_readable_file(file) @@ -212,13 +212,10 @@ ParquetArrowWriterProperties$create <- function(use_deprecated_int96_timestamps c("ms" = TimeUnit$MILLI, "us" = TimeUnit$MICRO) ) } - shared_ptr( - ParquetArrowWriterProperties, - parquet___ArrowWriterProperties___create( - use_deprecated_int96_timestamps = isTRUE(use_deprecated_int96_timestamps), - timestamp_unit = timestamp_unit, - allow_truncated_timestamps = isTRUE(allow_truncated_timestamps) - ) + parquet___ArrowWriterProperties___create( + use_deprecated_int96_timestamps = isTRUE(use_deprecated_int96_timestamps), + timestamp_unit = timestamp_unit, + allow_truncated_timestamps = isTRUE(allow_truncated_timestamps) ) } @@ -348,10 +345,7 @@ ParquetWriterProperties$create <- function(table, write_statistics = NULL, data_page_size = NULL, ...) 
{ - builder <- shared_ptr( - ParquetWriterPropertiesBuilder, - parquet___WriterProperties___Builder__create() - ) + builder <- parquet___WriterProperties___Builder__create() if (!is.null(version)) { builder$set_version(version) } @@ -370,7 +364,7 @@ ParquetWriterProperties$create <- function(table, if (!is.null(data_page_size)) { builder$set_data_page_size(data_page_size) } - shared_ptr(ParquetWriterProperties, parquet___WriterProperties___Builder__build(builder)) + parquet___WriterProperties___Builder__build(builder) } #' @title ParquetFileWriter class @@ -412,10 +406,7 @@ ParquetFileWriter$create <- function(schema, properties = ParquetWriterProperties$create(), arrow_properties = ParquetArrowWriterProperties$create()) { assert_is(sink, "OutputStream") - shared_ptr( - ParquetFileWriter, - parquet___arrow___ParquetFileWriter__Open(schema, sink, properties, arrow_properties) - ) + parquet___arrow___ParquetFileWriter__Open(schema, sink, properties, arrow_properties) } @@ -434,7 +425,7 @@ ParquetFileWriter$create <- function(schema, #' #' - `file` A character file name, raw vector, or Arrow file connection object #' (e.g. `RandomAccessFile`). -#' - `props` Optional [ParquetReaderProperties] +#' - `props` Optional [ParquetArrowReaderProperties] #' - `mmap` Logical: whether to memory-map the file (default `TRUE`) #' - `...` Additional arguments, currently ignored #' @@ -484,53 +475,53 @@ ParquetFileReader <- R6Class("ParquetFileReader", public = list( ReadTable = function(column_indices = NULL) { if (is.null(column_indices)) { - shared_ptr(Table, parquet___arrow___FileReader__ReadTable1(self)) + parquet___arrow___FileReader__ReadTable1(self) } else { column_indices <- vec_cast(column_indices, integer()) - shared_ptr(Table, parquet___arrow___FileReader__ReadTable2(self, column_indices)) + parquet___arrow___FileReader__ReadTable2(self, column_indices) } }, ReadRowGroup = function(i, column_indices = NULL) { i <- vec_cast(i, integer()) if (is.null(column_indices)) { - shared_ptr(Table, parquet___arrow___FileReader__ReadRowGroup1(self, i)) + parquet___arrow___FileReader__ReadRowGroup1(self, i) } else { column_indices <- vec_cast(column_indices, integer()) - shared_ptr(Table, parquet___arrow___FileReader__ReadRowGroup2(self, i, column_indices)) + parquet___arrow___FileReader__ReadRowGroup2(self, i, column_indices) } }, ReadRowGroups = function(row_groups, column_indices = NULL) { row_groups <- vec_cast(row_groups, integer()) if (is.null(column_indices)) { - shared_ptr(Table, parquet___arrow___FileReader__ReadRowGroups1(self, row_groups)) + parquet___arrow___FileReader__ReadRowGroups1(self, row_groups) } else { column_indices <- vec_cast(column_indices, integer()) - shared_ptr(Table, parquet___arrow___FileReader__ReadRowGroups2(self, row_groups, column_indices)) + parquet___arrow___FileReader__ReadRowGroups2(self, row_groups, column_indices) } }, ReadColumn = function(i) { i <- vec_cast(i, integer()) - shared_ptr(ChunkedArray, parquet___arrow___FileReader__ReadColumn(self, i)) + parquet___arrow___FileReader__ReadColumn(self, i) }, GetSchema = function() { - shared_ptr(Schema, parquet___arrow___FileReader__GetSchema(self)) + parquet___arrow___FileReader__GetSchema(self) } ) ) ParquetFileReader$create <- function(file, - props = ParquetReaderProperties$create(), + props = ParquetArrowReaderProperties$create(), mmap = TRUE, ...) 
{ file <- make_readable_file(file, mmap) - assert_is(props, "ParquetReaderProperties") + assert_is(props, "ParquetArrowReaderProperties") - shared_ptr(ParquetFileReader, parquet___arrow___FileReader__OpenFile(file, props)) + parquet___arrow___FileReader__OpenFile(file, props) } -#' @title ParquetReaderProperties class -#' @rdname ParquetReaderProperties -#' @name ParquetReaderProperties +#' @title ParquetArrowReaderProperties class +#' @rdname ParquetArrowReaderProperties +#' @name ParquetArrowReaderProperties #' @docType class #' @usage NULL #' @format NULL @@ -539,7 +530,7 @@ ParquetFileReader$create <- function(file, #' #' @section Factory: #' -#' The `ParquetReaderProperties$create()` factory method instantiates the object +#' The `ParquetArrowReaderProperties$create()` factory method instantiates the object #' and takes the following arguments: #' #' - `use_threads` Logical: whether to use multithreading (default `TRUE`) @@ -551,7 +542,7 @@ ParquetFileReader$create <- function(file, #' - `$use_threads(use_threads)` #' #' @export -ParquetReaderProperties <- R6Class("ParquetReaderProperties", +ParquetArrowReaderProperties <- R6Class("ParquetArrowReaderProperties", inherit = ArrowObject, public = list( read_dictionary = function(column_index) { @@ -572,9 +563,6 @@ ParquetReaderProperties <- R6Class("ParquetReaderProperties", ) ) -ParquetReaderProperties$create <- function(use_threads = option_use_threads()) { - shared_ptr( - ParquetReaderProperties, - parquet___arrow___ArrowReaderProperties__Make(isTRUE(use_threads)) - ) +ParquetArrowReaderProperties$create <- function(use_threads = option_use_threads()) { + parquet___arrow___ArrowReaderProperties__Make(isTRUE(use_threads)) } diff --git a/r/R/python.R b/r/R/python.R index 7e29730a352..739f048b218 100644 --- a/r/R/python.R +++ b/r/R/python.R @@ -24,7 +24,7 @@ py_to_r.pyarrow.lib.Array <- function(x, ...) { }) x$`_export_to_c`(array_ptr, schema_ptr) - Array$create(ImportArray(array_ptr, schema_ptr)) + ImportArray(array_ptr, schema_ptr) } r_to_py.Array <- function(x, convert = FALSE) { @@ -53,7 +53,8 @@ py_to_r.pyarrow.lib.RecordBatch <- function(x, ...) 
{ }) x$`_export_to_c`(array_ptr, schema_ptr) - shared_ptr(RecordBatch, ImportRecordBatch(array_ptr, schema_ptr)) + + ImportRecordBatch(array_ptr, schema_ptr) } r_to_py.RecordBatch <- function(x, convert = FALSE) { diff --git a/r/R/record-batch-reader.R b/r/R/record-batch-reader.R index 85ce839d0ce..f80df74537d 100644 --- a/r/R/record-batch-reader.R +++ b/r/R/record-batch-reader.R @@ -95,11 +95,11 @@ RecordBatchReader <- R6Class("RecordBatchReader", inherit = ArrowObject, public = list( read_next_batch = function() { - shared_ptr(RecordBatch, RecordBatchReader__ReadNext(self)) + RecordBatchReader__ReadNext(self) } ), active = list( - schema = function() shared_ptr(Schema, RecordBatchReader__schema(self)) + schema = function() RecordBatchReader__schema(self) ) ) @@ -109,8 +109,8 @@ RecordBatchReader <- R6Class("RecordBatchReader", inherit = ArrowObject, #' @export RecordBatchStreamReader <- R6Class("RecordBatchStreamReader", inherit = RecordBatchReader, public = list( - batches = function() map(ipc___RecordBatchStreamReader__batches(self), shared_ptr, class = RecordBatch), - read_table = function() shared_ptr(Table, Table__from_RecordBatchStreamReader(self)) + batches = function() ipc___RecordBatchStreamReader__batches(self), + read_table = function() Table__from_RecordBatchStreamReader(self) ) ) RecordBatchStreamReader$create <- function(stream) { @@ -120,7 +120,7 @@ RecordBatchStreamReader$create <- function(stream) { stream <- BufferReader$create(stream) } assert_is(stream, "InputStream") - shared_ptr(RecordBatchStreamReader, ipc___RecordBatchStreamReader__Open(stream)) + ipc___RecordBatchStreamReader__Open(stream) } #' @rdname RecordBatchReader @@ -131,16 +131,16 @@ RecordBatchFileReader <- R6Class("RecordBatchFileReader", inherit = ArrowObject, # Why doesn't this inherit from RecordBatchReader? 
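# (Aside, not part of this patch: the calling pattern is unchanged, e.g.
#   reader <- RecordBatchFileReader$create(file)
#   tab <- reader$read_table()
# still yields a Table, now constructed directly as an R6 object in C++.)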
public = list( get_batch = function(i) { - shared_ptr(RecordBatch, ipc___RecordBatchFileReader__ReadRecordBatch(self, i)) + ipc___RecordBatchFileReader__ReadRecordBatch(self, i) }, batches = function() { - map(ipc___RecordBatchFileReader__batches(self), shared_ptr, class = RecordBatch) + ipc___RecordBatchFileReader__batches(self) }, - read_table = function() shared_ptr(Table, Table__from_RecordBatchFileReader(self)) + read_table = function() Table__from_RecordBatchFileReader(self) ), active = list( num_record_batches = function() ipc___RecordBatchFileReader__num_record_batches(self), - schema = function() shared_ptr(Schema, ipc___RecordBatchFileReader__schema(self)) + schema = function() ipc___RecordBatchFileReader__schema(self) ) ) RecordBatchFileReader$create <- function(file) { @@ -150,5 +150,5 @@ RecordBatchFileReader$create <- function(file) { file <- BufferReader$create(file) } assert_is(file, "InputStream") - shared_ptr(RecordBatchFileReader, ipc___RecordBatchFileReader__Open(file)) + ipc___RecordBatchFileReader__Open(file) } diff --git a/r/R/record-batch-writer.R b/r/R/record-batch-writer.R index 8b51603110d..60e87c951dd 100644 --- a/r/R/record-batch-writer.R +++ b/r/R/record-batch-writer.R @@ -133,13 +133,11 @@ RecordBatchStreamWriter$create <- function(sink, assert_is(sink, "OutputStream") assert_is(schema, "Schema") - shared_ptr(RecordBatchStreamWriter, - ipc___RecordBatchStreamWriter__Open( - sink, - schema, - get_ipc_use_legacy_format(use_legacy_format), - get_ipc_metadata_version(metadata_version) - ) + ipc___RecordBatchStreamWriter__Open( + sink, + schema, + get_ipc_use_legacy_format(use_legacy_format), + get_ipc_metadata_version(metadata_version) ) } @@ -162,13 +160,11 @@ RecordBatchFileWriter$create <- function(sink, assert_is(sink, "OutputStream") assert_is(schema, "Schema") - shared_ptr(RecordBatchFileWriter, - ipc___RecordBatchFileWriter__Open( - sink, - schema, - get_ipc_use_legacy_format(use_legacy_format), - get_ipc_metadata_version(metadata_version) - ) + ipc___RecordBatchFileWriter__Open( + sink, + schema, + get_ipc_use_legacy_format(use_legacy_format), + get_ipc_metadata_version(metadata_version) ) } diff --git a/r/R/record-batch.R b/r/R/record-batch.R index 1b5fcbba24e..c050ef806b8 100644 --- a/r/R/record-batch.R +++ b/r/R/record-batch.R @@ -72,7 +72,7 @@ #' @name RecordBatch RecordBatch <- R6Class("RecordBatch", inherit = ArrowObject, public = list( - column = function(i) shared_ptr(Array, RecordBatch__column(self, i)), + column = function(i) RecordBatch__column(self, i), column_name = function(i) RecordBatch__column_name(self, i), names = function() RecordBatch__names(self), Equals = function(other, check_metadata = FALSE, ...) 
{ @@ -80,19 +80,19 @@ RecordBatch <- R6Class("RecordBatch", inherit = ArrowObject, }, GetColumnByName = function(name) { assert_that(is.string(name)) - shared_ptr(Array, RecordBatch__GetColumnByName(self, name)) + RecordBatch__GetColumnByName(self, name) }, SelectColumns = function(indices) { - shared_ptr(RecordBatch, RecordBatch__SelectColumns(self, indices)) + RecordBatch__SelectColumns(self, indices) }, RemoveColumn = function(i){ - shared_ptr(RecordBatch, RecordBatch__RemoveColumn(self, i)) + RecordBatch__RemoveColumn(self, i) }, Slice = function(offset, length = NULL) { if (is.null(length)) { - shared_ptr(RecordBatch, RecordBatch__Slice1(self, offset)) + RecordBatch__Slice1(self, offset) } else { - shared_ptr(RecordBatch, RecordBatch__Slice2(self, offset, length)) + RecordBatch__Slice2(self, offset, length) } }, Take = function(i) { @@ -103,14 +103,14 @@ RecordBatch <- R6Class("RecordBatch", inherit = ArrowObject, i <- Array$create(i) } assert_is(i, "Array") - shared_ptr(RecordBatch, call_function("take", self, i)) + call_function("take", self, i) }, Filter = function(i, keep_na = TRUE) { if (is.logical(i)) { i <- Array$create(i) } assert_that(is.Array(i, "bool")) - shared_ptr(RecordBatch, call_function("filter", self, i, options = list(keep_na = keep_na))) + call_function("filter", self, i, options = list(keep_na = keep_na)) }, serialize = function() ipc___SerializeRecordBatch__Raw(self), ToString = function() ToString_tabular(self), @@ -119,14 +119,20 @@ RecordBatch <- R6Class("RecordBatch", inherit = ArrowObject, assert_is(target_schema, "Schema") assert_is(options, "CastOptions") assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas") - shared_ptr(RecordBatch, RecordBatch__cast(self, target_schema, options)) + + RecordBatch__cast(self, target_schema, options) + }, + + invalidate = function() { + .Call(`_arrow_RecordBatch__Reset`, self) + super$invalidate() } ), active = list( num_columns = function() RecordBatch__num_columns(self), num_rows = function() RecordBatch__num_rows(self), - schema = function() shared_ptr(Schema, RecordBatch__schema(self)), + schema = function() RecordBatch__schema(self), metadata = function(new) { if (missing(new)) { # Get the metadata (from the schema) @@ -137,11 +143,11 @@ RecordBatch <- R6Class("RecordBatch", inherit = ArrowObject, out <- RecordBatch__ReplaceSchemaMetadata(self, new) # ReplaceSchemaMetadata returns a new object but we're modifying in place, # so swap in that new C++ object pointer into our R6 object - self$set_pointer(out) + self$set_pointer(out$pointer()) self } }, - columns = function() map(RecordBatch__columns(self), shared_ptr, Array) + columns = function() RecordBatch__columns(self) ) ) @@ -156,8 +162,9 @@ RecordBatch$create <- function(..., schema = NULL) { names(arrays) <- rep_len("", length(arrays)) } stopifnot(length(arrays) > 0) + # TODO: should this also assert that they're all Arrays? 
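# (Illustration, not part of this patch: with the shared_ptr() wrapper gone,
# RecordBatch__from_arrays() hands back the R6 object directly, so e.g.
#   RecordBatch$create(x = 1:3, y = letters[1:3])
# returns a RecordBatch with no extra wrapping step on the R side.)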
- shared_ptr(RecordBatch, RecordBatch__from_arrays(schema, arrays)) + RecordBatch__from_arrays(schema, arrays) } RecordBatch$from_message <- function(obj, schema) { @@ -168,9 +175,9 @@ RecordBatch$from_message <- function(obj, schema) { on.exit(obj$close()) } if (inherits(obj, "InputStream")) { - shared_ptr(RecordBatch, ipc___ReadRecordBatch__InputStream__Schema(obj, schema)) + ipc___ReadRecordBatch__InputStream__Schema(obj, schema) } else { - shared_ptr(RecordBatch, ipc___ReadRecordBatch__Message__Schema(obj, schema)) + ipc___ReadRecordBatch__Message__Schema(obj, schema) } } diff --git a/r/R/scalar.R b/r/R/scalar.R index f387fb13a48..12f29990e0a 100644 --- a/r/R/scalar.R +++ b/r/R/scalar.R @@ -31,37 +31,26 @@ Scalar <- R6Class("Scalar", inherit = ArrowObject, # TODO: document the methods public = list( - ..dispatch = function() { - type_id <- self$type$id - if (type_id == Type$STRUCT) { - shared_ptr(StructScalar, self$pointer()) - } else { - self - } - }, ToString = function() Scalar__ToString(self), cast = function(target_type) { - Scalar$create(Scalar__CastTo(self, as_type(target_type))) + Scalar__CastTo(self, as_type(target_type)) }, as_vector = function() Scalar__as_vector(self) ), active = list( is_valid = function() Scalar__is_valid(self), null_count = function() sum(!self$is_valid), - type = function() DataType$create(Scalar__type(self)) + type = function() Scalar__type(self) ) ) Scalar$create <- function(x, type = NULL) { - if (!inherits(x, "externalptr")) { - if (is.null(x)) { - x <- vctrs::unspecified(1) - } else if (length(x) != 1 && !is.data.frame(x)) { - # Wrap in a list type - x <- list(x) - } - x <- Array__GetScalar(Array$create(x, type = type), 0) + if (is.null(x)) { + x <- vctrs::unspecified(1) + } else if (length(x) != 1 && !is.data.frame(x)) { + # Wrap in a list type + x <- list(x) } - shared_ptr(Scalar, x)$..dispatch() + Array__GetScalar(Array$create(x, type = type), 0) } #' @rdname array @@ -71,8 +60,8 @@ Scalar$create <- function(x, type = NULL) { StructScalar <- R6Class("StructScalar", inherit = Scalar, public = list( - field = function(i) Scalar$create(StructScalar__field(self, i)), - GetFieldByName = function(name) Scalar$create(StructScalar__GetFieldByName(self, name)) + field = function(i) StructScalar__field(self, i), + GetFieldByName = function(name) StructScalar__GetFieldByName(self, name) ) ) diff --git a/r/R/schema.R b/r/R/schema.R index 4bba8b87f47..9a0ad85acac 100644 --- a/r/R/schema.R +++ b/r/R/schema.R @@ -71,12 +71,12 @@ Schema <- R6Class("Schema", } fields }, - field = function(i) shared_ptr(Field, Schema__field(self, i)), - GetFieldByName = function(x) shared_ptr(Field, Schema__GetFieldByName(self, x)), + field = function(i) Schema__field(self, i), + GetFieldByName = function(x) Schema__GetFieldByName(self, x), serialize = function() Schema__serialize(self), WithMetadata = function(metadata = NULL) { metadata <- prepare_key_value_metadata(metadata) - shared_ptr(Schema, Schema__WithMetadata(self, metadata)) + Schema__WithMetadata(self, metadata) }, Equals = function(other, check_metadata = FALSE, ...) 
{ inherits(other, "Schema") && Schema__Equals(self, other, isTRUE(check_metadata)) @@ -87,7 +87,7 @@ Schema <- R6Class("Schema", Schema__field_names(self) }, num_fields = function() Schema__num_fields(self), - fields = function() map(Schema__fields(self), shared_ptr, class = Field), + fields = function() Schema__fields(self), HasMetadata = function() Schema__HasMetadata(self), metadata = function(new_metadata) { if (missing(new_metadata)) { @@ -103,7 +103,7 @@ Schema <- R6Class("Schema", } ) ) -Schema$create <- function(...) shared_ptr(Schema, schema_(.fields(list2(...)))) +Schema$create <- function(...) schema_(.fields(list2(...))) prepare_key_value_metadata <- function(metadata) { # key-value-metadata must be a named character vector; @@ -169,7 +169,7 @@ length.Schema <- function(x) x$num_fields call. = FALSE ) } - shared_ptr(Schema, schema_(fields)) + schema_(fields) } #' @export @@ -193,13 +193,13 @@ as.list.Schema <- function(x, ...) x$fields #' @export read_schema <- function(stream, ...) { if (inherits(stream, "Message")) { - return(shared_ptr(Schema, ipc___ReadSchema_Message(stream))) + return(ipc___ReadSchema_Message(stream)) } else { if (!inherits(stream, "InputStream")) { stream <- BufferReader$create(stream) on.exit(stream$close()) } - return(shared_ptr(Schema, ipc___ReadSchema_InputStream(stream))) + return(ipc___ReadSchema_InputStream(stream)) } } @@ -216,7 +216,7 @@ read_schema <- function(stream, ...) { #' unify_schemas(a, z) #' } unify_schemas <- function(..., schemas = list(...)) { - shared_ptr(Schema, arrow__UnifySchemas(schemas)) + arrow__UnifySchemas(schemas) } #' @export diff --git a/r/R/struct.R b/r/R/struct.R index feda966f56d..7cfa7c9326c 100644 --- a/r/R/struct.R +++ b/r/R/struct.R @@ -20,11 +20,11 @@ StructType <- R6Class("StructType", inherit = NestedType, public = list( - GetFieldByName = function(name) shared_ptr(Field, StructType__GetFieldByName(self, name)), + GetFieldByName = function(name) StructType__GetFieldByName(self, name), GetFieldIndex = function(name) StructType__GetFieldIndex(self, name) ) ) -StructType$create <- function(...) shared_ptr(StructType, struct__(.fields(list(...)))) +StructType$create <- function(...) struct__(.fields(list(...))) #' @rdname data-type #' @export diff --git a/r/R/table.R b/r/R/table.R index 6c87a3c3318..bc85483be4e 100644 --- a/r/R/table.R +++ b/r/R/table.R @@ -93,15 +93,15 @@ Table <- R6Class("Table", inherit = ArrowObject, public = list( column = function(i) { - shared_ptr(ChunkedArray, Table__column(self, i)) + Table__column(self, i) }, ColumnNames = function() Table__ColumnNames(self), GetColumnByName = function(name) { assert_is(name, "character") assert_that(length(name) == 1) - shared_ptr(ChunkedArray, Table__GetColumnByName(self, name)) + Table__GetColumnByName(self, name) }, - field = function(i) shared_ptr(Field, Table__field(self, i)), + field = function(i) Table__field(self, i), serialize = function(output_stream, ...) 
write_table(self, output_stream, ...), ToString = function() ToString_tabular(self), @@ -110,18 +110,18 @@ Table <- R6Class("Table", inherit = ArrowObject, assert_is(target_schema, "Schema") assert_is(options, "CastOptions") assert_that(identical(self$schema$names, target_schema$names), msg = "incompatible schemas") - shared_ptr(Table, Table__cast(self, target_schema, options)) + Table__cast(self, target_schema, options) }, SelectColumns = function(indices) { - shared_ptr(Table, Table__SelectColumns(self, indices)) + Table__SelectColumns(self, indices) }, Slice = function(offset, length = NULL) { if (is.null(length)) { - shared_ptr(Table, Table__Slice1(self, offset)) + Table__Slice1(self, offset) } else { - shared_ptr(Table, Table__Slice2(self, offset, length)) + Table__Slice2(self, offset, length) } }, Take = function(i) { @@ -131,13 +131,13 @@ Table <- R6Class("Table", inherit = ArrowObject, if (is.integer(i)) { i <- Array$create(i) } - shared_ptr(Table, call_function("take", self, i)) + call_function("take", self, i) }, Filter = function(i, keep_na = TRUE) { if (is.logical(i)) { i <- Array$create(i) } - shared_ptr(Table, call_function("filter", self, i, options = list(keep_na = keep_na))) + call_function("filter", self, i, options = list(keep_na = keep_na)) }, Equals = function(other, check_metadata = FALSE, ...) { @@ -150,13 +150,19 @@ Table <- R6Class("Table", inherit = ArrowObject, ValidateFull = function() { Table__ValidateFull(self) + }, + + invalidate = function() { + .Call(`_arrow_Table__Reset`, self) + super$invalidate() } + ), active = list( num_columns = function() Table__num_columns(self), num_rows = function() Table__num_rows(self), - schema = function() shared_ptr(Schema, Table__schema(self)), + schema = function() Table__schema(self), metadata = function(new) { if (missing(new)) { # Get the metadata (from the schema) @@ -167,11 +173,11 @@ Table <- R6Class("Table", inherit = ArrowObject, out <- Table__ReplaceSchemaMetadata(self, new) # ReplaceSchemaMetadata returns a new object but we're modifying in place, # so swap in that new C++ object pointer into our R6 object - self$set_pointer(out) + self$set_pointer(out$pointer()) self } }, - columns = function() map(Table__columns(self), shared_ptr, class = ChunkedArray) + columns = function() Table__columns(self) ) ) @@ -218,9 +224,9 @@ Table$create <- function(..., schema = NULL) { } stopifnot(length(dots) > 0) if (all_record_batches(dots)) { - shared_ptr(Table, Table__from_record_batches(dots, schema)) + Table__from_record_batches(dots, schema) } else { - shared_ptr(Table, Table__from_dots(dots, schema)) + Table__from_dots(dots, schema) } } diff --git a/r/R/type.R b/r/R/type.R index 921786bfd21..b21ead8e5ed 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -37,51 +37,11 @@ DataType <- R6Class("DataType", Equals = function(other, ...) 
{ inherits(other, "DataType") && DataType__Equals(self, other) }, - num_children = function() { - DataType__num_children(self) + num_fields = function() { + DataType__num_fields(self) }, - children = function() { - map(DataType__children_pointer(self), shared_ptr, class = Field) - }, - - ..dispatch = function() { - switch(names(Type)[self$id + 1], - "NA" = null(), - BOOL = boolean(), - UINT8 = uint8(), - INT8 = int8(), - UINT16 = uint16(), - INT16 = int16(), - UINT32 = uint32(), - INT32 = int32(), - UINT64 = uint64(), - INT64 = int64(), - HALF_FLOAT = float16(), - FLOAT = float32(), - DOUBLE = float64(), - STRING = utf8(), - BINARY = binary(), - FIXED_SIZE_BINARY = shared_ptr(FixedSizeBinary, self$pointer()), - DATE32 = date32(), - DATE64 = date64(), - TIMESTAMP = shared_ptr(Timestamp, self$pointer()), - TIME32 = shared_ptr(Time32, self$pointer()), - TIME64 = shared_ptr(Time64, self$pointer()), - INTERVAL = stop("Type INTERVAL not implemented yet"), - DECIMAL = shared_ptr(Decimal128Type, self$pointer()), - LIST = shared_ptr(ListType, self$pointer()), - STRUCT = shared_ptr(StructType, self$pointer()), - SPARSE_UNION = stop("Type SPARSE_UNION not implemented yet"), - DENSE_UNION = stop("Type DENSE_UNION not implemented yet"), - DICTIONARY = shared_ptr(DictionaryType, self$pointer()), - MAP = stop("Type MAP not implemented yet"), - EXTENSION = stop("Type EXTENSION not implemented yet"), - FIXED_SIZE_LIST = shared_ptr(FixedSizeListType, self$pointer()), - DURATION = stop("Type DURATION not implemented yet"), - LARGE_STRING = large_utf8(), - LARGE_BINARY = large_binary(), - LARGE_LIST = shared_ptr(LargeListType, self$pointer()) - ) + fields = function() { + DataType__fields(self) } ), @@ -91,8 +51,6 @@ DataType <- R6Class("DataType", ) ) -DataType$create <- function(xp) shared_ptr(DataType, xp)$..dispatch() - INTEGER_TYPES <- as.character(outer(c("uint", "int"), c(8, 16, 32, 64), paste0)) FLOAT_TYPES <- c("float16", "float32", "float64", "halffloat", "float", "double") @@ -105,7 +63,7 @@ FLOAT_TYPES <- c("float16", "float32", "float64", "halffloat", "float", "double" type <- function(x) UseMethod("type") #' @export -type.default <- function(x) DataType$create(Array__infer_type(x)) +type.default <- function(x) Array__infer_type(x) #' @export type.Array <- function(x) x$type @@ -232,39 +190,39 @@ NestedType <- R6Class("NestedType", inherit = DataType) #' timestamp("ms", timezone = "CEST") #' time64("ns") #' } -int8 <- function() shared_ptr(Int8, Int8__initialize()) +int8 <- function() Int8__initialize() #' @rdname data-type #' @export -int16 <- function() shared_ptr(Int16, Int16__initialize()) +int16 <- function() Int16__initialize() #' @rdname data-type #' @export -int32 <- function() shared_ptr(Int32, Int32__initialize()) +int32 <- function() Int32__initialize() #' @rdname data-type #' @export -int64 <- function() shared_ptr(Int64, Int64__initialize()) +int64 <- function() Int64__initialize() #' @rdname data-type #' @export -uint8 <- function() shared_ptr(UInt8, UInt8__initialize()) +uint8 <- function() UInt8__initialize() #' @rdname data-type #' @export -uint16 <- function() shared_ptr(UInt16, UInt16__initialize()) +uint16 <- function() UInt16__initialize() #' @rdname data-type #' @export -uint32 <- function() shared_ptr(UInt32, UInt32__initialize()) +uint32 <- function() UInt32__initialize() #' @rdname data-type #' @export -uint64 <- function() shared_ptr(UInt64, UInt64__initialize()) +uint64 <- function() UInt64__initialize() #' @rdname data-type #' @export -float16 <- function() 
shared_ptr(Float16, Float16__initialize()) +float16 <- function() Float16__initialize() #' @rdname data-type #' @export @@ -272,7 +230,7 @@ halffloat <- float16 #' @rdname data-type #' @export -float32 <- function() shared_ptr(Float32, Float32__initialize()) +float32 <- function() Float32__initialize() #' @rdname data-type #' @export @@ -280,11 +238,11 @@ float <- float32 #' @rdname data-type #' @export -float64 <- function() shared_ptr(Float64, Float64__initialize()) +float64 <- function() Float64__initialize() #' @rdname data-type #' @export -boolean <- function() shared_ptr(Boolean, Boolean__initialize()) +boolean <- function() Boolean__initialize() #' @rdname data-type #' @export @@ -292,29 +250,23 @@ bool <- boolean #' @rdname data-type #' @export -utf8 <- function() shared_ptr(Utf8, Utf8__initialize()) +utf8 <- function() Utf8__initialize() #' @rdname data-type #' @export -large_utf8 <- function() shared_ptr(LargeUtf8, LargeUtf8__initialize()) +large_utf8 <- function() LargeUtf8__initialize() #' @rdname data-type #' @export -binary <- function() { - shared_ptr(Binary, Binary__initialize()) -} +binary <- function() Binary__initialize() #' @rdname data-type #' @export -large_binary <- function() { - shared_ptr(LargeBinary, LargeBinary__initialize()) -} +large_binary <- function() LargeBinary__initialize() #' @rdname data-type #' @export -fixed_size_binary <- function(byte_width) { - shared_ptr(FixedSizeBinary, FixedSizeBinary__initialize(byte_width)) -} +fixed_size_binary <- function(byte_width) FixedSizeBinary__initialize(byte_width) #' @rdname data-type #' @export @@ -322,11 +274,11 @@ string <- utf8 #' @rdname data-type #' @export -date32 <- function() shared_ptr(Date32, Date32__initialize()) +date32 <- function() Date32__initialize() #' @rdname data-type #' @export -date64 <- function() shared_ptr(Date64, Date64__initialize()) +date64 <- function() Date64__initialize() #' @rdname data-type #' @export @@ -335,7 +287,7 @@ time32 <- function(unit = c("ms", "s")) { unit <- match.arg(unit) } unit <- make_valid_time_unit(unit, valid_time32_units) - shared_ptr(Time32, Time32__initialize(unit)) + Time32__initialize(unit) } valid_time32_units <- c( @@ -371,12 +323,12 @@ time64 <- function(unit = c("ns", "us")) { unit <- match.arg(unit) } unit <- make_valid_time_unit(unit, valid_time64_units) - shared_ptr(Time64, Time64__initialize(unit)) + Time64__initialize(unit) } #' @rdname data-type #' @export -null <- function() shared_ptr(Null, Null__initialize()) +null <- function() Null__initialize() #' @rdname data-type #' @export @@ -386,7 +338,7 @@ timestamp <- function(unit = c("s", "ms", "us", "ns"), timezone = "") { } unit <- make_valid_time_unit(unit, c(valid_time64_units, valid_time32_units)) assert_that(is.string(timezone)) - shared_ptr(Timestamp, Timestamp__initialize(unit, timezone)) + Timestamp__initialize(unit, timezone) } #' @rdname data-type @@ -402,7 +354,7 @@ decimal <- function(precision, scale) { } else { stop('"scale" must be an integer', call. 
= FALSE) } - shared_ptr(Decimal128Type, Decimal128Type__initialize(precision, scale)) + Decimal128Type__initialize(precision, scale) } as_type <- function(type, name = "type") { diff --git a/r/data-raw/codegen.R b/r/data-raw/codegen.R index ede4043c46f..80660b30d06 100644 --- a/r/data-raw/codegen.R +++ b/r/data-raw/codegen.R @@ -71,6 +71,11 @@ wrap_call <- function(name, return_type, args) { all_decorations <- cpp_decorations() arrow_exports <- get_exported_functions(all_decorations, c("arrow", "s3")) +arrow_classes <- c( + "Table" = "arrow::Table", + "RecordBatch" = "arrow::RecordBatch" +) + cpp_functions_definitions <- arrow_exports %>% select(name, return_type, args, file, line, decoration) %>% pmap_chr(function(name, return_type, args, file, line, decoration){ @@ -106,14 +111,39 @@ cpp_functions_registration <- arrow_exports %>% }) %>% glue_collapse(sep = "\n") +cpp_classes_finalizers <- map2(names(arrow_classes), arrow_classes, function(name, class) { + glue::glue(' + # if defined(ARROW_R_WITH_ARROW) + extern "C" SEXP _arrow_{name}__Reset(SEXP r6) {{ + BEGIN_CPP11 + arrow::r::r6_reset_pointer<{class}>(r6); + END_CPP11 + return R_NilValue; + }} + # else + extern "C" SEXP _arrow_{name}__Reset(SEXP r6) {{ + Rf_error("Cannot call _arrow_{name}__Reset(). Please use arrow::install_arrow() to install required runtime libraries. "); + }} + # endif + ') +}) %>% + glue_collapse(sep = "\n") + +classes_finalizers_registration <- map2(names(arrow_classes), arrow_classes, function(name, class) { + glue('\t\t{{ "_arrow_{name}__Reset", (DL_FUNC) &_arrow_{name}__Reset, 1}}, ') + }) %>% + glue_collapse(sep = "\n") + + writeLines(con = "src/arrowExports.cpp", glue::glue(' // Generated by using data-raw/codegen.R -> do not edit by hand #include #include -#include "./arrow_exports.h" +#include "./arrow_types.h" {cpp_functions_definitions} +{cpp_classes_finalizers} extern "C" SEXP _arrow_available() {{ return Rf_ScalarLogical( @@ -139,6 +169,7 @@ static const R_CallMethodDef CallEntries[] = {{ \t\t{{ "_arrow_available", (DL_FUNC)& _arrow_available, 0 }}, \t\t{{ "_s3_available", (DL_FUNC)& _s3_available, 0 }}, {cpp_functions_registration} +{classes_finalizers_registration} \t\t{{NULL, NULL, 0}} }}; diff --git a/r/man/ParquetReaderProperties.Rd b/r/man/ParquetArrowReaderProperties.Rd similarity index 72% rename from r/man/ParquetReaderProperties.Rd rename to r/man/ParquetArrowReaderProperties.Rd index 5cf777039e6..33a50f71266 100644 --- a/r/man/ParquetReaderProperties.Rd +++ b/r/man/ParquetArrowReaderProperties.Rd @@ -1,9 +1,9 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/parquet.R \docType{class} -\name{ParquetReaderProperties} -\alias{ParquetReaderProperties} -\title{ParquetReaderProperties class} +\name{ParquetArrowReaderProperties} +\alias{ParquetArrowReaderProperties} +\title{ParquetArrowReaderProperties class} \description{ This class holds settings to control how a Parquet file is read by \link{ParquetFileReader}. @@ -11,7 +11,7 @@ by \link{ParquetFileReader}. 
\section{Factory}{ -The \code{ParquetReaderProperties$create()} factory method instantiates the object +The \code{ParquetArrowReaderProperties$create()} factory method instantiates the object and takes the following arguments: \itemize{ \item \code{use_threads} Logical: whether to use multithreading (default \code{TRUE}) diff --git a/r/man/ParquetFileReader.Rd b/r/man/ParquetFileReader.Rd index 828ba696e28..13682a7ee35 100644 --- a/r/man/ParquetFileReader.Rd +++ b/r/man/ParquetFileReader.Rd @@ -15,7 +15,7 @@ takes the following arguments: \itemize{ \item \code{file} A character file name, raw vector, or Arrow file connection object (e.g. \code{RandomAccessFile}). -\item \code{props} Optional \link{ParquetReaderProperties} +\item \code{props} Optional \link{ParquetArrowReaderProperties} \item \code{mmap} Logical: whether to memory-map the file (default \code{TRUE}) \item \code{...} Additional arguments, currently ignored } diff --git a/r/man/read_parquet.Rd b/r/man/read_parquet.Rd index ec36734e599..4e9b526b369 100644 --- a/r/man/read_parquet.Rd +++ b/r/man/read_parquet.Rd @@ -8,7 +8,7 @@ read_parquet( file, col_select = NULL, as_data_frame = TRUE, - props = ParquetReaderProperties$create(), + props = ParquetArrowReaderProperties$create(), ... ) } @@ -27,7 +27,7 @@ of columns, as used in \code{dplyr::select()}.} \item{as_data_frame}{Should the function return a \code{data.frame} (default) or an Arrow \link{Table}?} -\item{props}{\link{ParquetReaderProperties}} +\item{props}{\link{ParquetArrowReaderProperties}} \item{...}{Additional arguments passed to \code{ParquetFileReader$create()}} } diff --git a/r/src/array.cpp b/r/src/array.cpp index 844ac2a704d..e96e286a073 100644 --- a/r/src/array.cpp +++ b/r/src/array.cpp @@ -22,6 +22,29 @@ #include #include +namespace cpp11 { + +const char* r6_class_name::get(const std::shared_ptr& array) { + auto type = array->type_id(); + switch (type) { + case arrow::Type::DICTIONARY: + return "DictionaryArray"; + case arrow::Type::STRUCT: + return "StructArray"; + case arrow::Type::LIST: + return "ListArray"; + case arrow::Type::LARGE_LIST: + return "LargeListArray"; + case arrow::Type::FIXED_SIZE_LIST: + return "FixedSizeListArray"; + + default: + return "Array"; + } +} + +} // namespace cpp11 + void arrow::r::validate_slice_offset(R_xlen_t offset, int64_t len) { if (offset == NA_INTEGER) { cpp11::stop("Slice 'offset' cannot be NA"); @@ -176,9 +199,8 @@ std::shared_ptr StructArray__GetFieldByName( } // [[arrow::export]] -arrow::ArrayVector StructArray__Flatten( - const std::shared_ptr& array) { - return ValueOrStop(array->Flatten()); +cpp11::list StructArray__Flatten(const std::shared_ptr& array) { + return arrow::r::to_r_list(ValueOrStop(array->Flatten())); } // [[arrow::export]] diff --git a/r/src/arraydata.cpp b/r/src/arraydata.cpp index a8ee60f36a0..179532a6437 100644 --- a/r/src/arraydata.cpp +++ b/r/src/arraydata.cpp @@ -43,7 +43,7 @@ int ArrayData__get_offset(const std::shared_ptr& x) { // [[arrow::export]] cpp11::list ArrayData__buffers(const std::shared_ptr& x) { - return cpp11::as_sexp(x->buffers); + return arrow::r::to_r_list(x->buffers); } #endif diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index cadfc8c0745..81e426ef656 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -2,7 +2,7 @@ #include #include -#include "./arrow_exports.h" +#include "./arrow_types.h" // array.cpp #if defined(ARROW_R_WITH_ARROW) @@ -321,7 +321,7 @@ extern "C" SEXP _arrow_StructArray__GetFieldByName(SEXP array_sexp, SEXP name_se // array.cpp 
#if defined(ARROW_R_WITH_ARROW) -arrow::ArrayVector StructArray__Flatten(const std::shared_ptr& array); +cpp11::list StructArray__Flatten(const std::shared_ptr& array); extern "C" SEXP _arrow_StructArray__Flatten(SEXP array_sexp){ BEGIN_CPP11 arrow::r::Input&>::type array(array_sexp); @@ -1410,7 +1410,7 @@ extern "C" SEXP _arrow_dataset___Dataset__ReplaceSchema(SEXP dataset_sexp, SEXP // dataset.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr dataset___UnionDataset__create(const ds::DatasetVector& datasets, const std::shared_ptr& schm); +std::shared_ptr dataset___UnionDataset__create(const ds::DatasetVector& datasets, const std::shared_ptr& schm); extern "C" SEXP _arrow_dataset___UnionDataset__create(SEXP datasets_sexp, SEXP schm_sexp){ BEGIN_CPP11 arrow::r::Input::type datasets(datasets_sexp); @@ -1426,7 +1426,7 @@ extern "C" SEXP _arrow_dataset___UnionDataset__create(SEXP datasets_sexp, SEXP s // dataset.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr dataset___InMemoryDataset__create(const std::shared_ptr& table); +std::shared_ptr dataset___InMemoryDataset__create(const std::shared_ptr& table); extern "C" SEXP _arrow_dataset___InMemoryDataset__create(SEXP table_sexp){ BEGIN_CPP11 arrow::r::Input&>::type table(table_sexp); @@ -1441,7 +1441,7 @@ extern "C" SEXP _arrow_dataset___InMemoryDataset__create(SEXP table_sexp){ // dataset.cpp #if defined(ARROW_R_WITH_ARROW) -ds::DatasetVector dataset___UnionDataset__children(const std::shared_ptr& ds); +cpp11::list dataset___UnionDataset__children(const std::shared_ptr& ds); extern "C" SEXP _arrow_dataset___UnionDataset__children(SEXP ds_sexp){ BEGIN_CPP11 arrow::r::Input&>::type ds(ds_sexp); @@ -1564,7 +1564,7 @@ extern "C" SEXP _arrow_dataset___UnionDatasetFactory__Make(SEXP children_sexp){ // dataset.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr dataset___FileSystemDatasetFactory__Make2(const std::shared_ptr& fs, const std::shared_ptr& selector, const std::shared_ptr& format, const std::shared_ptr& partitioning); +std::shared_ptr dataset___FileSystemDatasetFactory__Make2(const std::shared_ptr& fs, const std::shared_ptr& selector, const std::shared_ptr& format, const std::shared_ptr& partitioning); extern "C" SEXP _arrow_dataset___FileSystemDatasetFactory__Make2(SEXP fs_sexp, SEXP selector_sexp, SEXP format_sexp, SEXP partitioning_sexp){ BEGIN_CPP11 arrow::r::Input&>::type fs(fs_sexp); @@ -1582,7 +1582,7 @@ extern "C" SEXP _arrow_dataset___FileSystemDatasetFactory__Make2(SEXP fs_sexp, S // dataset.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr dataset___FileSystemDatasetFactory__Make1(const std::shared_ptr& fs, const std::shared_ptr& selector, const std::shared_ptr& format); +std::shared_ptr dataset___FileSystemDatasetFactory__Make1(const std::shared_ptr& fs, const std::shared_ptr& selector, const std::shared_ptr& format); extern "C" SEXP _arrow_dataset___FileSystemDatasetFactory__Make1(SEXP fs_sexp, SEXP selector_sexp, SEXP format_sexp){ BEGIN_CPP11 arrow::r::Input&>::type fs(fs_sexp); @@ -1599,7 +1599,7 @@ extern "C" SEXP _arrow_dataset___FileSystemDatasetFactory__Make1(SEXP fs_sexp, S // dataset.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr dataset___FileSystemDatasetFactory__Make3(const std::shared_ptr& fs, const std::shared_ptr& selector, const std::shared_ptr& format, const std::shared_ptr& factory); +std::shared_ptr dataset___FileSystemDatasetFactory__Make3(const std::shared_ptr& fs, const std::shared_ptr& selector, const std::shared_ptr& format, const std::shared_ptr& factory); extern "C" SEXP 
_arrow_dataset___FileSystemDatasetFactory__Make3(SEXP fs_sexp, SEXP selector_sexp, SEXP format_sexp, SEXP factory_sexp){ BEGIN_CPP11 arrow::r::Input&>::type fs(fs_sexp); @@ -1763,7 +1763,7 @@ extern "C" SEXP _arrow_dataset___CsvFileFormat__Make(SEXP parse_options_sexp){ // dataset.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr dataset___DirectoryPartitioning(const std::shared_ptr& schm); +std::shared_ptr dataset___DirectoryPartitioning(const std::shared_ptr& schm); extern "C" SEXP _arrow_dataset___DirectoryPartitioning(SEXP schm_sexp){ BEGIN_CPP11 arrow::r::Input&>::type schm(schm_sexp); @@ -1793,7 +1793,7 @@ extern "C" SEXP _arrow_dataset___DirectoryPartitioning__MakeFactory(SEXP field_n // dataset.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr dataset___HivePartitioning(const std::shared_ptr& schm); +std::shared_ptr dataset___HivePartitioning(const std::shared_ptr& schm); extern "C" SEXP _arrow_dataset___HivePartitioning(SEXP schm_sexp){ BEGIN_CPP11 arrow::r::Input&>::type schm(schm_sexp); @@ -1951,7 +1951,7 @@ extern "C" SEXP _arrow_dataset___Scanner__head(SEXP scanner_sexp, SEXP n_sexp){ // dataset.cpp #if defined(ARROW_R_WITH_ARROW) -std::vector> dataset___Scanner__Scan(const std::shared_ptr& scanner); +cpp11::list dataset___Scanner__Scan(const std::shared_ptr& scanner); extern "C" SEXP _arrow_dataset___Scanner__Scan(SEXP scanner_sexp){ BEGIN_CPP11 arrow::r::Input&>::type scanner(scanner_sexp); @@ -1981,7 +1981,7 @@ extern "C" SEXP _arrow_dataset___Scanner__schema(SEXP sc_sexp){ // dataset.cpp #if defined(ARROW_R_WITH_ARROW) -std::vector> dataset___ScanTask__get_batches(const std::shared_ptr& scan_task); +cpp11::list dataset___ScanTask__get_batches(const std::shared_ptr& scan_task); extern "C" SEXP _arrow_dataset___ScanTask__get_batches(SEXP scan_task_sexp){ BEGIN_CPP11 arrow::r::Input&>::type scan_task(scan_task_sexp); @@ -2015,36 +2015,6 @@ extern "C" SEXP _arrow_dataset___Dataset__Write(SEXP file_write_options_sexp, SE } #endif -// datatype.cpp -#if defined(ARROW_R_WITH_ARROW) -bool shared_ptr_is_null(SEXP xp); -extern "C" SEXP _arrow_shared_ptr_is_null(SEXP xp_sexp){ -BEGIN_CPP11 - arrow::r::Input::type xp(xp_sexp); - return cpp11::as_sexp(shared_ptr_is_null(xp)); -END_CPP11 -} -#else -extern "C" SEXP _arrow_shared_ptr_is_null(SEXP xp_sexp){ - Rf_error("Cannot call shared_ptr_is_null(). Please use arrow::install_arrow() to install required runtime libraries. "); -} -#endif - -// datatype.cpp -#if defined(ARROW_R_WITH_ARROW) -bool unique_ptr_is_null(SEXP xp); -extern "C" SEXP _arrow_unique_ptr_is_null(SEXP xp_sexp){ -BEGIN_CPP11 - arrow::r::Input::type xp(xp_sexp); - return cpp11::as_sexp(unique_ptr_is_null(xp)); -END_CPP11 -} -#else -extern "C" SEXP _arrow_unique_ptr_is_null(SEXP xp_sexp){ - Rf_error("Cannot call unique_ptr_is_null(). Please use arrow::install_arrow() to install required runtime libraries. 
"); -} -#endif - // datatype.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr Int8__initialize(); @@ -2390,7 +2360,7 @@ extern "C" SEXP _arrow_Time64__initialize(SEXP unit_sexp){ // datatype.cpp #if defined(ARROW_R_WITH_ARROW) -SEXP list__(SEXP x); +std::shared_ptr list__(SEXP x); extern "C" SEXP _arrow_list__(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input::type x(x_sexp); @@ -2405,7 +2375,7 @@ extern "C" SEXP _arrow_list__(SEXP x_sexp){ // datatype.cpp #if defined(ARROW_R_WITH_ARROW) -SEXP large_list__(SEXP x); +std::shared_ptr large_list__(SEXP x); extern "C" SEXP _arrow_large_list__(SEXP x_sexp){ BEGIN_CPP11 arrow::r::Input::type x(x_sexp); @@ -2420,7 +2390,7 @@ extern "C" SEXP _arrow_large_list__(SEXP x_sexp){ // datatype.cpp #if defined(ARROW_R_WITH_ARROW) -SEXP fixed_size_list__(SEXP x, int list_size); +std::shared_ptr fixed_size_list__(SEXP x, int list_size); extern "C" SEXP _arrow_fixed_size_list__(SEXP x_sexp, SEXP list_size_sexp){ BEGIN_CPP11 arrow::r::Input::type x(x_sexp); @@ -2497,31 +2467,31 @@ extern "C" SEXP _arrow_DataType__Equals(SEXP lhs_sexp, SEXP rhs_sexp){ // datatype.cpp #if defined(ARROW_R_WITH_ARROW) -int DataType__num_children(const std::shared_ptr& type); -extern "C" SEXP _arrow_DataType__num_children(SEXP type_sexp){ +int DataType__num_fields(const std::shared_ptr& type); +extern "C" SEXP _arrow_DataType__num_fields(SEXP type_sexp){ BEGIN_CPP11 arrow::r::Input&>::type type(type_sexp); - return cpp11::as_sexp(DataType__num_children(type)); + return cpp11::as_sexp(DataType__num_fields(type)); END_CPP11 } #else -extern "C" SEXP _arrow_DataType__num_children(SEXP type_sexp){ - Rf_error("Cannot call DataType__num_children(). Please use arrow::install_arrow() to install required runtime libraries. "); +extern "C" SEXP _arrow_DataType__num_fields(SEXP type_sexp){ + Rf_error("Cannot call DataType__num_fields(). Please use arrow::install_arrow() to install required runtime libraries. "); } #endif // datatype.cpp #if defined(ARROW_R_WITH_ARROW) -cpp11::writable::list DataType__children_pointer(const std::shared_ptr& type); -extern "C" SEXP _arrow_DataType__children_pointer(SEXP type_sexp){ +cpp11::list DataType__fields(const std::shared_ptr& type); +extern "C" SEXP _arrow_DataType__fields(SEXP type_sexp){ BEGIN_CPP11 arrow::r::Input&>::type type(type_sexp); - return cpp11::as_sexp(DataType__children_pointer(type)); + return cpp11::as_sexp(DataType__fields(type)); END_CPP11 } #else -extern "C" SEXP _arrow_DataType__children_pointer(SEXP type_sexp){ - Rf_error("Cannot call DataType__children_pointer(). Please use arrow::install_arrow() to install required runtime libraries. "); +extern "C" SEXP _arrow_DataType__fields(SEXP type_sexp){ + Rf_error("Cannot call DataType__fields(). Please use arrow::install_arrow() to install required runtime libraries. 
"); } #endif @@ -3490,7 +3460,7 @@ extern "C" SEXP _arrow_fs___FileSelector__create(SEXP base_dir_sexp, SEXP allow_ // filesystem.cpp #if defined(ARROW_R_WITH_ARROW) -std::vector> fs___FileSystem__GetTargetInfos_Paths(const std::shared_ptr& file_system, const std::vector& paths); +cpp11::list fs___FileSystem__GetTargetInfos_Paths(const std::shared_ptr& file_system, const std::vector& paths); extern "C" SEXP _arrow_fs___FileSystem__GetTargetInfos_Paths(SEXP file_system_sexp, SEXP paths_sexp){ BEGIN_CPP11 arrow::r::Input&>::type file_system(file_system_sexp); @@ -3506,7 +3476,7 @@ extern "C" SEXP _arrow_fs___FileSystem__GetTargetInfos_Paths(SEXP file_system_se // filesystem.cpp #if defined(ARROW_R_WITH_ARROW) -std::vector> fs___FileSystem__GetTargetInfos_FileSelector(const std::shared_ptr& file_system, const std::shared_ptr& selector); +cpp11::list fs___FileSystem__GetTargetInfos_FileSelector(const std::shared_ptr& file_system, const std::shared_ptr& selector); extern "C" SEXP _arrow_fs___FileSystem__GetTargetInfos_FileSelector(SEXP file_system_sexp, SEXP selector_sexp){ BEGIN_CPP11 arrow::r::Input&>::type file_system(file_system_sexp); @@ -5132,12 +5102,12 @@ extern "C" SEXP _arrow_ExportArray(SEXP array_sexp, SEXP array_ptr_sexp, SEXP sc // py-to-r.cpp #if defined(ARROW_R_WITH_ARROW) -void ExportRecordBatch(const std::shared_ptr& batch, arrow::r::Pointer array_ptr, arrow::r::Pointer schema_ptr); +void ExportRecordBatch(const std::shared_ptr& batch, arrow::r::Pointer array_ptr, arrow::r::Pointer schema_ptr); extern "C" SEXP _arrow_ExportRecordBatch(SEXP batch_sexp, SEXP array_ptr_sexp, SEXP schema_ptr_sexp){ BEGIN_CPP11 arrow::r::Input&>::type batch(batch_sexp); - arrow::r::Input>::type array_ptr(array_ptr_sexp); - arrow::r::Input>::type schema_ptr(schema_ptr_sexp); + arrow::r::Input>::type array_ptr(array_ptr_sexp); + arrow::r::Input>::type schema_ptr(schema_ptr_sexp); ExportRecordBatch(batch, array_ptr, schema_ptr); return R_NilValue; END_CPP11 @@ -5211,7 +5181,7 @@ extern "C" SEXP _arrow_RecordBatch__ReplaceSchemaMetadata(SEXP x_sexp, SEXP meta // recordbatch.cpp #if defined(ARROW_R_WITH_ARROW) -arrow::ArrayVector RecordBatch__columns(const std::shared_ptr& batch); +cpp11::list RecordBatch__columns(const std::shared_ptr& batch); extern "C" SEXP _arrow_RecordBatch__columns(SEXP batch_sexp){ BEGIN_CPP11 arrow::r::Input&>::type batch(batch_sexp); @@ -5448,7 +5418,7 @@ extern "C" SEXP _arrow_RecordBatchReader__ReadNext(SEXP reader_sexp){ // recordbatchreader.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr ipc___RecordBatchStreamReader__Open(const std::shared_ptr& stream); +std::shared_ptr ipc___RecordBatchStreamReader__Open(const std::shared_ptr& stream); extern "C" SEXP _arrow_ipc___RecordBatchStreamReader__Open(SEXP stream_sexp){ BEGIN_CPP11 arrow::r::Input&>::type stream(stream_sexp); @@ -5463,7 +5433,7 @@ extern "C" SEXP _arrow_ipc___RecordBatchStreamReader__Open(SEXP stream_sexp){ // recordbatchreader.cpp #if defined(ARROW_R_WITH_ARROW) -std::vector> ipc___RecordBatchStreamReader__batches(const std::shared_ptr& reader); +cpp11::list ipc___RecordBatchStreamReader__batches(const std::shared_ptr& reader); extern "C" SEXP _arrow_ipc___RecordBatchStreamReader__batches(SEXP reader_sexp){ BEGIN_CPP11 arrow::r::Input&>::type reader(reader_sexp); @@ -5569,7 +5539,7 @@ extern "C" SEXP _arrow_Table__from_RecordBatchStreamReader(SEXP reader_sexp){ // recordbatchreader.cpp #if defined(ARROW_R_WITH_ARROW) -std::vector> ipc___RecordBatchFileReader__batches(const std::shared_ptr& reader); 
+cpp11::list ipc___RecordBatchFileReader__batches(const std::shared_ptr& reader); extern "C" SEXP _arrow_ipc___RecordBatchFileReader__batches(SEXP reader_sexp){ BEGIN_CPP11 arrow::r::Input&>::type reader(reader_sexp); @@ -5871,7 +5841,7 @@ extern "C" SEXP _arrow_Schema__GetFieldByName(SEXP s_sexp, SEXP x_sexp){ // schema.cpp #if defined(ARROW_R_WITH_ARROW) -std::vector> Schema__fields(const std::shared_ptr& schema); +cpp11::list Schema__fields(const std::shared_ptr& schema); extern "C" SEXP _arrow_Schema__fields(SEXP schema_sexp){ BEGIN_CPP11 arrow::r::Input&>::type schema(schema_sexp); @@ -6087,7 +6057,7 @@ extern "C" SEXP _arrow_Table__field(SEXP table_sexp, SEXP i_sexp){ // table.cpp #if defined(ARROW_R_WITH_ARROW) -std::vector> Table__columns(const std::shared_ptr& table); +cpp11::list Table__columns(const std::shared_ptr& table); extern "C" SEXP _arrow_Table__columns(SEXP table_sexp){ BEGIN_CPP11 arrow::r::Input&>::type table(table_sexp); @@ -6304,6 +6274,30 @@ extern "C" SEXP _arrow_SetCpuThreadPoolCapacity(SEXP threads_sexp){ } #endif +# if defined(ARROW_R_WITH_ARROW) +extern "C" SEXP _arrow_Table__Reset(SEXP r6) { + BEGIN_CPP11 + arrow::r::r6_reset_pointer(r6); + END_CPP11 + return R_NilValue; +} +# else +extern "C" SEXP _arrow_Table__Reset(SEXP r6) { + Rf_error("Cannot call _arrow_Table__Reset(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +# endif +# if defined(ARROW_R_WITH_ARROW) +extern "C" SEXP _arrow_RecordBatch__Reset(SEXP r6) { + BEGIN_CPP11 + arrow::r::r6_reset_pointer(r6); + END_CPP11 + return R_NilValue; +} +# else +extern "C" SEXP _arrow_RecordBatch__Reset(SEXP r6) { + Rf_error("Cannot call _arrow_RecordBatch__Reset(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +# endif extern "C" SEXP _arrow_available() { return Rf_ScalarLogical( @@ -6456,8 +6450,6 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_dataset___Scanner__schema", (DL_FUNC) &_arrow_dataset___Scanner__schema, 1}, { "_arrow_dataset___ScanTask__get_batches", (DL_FUNC) &_arrow_dataset___ScanTask__get_batches, 1}, { "_arrow_dataset___Dataset__Write", (DL_FUNC) &_arrow_dataset___Dataset__Write, 6}, - { "_arrow_shared_ptr_is_null", (DL_FUNC) &_arrow_shared_ptr_is_null, 1}, - { "_arrow_unique_ptr_is_null", (DL_FUNC) &_arrow_unique_ptr_is_null, 1}, { "_arrow_Int8__initialize", (DL_FUNC) &_arrow_Int8__initialize, 0}, { "_arrow_Int16__initialize", (DL_FUNC) &_arrow_Int16__initialize, 0}, { "_arrow_Int32__initialize", (DL_FUNC) &_arrow_Int32__initialize, 0}, @@ -6489,8 +6481,8 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_DataType__ToString", (DL_FUNC) &_arrow_DataType__ToString, 1}, { "_arrow_DataType__name", (DL_FUNC) &_arrow_DataType__name, 1}, { "_arrow_DataType__Equals", (DL_FUNC) &_arrow_DataType__Equals, 2}, - { "_arrow_DataType__num_children", (DL_FUNC) &_arrow_DataType__num_children, 1}, - { "_arrow_DataType__children_pointer", (DL_FUNC) &_arrow_DataType__children_pointer, 1}, + { "_arrow_DataType__num_fields", (DL_FUNC) &_arrow_DataType__num_fields, 1}, + { "_arrow_DataType__fields", (DL_FUNC) &_arrow_DataType__fields, 1}, { "_arrow_DataType__id", (DL_FUNC) &_arrow_DataType__id, 1}, { "_arrow_ListType__ToString", (DL_FUNC) &_arrow_ListType__ToString, 1}, { "_arrow_FixedWidthType__bit_width", (DL_FUNC) &_arrow_FixedWidthType__bit_width, 1}, @@ -6730,6 +6722,8 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_Table__from_dots", (DL_FUNC) &_arrow_Table__from_dots, 2}, { "_arrow_GetCpuThreadPoolCapacity", 
(DL_FUNC) &_arrow_GetCpuThreadPoolCapacity, 0}, { "_arrow_SetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_SetCpuThreadPoolCapacity, 1}, + { "_arrow_Table__Reset", (DL_FUNC) &_arrow_Table__Reset, 1}, + { "_arrow_RecordBatch__Reset", (DL_FUNC) &_arrow_RecordBatch__Reset, 1}, {NULL, NULL, 0} }; diff --git a/r/src/arrow_cpp11.h b/r/src/arrow_cpp11.h index 859b0491cd0..2329db11e99 100644 --- a/r/src/arrow_cpp11.h +++ b/r/src/arrow_cpp11.h @@ -22,22 +22,10 @@ #include #undef Free -#include "./nameof.h" - -namespace cpp11 { - -template -SEXP as_sexp(const std::shared_ptr& ptr); - -template -SEXP as_sexp(const std::vector>& vec); - -} // namespace cpp11 - -// TODO: move this include up once we can resolve this issue in cpp11 -// https://github.com/apache/arrow/pull/7819#discussion_r471664878 #include +#include "./nameof.h" + // borrowed from enc package // because R does not make these macros available (i.e. from Defn.h) #define UTF8_MASK (1 << 3) @@ -130,6 +118,8 @@ struct symbols { static SEXP byte_width; static SEXP list_size; static SEXP arrow_attributes; + static SEXP new_; + static SEXP create; }; struct data { @@ -172,6 +162,16 @@ Pointer r6_to_pointer(SEXP self) { return reinterpret_cast(p); } +template +void r6_reset_pointer(SEXP r6) { + SEXP xp = Rf_findVarInFrame(r6, arrow::r::symbols::xp); + void* p = R_ExternalPtrAddr(xp); + if (p != nullptr) { + delete reinterpret_cast*>(p); + R_SetExternalPtrAddr(xp, nullptr); + } +} + // T is either std::shared_ptr or std::unique_ptr // e.g. T = std::shared_ptr template @@ -265,21 +265,16 @@ cpp11::writable::strings to_r_strings(const std::vector>& x, return to_r_vector(x, std::forward(to_string)); } -template -cpp11::writable::list to_r_list(const std::vector>& x) { - auto as_sexp = [](const std::shared_ptr& t) { return cpp11::as_sexp(t); }; - return to_r_vector(x, as_sexp); -} - template cpp11::writable::list to_r_list(const std::vector>& x, ToListElement&& to_element) { - auto as_sexp = [&](const std::shared_ptr& t) { - return cpp11::as_sexp(to_element(t)); - }; + auto as_sexp = [&](const std::shared_ptr& t) { return to_element(t); }; return to_r_vector(x, as_sexp); } +template +cpp11::writable::list to_r_list(const std::vector>& x); + inline cpp11::writable::integers short_row_names(int n) { return {NA_INTEGER, -n}; } template @@ -300,22 +295,72 @@ bool GetBoolOption(const std::string& name, bool default_); namespace cpp11 { template -using enable_if_shared_ptr = typename std::enable_if< - std::is_same, T>::value, T>::type; +SEXP to_r6(const std::shared_ptr& ptr, const char* r6_class_name) { + if (ptr == nullptr) return R_NilValue; + + cpp11::external_pointer> xp(new std::shared_ptr(ptr)); + SEXP r6_class = Rf_install(r6_class_name); + + if (Rf_findVarInFrame3(arrow::r::ns::arrow, r6_class, FALSE) == R_UnboundValue) { + cpp11::stop("No arrow R6 class named '%s'", r6_class_name); + } + + // make call: $new() + SEXP call = PROTECT(Rf_lang3(R_DollarSymbol, r6_class, arrow::r::symbols::new_)); + SEXP call2 = PROTECT(Rf_lang2(call, xp)); + + // and then eval in arrow:: + SEXP r6 = PROTECT(Rf_eval(call2, arrow::r::ns::arrow)); + UNPROTECT(3); + return r6; +} + +/// This trait defines a single static function which returns the name of the R6 class +/// which corresponds to T. By default, this is just the c++ class name with any +/// namespaces stripped, for example the R6 class for arrow::ipc::RecordBatchStreamReader +/// is simply named "RecordBatchStreamReader". +/// +/// Some classes require specializations of this trait. 
For example the R6 classes which +/// wrap arrow::csv::ReadOptions and arrow::json::ReadOptions would collide if both were +/// named "ReadOptions", so they are named "CsvReadOptions" and "JsonReadOptions" +/// respectively. Other classes such as arrow::Array are base classes and the proper R6 +/// class name must be derived by examining a discriminant like Array::type_id. +/// +/// All specializations are located in arrow_types.h template -enable_if_shared_ptr as_cpp(SEXP from) { - return arrow::r::ExternalPtrInput(from); +struct r6_class_name; + +template +SEXP to_r6(const std::shared_ptr& x) { + if (x == nullptr) return R_NilValue; + + return to_r6(x, cpp11::r6_class_name::get(x)); } +} // namespace cpp11 + +namespace arrow { +namespace r { + template -SEXP as_sexp(const std::shared_ptr& ptr) { - return cpp11::external_pointer>(new std::shared_ptr(ptr)); +cpp11::writable::list to_r_list(const std::vector>& x) { + auto as_sexp = [&](const std::shared_ptr& t) { return cpp11::to_r6(t); }; + return to_r_vector(x, as_sexp); } +} // namespace r +} // namespace arrow + +namespace cpp11 { + template -SEXP as_sexp(const std::vector>& vec) { - return arrow::r::to_r_list(vec); +using enable_if_shared_ptr = typename std::enable_if< + std::is_same, T>::value, T>::type; + +template +enable_if_shared_ptr as_cpp(SEXP from) { + return arrow::r::ExternalPtrInput(from); } template @@ -323,4 +368,9 @@ enable_if_enum as_sexp(E e) { return as_sexp(static_cast(e)); } +template +SEXP as_sexp(const std::shared_ptr& ptr) { + return cpp11::to_r6(ptr); +} + } // namespace cpp11 diff --git a/r/src/arrow_exports.h b/r/src/arrow_exports.h deleted file mode 100644 index c4cc0ff6ede..00000000000 --- a/r/src/arrow_exports.h +++ /dev/null @@ -1,87 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
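The r6_class_name trait documented above is an ordinary C++ trait: one static get() per wrapped type, a default that derives the name from the C++ type name, and explicit specializations wherever the stripped names would collide. A compilable sketch of just the specialization dispatch, using toy types and no cpp11 dependency:

#include <iostream>
#include <memory>

namespace csv { struct ReadOptions {}; }
namespace json { struct ReadOptions {}; }

// Primary template: each wrapped type provides a static get().
template <typename T>
struct r6_class_name;

// Specializations disambiguate names that would otherwise collide.
template <>
struct r6_class_name<csv::ReadOptions> {
  static const char* get(const std::shared_ptr<csv::ReadOptions>&) { return "CsvReadOptions"; }
};
template <>
struct r6_class_name<json::ReadOptions> {
  static const char* get(const std::shared_ptr<json::ReadOptions>&) { return "JsonReadOptions"; }
};

template <typename T>
void print_class(const std::shared_ptr<T>& x) {
  std::cout << r6_class_name<T>::get(x) << "\n";
}

int main() {
  print_class(std::make_shared<csv::ReadOptions>());   // CsvReadOptions
  print_class(std::make_shared<json::ReadOptions>());  // JsonReadOptions
}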
- -// This gets included in arrowExports.cpp - -#pragma once - -#include "./arrow_cpp11.h" - -#if defined(ARROW_R_WITH_ARROW) -#include -#include -#include -#include -#include -#include -#include -#include - -namespace arrow { - -namespace compute { -struct CastOptions; - -} // namespace compute - -namespace csv { - -class TableReader; -struct ConvertOptions; -struct ReadOptions; -struct ParseOptions; - -} // namespace csv - -namespace json { - -class TableReader; -struct ReadOptions; -struct ParseOptions; - -} // namespace json - -} // namespace arrow - -namespace ds = ::arrow::dataset; -namespace fs = ::arrow::fs; - -namespace parquet { - -struct ParquetVersion { - enum type { - // forward declaration - }; -}; - -class ReaderProperties; -class ArrowReaderProperties; - -class WriterProperties; -class WriterPropertiesBuilder; -class ArrowWriterProperties; -class ArrowWriterPropertiesBuilder; - -namespace arrow { - -class FileReader; -class FileWriter; - -} // namespace arrow -} // namespace parquet - -#endif diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 4a1845a16bc..e6c2362017a 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -23,15 +23,29 @@ #if defined(ARROW_R_WITH_ARROW) +#include +#include +#include + #include // for RBuffer definition below -#include #include +#include + +// forward declaration-only headers +#include +#include +#include +#include +#include +#include +#include +#include #include +#include +#include -#include -#include -#include -#include +namespace ds = ::arrow::dataset; +namespace fs = ::arrow::fs; SEXP ChunkedArray__as_vector(const std::shared_ptr& chunked_array); SEXP Array__as_vector(const std::shared_ptr& array); @@ -119,4 +133,75 @@ arrow::Status AddMetadataFromDots(SEXP lst, int num_fields, } // namespace r } // namespace arrow +namespace cpp11 { + +template +struct r6_class_name { + static const char* get(const std::shared_ptr& ptr) { + static const std::string name = arrow::util::nameof(/*strip_namespace=*/true); + return name.c_str(); + } +}; + +// Overrides of default R6 class names: +#define R6_CLASS_NAME(CLASS, NAME) \ + template <> \ + struct r6_class_name { \ + static const char* get(const std::shared_ptr&) { return NAME; } \ + } + +R6_CLASS_NAME(arrow::csv::ReadOptions, "CsvReadOptions"); +R6_CLASS_NAME(arrow::csv::ParseOptions, "CsvParseOptions"); +R6_CLASS_NAME(arrow::csv::ConvertOptions, "CsvConvertOptions"); +R6_CLASS_NAME(arrow::csv::TableReader, "CsvTableReader"); + +R6_CLASS_NAME(parquet::ArrowReaderProperties, "ParquetArrowReaderProperties"); +R6_CLASS_NAME(parquet::ArrowWriterProperties, "ParquetArrowWriterProperties"); +R6_CLASS_NAME(parquet::WriterProperties, "ParquetWriterProperties"); +R6_CLASS_NAME(parquet::arrow::FileReader, "ParquetFileReader"); +R6_CLASS_NAME(parquet::WriterPropertiesBuilder, "ParquetWriterPropertiesBuilder"); +R6_CLASS_NAME(parquet::arrow::FileWriter, "ParquetFileWriter"); + +R6_CLASS_NAME(arrow::ipc::feather::Reader, "FeatherReader"); + +R6_CLASS_NAME(arrow::json::ReadOptions, "JsonReadOptions"); +R6_CLASS_NAME(arrow::json::ParseOptions, "JsonParseOptions"); +R6_CLASS_NAME(arrow::json::TableReader, "JsonTableReader"); + +#undef R6_CLASS_NAME + +// Declarations of discriminated base classes. +// Definitions reside in corresponding .cpp files. 
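The _arrow_Table__Reset and _arrow_RecordBatch__Reset entry points added earlier delegate to r6_reset_pointer, which deletes the heap-allocated shared_ptr held behind the R6 object's external pointer and then nulls the stored address so a second reset is a no-op. A self-contained sketch of that ownership dance, with a plain struct standing in for R's external pointer slot:

#include <cassert>
#include <memory>

// Stand-in for R's external pointer: a raw address we can null out.
struct ExternalPtr { void* addr = nullptr; };

template <typename T>
void reset_pointer(ExternalPtr& xp) {
  if (xp.addr != nullptr) {
    // The slot owns a heap-allocated shared_ptr<T>; deleting it releases
    // this handle's share of the wrapped object.
    delete static_cast<std::shared_ptr<T>*>(xp.addr);
    xp.addr = nullptr;  // makes a repeated reset harmless
  }
}

struct Table {};

int main() {
  ExternalPtr xp;
  xp.addr = new std::shared_ptr<Table>(std::make_shared<Table>());
  reset_pointer<Table>(xp);
  assert(xp.addr == nullptr);
  reset_pointer<Table>(xp);  // no-op, no double delete
}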
+template <> +struct r6_class_name { + static const char* get(const std::shared_ptr&); +}; + +template <> +struct r6_class_name { + static const char* get(const std::shared_ptr&); +}; + +template <> +struct r6_class_name { + static const char* get(const std::shared_ptr&); +}; + +template <> +struct r6_class_name { + static const char* get(const std::shared_ptr&); +}; + +template <> +struct r6_class_name { + static const char* get(const std::shared_ptr&); +}; + +template <> +struct r6_class_name { + static const char* get(const std::shared_ptr&); +}; + +} // namespace cpp11 + #endif diff --git a/r/src/chunkedarray.cpp b/r/src/chunkedarray.cpp index f52f20ee0de..52ceff7d914 100644 --- a/r/src/chunkedarray.cpp +++ b/r/src/chunkedarray.cpp @@ -46,7 +46,7 @@ std::shared_ptr ChunkedArray__chunk( // [[arrow::export]] cpp11::list ChunkedArray__chunks( const std::shared_ptr& chunked_array) { - return cpp11::as_sexp(chunked_array->chunks()); + return arrow::r::to_r_list(chunked_array->chunks()); } // [[arrow::export]] diff --git a/r/src/compute.cpp b/r/src/compute.cpp index 3c288c93455..a456ec4711b 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -66,7 +66,8 @@ std::shared_ptr RecordBatch__cast( arrow::ArrayVector columns(nc); for (int i = 0; i < nc; i++) { - columns[i] = Array__cast(batch->column(i), schema->field(i)->type(), options); + columns[i] = ValueOrStop( + arrow::compute::Cast(*batch->column(i), schema->field(i)->type(), *options)); } return arrow::RecordBatch::Make(schema, batch->num_rows(), std::move(columns)); @@ -82,7 +83,10 @@ std::shared_ptr Table__cast( using ColumnVector = std::vector>; ColumnVector columns(nc); for (int i = 0; i < nc; i++) { - columns[i] = ChunkedArray__cast(table->column(i), schema->field(i)->type(), options); + arrow::Datum value(table->column(i)); + arrow::Datum out = + ValueOrStop(arrow::compute::Cast(value, schema->field(i)->type(), *options)); + columns[i] = out.chunked_array(); } return arrow::Table::Make(schema, std::move(columns), table->num_rows()); } @@ -129,19 +133,19 @@ arrow::Datum as_cpp(SEXP x) { SEXP from_datum(arrow::Datum datum) { switch (datum.kind()) { case arrow::Datum::SCALAR: - return cpp11::as_sexp(datum.scalar()); + return cpp11::to_r6(datum.scalar()); case arrow::Datum::ARRAY: - return cpp11::as_sexp(datum.make_array()); + return cpp11::to_r6(datum.make_array()); case arrow::Datum::CHUNKED_ARRAY: - return cpp11::as_sexp(datum.chunked_array()); + return cpp11::to_r6(datum.chunked_array()); case arrow::Datum::RECORD_BATCH: - return cpp11::as_sexp(datum.record_batch()); + return cpp11::to_r6(datum.record_batch()); case arrow::Datum::TABLE: - return cpp11::as_sexp(datum.table()); + return cpp11::to_r6(datum.table()); default: break; diff --git a/r/src/dataset.cpp b/r/src/dataset.cpp index 4327b80f187..2ad59677eb0 100644 --- a/r/src/dataset.cpp +++ b/r/src/dataset.cpp @@ -23,11 +23,44 @@ #include #include #include +#include #include namespace ds = ::arrow::dataset; namespace fs = ::arrow::fs; +namespace cpp11 { + +const char* r6_class_name::get(const std::shared_ptr& dataset) { + auto type_name = dataset->type_name(); + + if (type_name == "union") { + return "UnionDataset"; + } else if (type_name == "filesystem") { + return "FileSystemDataset"; + } else if (type_name == "in-memory") { + return "InMemoryDataset"; + } else { + return "Dataset"; + } +} + +const char* r6_class_name::get( + const std::shared_ptr& file_format) { + auto type_name = file_format->type_name(); + if (type_name == "parquet") { + return "ParquetFileFormat"; + } 
else if (type_name == "ipc") { + return "IpcFileFormat"; + } else if (type_name == "csv") { + return "CsvFileFormat"; + } else { + return "FileFormat"; + } +} + +} // namespace cpp11 + // Dataset, UnionDataset, FileSystemDataset // [[arrow::export]] @@ -57,21 +90,21 @@ std::shared_ptr dataset___Dataset__ReplaceSchema( } // [[arrow::export]] -std::shared_ptr dataset___UnionDataset__create( +std::shared_ptr dataset___UnionDataset__create( const ds::DatasetVector& datasets, const std::shared_ptr& schm) { return ValueOrStop(ds::UnionDataset::Make(schm, datasets)); } // [[arrow::export]] -std::shared_ptr dataset___InMemoryDataset__create( +std::shared_ptr dataset___InMemoryDataset__create( const std::shared_ptr& table) { return std::make_shared(table); } // [[arrow::export]] -ds::DatasetVector dataset___UnionDataset__children( +cpp11::list dataset___UnionDataset__children( const std::shared_ptr& ds) { - return ds->children(); + return arrow::r::to_r_list(ds->children()); } // [[arrow::export]] @@ -128,7 +161,7 @@ std::shared_ptr dataset___UnionDatasetFactory__Make( } // [[arrow::export]] -std::shared_ptr dataset___FileSystemDatasetFactory__Make2( +std::shared_ptr dataset___FileSystemDatasetFactory__Make2( const std::shared_ptr& fs, const std::shared_ptr& selector, const std::shared_ptr& format, @@ -139,11 +172,12 @@ std::shared_ptr dataset___FileSystemDatasetFactory__Make2( options.partitioning = partitioning; } - return ValueOrStop(ds::FileSystemDatasetFactory::Make(fs, *selector, format, options)); + return arrow::internal::checked_pointer_cast( + ValueOrStop(ds::FileSystemDatasetFactory::Make(fs, *selector, format, options))); } // [[arrow::export]] -std::shared_ptr dataset___FileSystemDatasetFactory__Make1( +std::shared_ptr dataset___FileSystemDatasetFactory__Make1( const std::shared_ptr& fs, const std::shared_ptr& selector, const std::shared_ptr& format) { @@ -151,7 +185,7 @@ std::shared_ptr dataset___FileSystemDatasetFactory__Make1( } // [[arrow::export]] -std::shared_ptr dataset___FileSystemDatasetFactory__Make3( +std::shared_ptr dataset___FileSystemDatasetFactory__Make3( const std::shared_ptr& fs, const std::shared_ptr& selector, const std::shared_ptr& format, @@ -162,7 +196,8 @@ std::shared_ptr dataset___FileSystemDatasetFactory__Make3( options.partitioning = factory; } - return ValueOrStop(ds::FileSystemDatasetFactory::Make(fs, *selector, format, options)); + return arrow::internal::checked_pointer_cast( + ValueOrStop(ds::FileSystemDatasetFactory::Make(fs, *selector, format, options))); } // FileFormat, ParquetFileFormat, IpcFileFormat @@ -244,7 +279,7 @@ std::shared_ptr dataset___CsvFileFormat__Make( // DirectoryPartitioning, HivePartitioning // [[arrow::export]] -std::shared_ptr dataset___DirectoryPartitioning( +std::shared_ptr dataset___DirectoryPartitioning( const std::shared_ptr& schm) { return std::make_shared(schm); } @@ -256,7 +291,7 @@ std::shared_ptr dataset___DirectoryPartitioning__MakeFa } // [[arrow::export]] -std::shared_ptr dataset___HivePartitioning( +std::shared_ptr dataset___HivePartitioning( const std::shared_ptr& schm) { return std::make_shared(schm); } @@ -333,8 +368,7 @@ std::shared_ptr dataset___Scanner__head( } // [[arrow::export]] -std::vector> dataset___Scanner__Scan( - const std::shared_ptr& scanner) { +cpp11::list dataset___Scanner__Scan(const std::shared_ptr& scanner) { auto it = ValueOrStop(scanner->Scan()); std::vector> out; std::shared_ptr scan_task; @@ -343,7 +377,8 @@ std::vector> dataset___Scanner__Scan( scan_task = ValueOrStop(st); 
out.push_back(scan_task); } - return out; + + return arrow::r::to_r_list(out); } // [[arrow::export]] @@ -353,7 +388,7 @@ std::shared_ptr dataset___Scanner__schema( } // [[arrow::export]] -std::vector> dataset___ScanTask__get_batches( +cpp11::list dataset___ScanTask__get_batches( const std::shared_ptr& scan_task) { arrow::RecordBatchIterator rbi; rbi = ValueOrStop(scan_task->Execute()); @@ -363,7 +398,7 @@ std::vector> dataset___ScanTask__get_batches batch = ValueOrStop(b); out.push_back(batch); } - return out; + return arrow::r::to_r_list(out); } // [[arrow::export]] diff --git a/r/src/datatype.cpp b/r/src/datatype.cpp index 81d2bd40dd7..6e7398bdff0 100644 --- a/r/src/datatype.cpp +++ b/r/src/datatype.cpp @@ -20,18 +20,100 @@ #if defined(ARROW_R_WITH_ARROW) #include -// [[arrow::export]] -bool shared_ptr_is_null(SEXP xp) { - return reinterpret_cast*>(R_ExternalPtrAddr(xp))->get() == - nullptr; -} +namespace cpp11 { -// [[arrow::export]] -bool unique_ptr_is_null(SEXP xp) { - return reinterpret_cast*>(R_ExternalPtrAddr(xp))->get() == - nullptr; +const char* r6_class_name::get( + const std::shared_ptr& type) { + using arrow::Type; + + switch (type->id()) { + case Type::NA: + return "Null"; + case Type::BOOL: + return "Boolean"; + case Type::UINT8: + return "UInt8"; + case Type::UINT16: + return "UInt16"; + case Type::UINT32: + return "UInt32"; + case Type::UINT64: + return "UInt64"; + + case Type::INT8: + return "Int8"; + case Type::INT16: + return "Int16"; + case Type::INT32: + return "Int32"; + case Type::INT64: + return "Int64"; + + case Type::HALF_FLOAT: + return "Float16"; + case Type::FLOAT: + return "Float32"; + case Type::DOUBLE: + return "Float64"; + + case Type::STRING: + return "Utf8"; + case Type::LARGE_STRING: + return "LargeUtf8"; + + case Type::BINARY: + return "Binary"; + case Type::FIXED_SIZE_BINARY: + return "FixedSizeBinary"; + case Type::LARGE_BINARY: + return "LargeBinary"; + + case Type::DATE32: + return "Date32"; + case Type::DATE64: + return "Date64"; + case Type::TIMESTAMP: + return "Timestamp"; + + case Type::TIME32: + return "Time32"; + case Type::TIME64: + return "Time64"; + + case Type::DECIMAL: + return "Decimal128Type"; + + case Type::LIST: + return "ListType"; + case Type::LARGE_LIST: + return "LargeListType"; + case Type::FIXED_SIZE_LIST: + return "FixedSizeListType"; + + case Type::STRUCT: + return "StructType"; + case Type::DICTIONARY: + return "DictionaryType"; + + default: + break; + } + + // No R6 classes are defined for: + // INTERVAL + // SPARSE_UNION + // DENSE_UNION + // MAP + // EXTENSION + // DURATION + // + // If a c++ function returns one it will be wrapped as a DataType. 
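The compute.cpp hunks above drop the intermediate Array__cast/ChunkedArray__cast helpers in favor of calling arrow::compute::Cast directly on a Datum. For reference, a minimal standalone use of that public API; this sketch assumes an Arrow C++ installation with the unified Cast entry point:

#include <iostream>
#include <arrow/api.h>
#include <arrow/compute/api.h>

int main() {
  // Build an int32 array, then cast it to float64 through the generic entry point.
  arrow::Int32Builder builder;
  if (!builder.AppendValues({1, 2, 3}).ok()) return 1;
  std::shared_ptr<arrow::Array> ints = builder.Finish().ValueOrDie();

  arrow::compute::CastOptions options = arrow::compute::CastOptions::Safe();
  arrow::Datum out =
      arrow::compute::Cast(arrow::Datum(ints), arrow::float64(), options).ValueOrDie();

  std::cout << out.make_array()->ToString() << std::endl;  // [1, 2, 3] as double
}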
+ + return "DataType"; } +} // namespace cpp11 + // [[arrow::export]] std::shared_ptr Int8__initialize() { return arrow::int8(); } @@ -126,51 +208,48 @@ std::shared_ptr Time64__initialize(arrow::TimeUnit::type unit) } // [[arrow::export]] -SEXP list__(SEXP x) { +std::shared_ptr list__(SEXP x) { if (Rf_inherits(x, "Field")) { auto field = cpp11::as_cpp>(x); - return cpp11::as_sexp(arrow::list(field)); + return arrow::list(field); } - if (Rf_inherits(x, "DataType")) { - auto type = cpp11::as_cpp>(x); - return cpp11::as_sexp(arrow::list(type)); + if (!Rf_inherits(x, "DataType")) { + cpp11::stop("incompatible"); } - cpp11::stop("incompatible"); - return R_NilValue; + auto type = cpp11::as_cpp>(x); + return arrow::list(type); } // [[arrow::export]] -SEXP large_list__(SEXP x) { +std::shared_ptr large_list__(SEXP x) { if (Rf_inherits(x, "Field")) { auto field = cpp11::as_cpp>(x); - return cpp11::as_sexp(arrow::large_list(field)); + return arrow::large_list(field); } - if (Rf_inherits(x, "DataType")) { - auto type = cpp11::as_cpp>(x); - return cpp11::as_sexp(arrow::large_list(type)); + if (!Rf_inherits(x, "DataType")) { + cpp11::stop("incompatible"); } - cpp11::stop("incompatible"); - return R_NilValue; + auto type = cpp11::as_cpp>(x); + return arrow::large_list(type); } // [[arrow::export]] -SEXP fixed_size_list__(SEXP x, int list_size) { +std::shared_ptr fixed_size_list__(SEXP x, int list_size) { if (Rf_inherits(x, "Field")) { auto field = cpp11::as_cpp>(x); - return cpp11::as_sexp(arrow::fixed_size_list(field, list_size)); + return arrow::fixed_size_list(field, list_size); } - if (Rf_inherits(x, "DataType")) { - auto type = cpp11::as_cpp>(x); - return cpp11::as_sexp(arrow::fixed_size_list(type, list_size)); + if (!Rf_inherits(x, "DataType")) { + cpp11::stop("incompatible"); } - cpp11::stop("incompatible"); - return R_NilValue; + auto type = cpp11::as_cpp>(x); + return arrow::fixed_size_list(type, list_size); } // [[arrow::export]] @@ -196,13 +275,12 @@ bool DataType__Equals(const std::shared_ptr& lhs, } // [[arrow::export]] -int DataType__num_children(const std::shared_ptr& type) { +int DataType__num_fields(const std::shared_ptr& type) { return type->num_fields(); } // [[arrow::export]] -cpp11::writable::list DataType__children_pointer( - const std::shared_ptr& type) { +cpp11::list DataType__fields(const std::shared_ptr& type) { return arrow::r::to_r_list(type->fields()); } diff --git a/r/src/filesystem.cpp b/r/src/filesystem.cpp index 53959804fe8..066e5b540f2 100644 --- a/r/src/filesystem.cpp +++ b/r/src/filesystem.cpp @@ -24,7 +24,24 @@ namespace fs = ::arrow::fs; -// FileInfo +namespace cpp11 { + +const char* r6_class_name::get( + const std::shared_ptr& file_system) { + auto type_name = file_system->type_name(); + + if (type_name == "local") { + return "LocalFileSystem"; + } else if (type_name == "s3") { + return "S3FileSystem"; + } else if (type_name == "subtree") { + return "SubTreeFileSystem"; + } else { + return "FileSystem"; + } +} + +} // namespace cpp11 // [[arrow::export]] fs::FileType fs___FileInfo__type(const std::shared_ptr& x) { @@ -123,19 +140,20 @@ std::vector> shared_ptr_vector(const std::vector& vec) { } // [[arrow::export]] -std::vector> fs___FileSystem__GetTargetInfos_Paths( +cpp11::list fs___FileSystem__GetTargetInfos_Paths( const std::shared_ptr& file_system, const std::vector& paths) { auto results = ValueOrStop(file_system->GetFileInfo(paths)); - return shared_ptr_vector(results); + return arrow::r::to_r_list(shared_ptr_vector(results)); } // [[arrow::export]] 
-std::vector> fs___FileSystem__GetTargetInfos_FileSelector( +cpp11::list fs___FileSystem__GetTargetInfos_FileSelector( const std::shared_ptr& file_system, const std::shared_ptr& selector) { auto results = ValueOrStop(file_system->GetFileInfo(*selector)); - return shared_ptr_vector(results); + + return arrow::r::to_r_list(shared_ptr_vector(results)); } // [[arrow::export]] @@ -238,8 +256,9 @@ cpp11::writable::list fs___FileSystemFromUri(const std::string& path) { using cpp11::literals::operator"" _nm; std::string out_path; - auto file_system = ValueOrStop(fs::FileSystemFromUri(path, &out_path)); - return cpp11::writable::list({"fs"_nm = file_system, "path"_nm = out_path}); + return cpp11::writable::list( + {"fs"_nm = cpp11::to_r6(ValueOrStop(fs::FileSystemFromUri(path, &out_path))), + "path"_nm = out_path}); } // [[arrow::export]] diff --git a/r/src/memorypool.cpp b/r/src/memorypool.cpp index 05b79dc3929..1345d0b7446 100644 --- a/r/src/memorypool.cpp +++ b/r/src/memorypool.cpp @@ -66,7 +66,7 @@ arrow::MemoryPool* gc_memory_pool() { return &g_pool; } // [[arrow::export]] std::shared_ptr MemoryPool__default() { - return std::shared_ptr(&g_pool, [](...) {}); + return std::shared_ptr(&g_pool, [](...) {}); } // [[arrow::export]] diff --git a/r/src/message.cpp b/r/src/message.cpp index fd50007d93d..f2524644a61 100644 --- a/r/src/message.cpp +++ b/r/src/message.cpp @@ -61,8 +61,9 @@ std::shared_ptr ipc___ReadRecordBatch__Message__Schema( const std::shared_ptr& schema) { // TODO: perhaps this should come from the R side arrow::ipc::DictionaryMemo memo; - return ValueOrStop(arrow::ipc::ReadRecordBatch(*message, schema, &memo, - arrow::ipc::IpcReadOptions::Defaults())); + auto batch = ValueOrStop(arrow::ipc::ReadRecordBatch( + *message, schema, &memo, arrow::ipc::IpcReadOptions::Defaults())); + return batch; } // [[arrow::export]] @@ -85,7 +86,8 @@ std::shared_ptr ipc___ReadSchema_Message( // [[arrow::export]] std::shared_ptr ipc___MessageReader__Open( const std::shared_ptr& stream) { - return arrow::ipc::MessageReader::Open(stream); + return std::shared_ptr( + arrow::ipc::MessageReader::Open(stream)); } // [[arrow::export]] diff --git a/r/src/nameof.h b/r/src/nameof.h index a46ac762869..397c690cc51 100644 --- a/r/src/nameof.h +++ b/r/src/nameof.h @@ -78,8 +78,15 @@ const char* typename_begin() { } // namespace detail template -std::string nameof() { - return {detail::typename_begin(), detail::typename_length()}; +std::string nameof(bool strip_namespace = false) { + std::string name{detail::typename_begin(), detail::typename_length()}; + if (strip_namespace) { + auto i = name.find_last_of("::"); + if (i != std::string::npos) { + name = name.substr(i + 1); + } + } + return name; } } // namespace util diff --git a/r/src/parquet.cpp b/r/src/parquet.cpp index 6f8db31410f..1b0bc41b833 100644 --- a/r/src/parquet.cpp +++ b/r/src/parquet.cpp @@ -24,6 +24,20 @@ #include #include +namespace parquet { + +class WriterPropertiesBuilder : public WriterProperties::Builder { + public: + using WriterProperties::Builder::Builder; +}; + +class ArrowWriterPropertiesBuilder : public ArrowWriterProperties::Builder { + public: + using ArrowWriterProperties::Builder::Builder; +}; + +} // namespace parquet + // [[arrow::export]] std::shared_ptr parquet___arrow___ArrowReaderProperties__Make(bool use_threads) { @@ -145,20 +159,6 @@ std::shared_ptr parquet___arrow___FileReader__ReadColumn( return array; } -namespace parquet { - -class WriterPropertiesBuilder : public WriterProperties::Builder { - public: - using 
WriterProperties::Builder::Builder; -}; - -class ArrowWriterPropertiesBuilder : public ArrowWriterProperties::Builder { - public: - using ArrowWriterProperties::Builder::Builder; -}; - -} // namespace parquet - // [[arrow::export]] std::shared_ptr parquet___ArrowWriterProperties___create( bool allow_truncated_timestamps, bool use_deprecated_int96_timestamps, diff --git a/r/src/py-to-r.cpp b/r/src/py-to-r.cpp index d2ff13bc2f2..56777919c5b 100644 --- a/r/src/py-to-r.cpp +++ b/r/src/py-to-r.cpp @@ -19,6 +19,8 @@ #if defined(ARROW_R_WITH_ARROW) +#include + // [[arrow::export]] std::shared_ptr ImportArray(arrow::r::Pointer array, arrow::r::Pointer schema) { @@ -65,8 +67,8 @@ void ExportArray(const std::shared_ptr& array, // [[arrow::export]] void ExportRecordBatch(const std::shared_ptr& batch, - arrow::r::Pointer array_ptr, - arrow::r::Pointer schema_ptr) { + arrow::r::Pointer array_ptr, + arrow::r::Pointer schema_ptr) { StopIfNotOk(arrow::ExportRecordBatch(*batch, array_ptr, schema_ptr)); } diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp index 02b61f60633..bae5b8e713a 100644 --- a/r/src/recordbatch.cpp +++ b/r/src/recordbatch.cpp @@ -53,14 +53,13 @@ std::shared_ptr RecordBatch__ReplaceSchemaMetadata( } // [[arrow::export]] -arrow::ArrayVector RecordBatch__columns( - const std::shared_ptr& batch) { +cpp11::list RecordBatch__columns(const std::shared_ptr& batch) { auto nc = batch->num_columns(); arrow::ArrayVector res(nc); for (int i = 0; i < nc; i++) { res[i] = batch->column(i); } - return res; + return arrow::r::to_r_list(res); } // [[arrow::export]] diff --git a/r/src/recordbatchreader.cpp b/r/src/recordbatchreader.cpp index 7ecb42002a9..76487164f4f 100644 --- a/r/src/recordbatchreader.cpp +++ b/r/src/recordbatchreader.cpp @@ -38,14 +38,13 @@ std::shared_ptr RecordBatchReader__ReadNext( // -------- RecordBatchStreamReader // [[arrow::export]] -std::shared_ptr ipc___RecordBatchStreamReader__Open( +std::shared_ptr ipc___RecordBatchStreamReader__Open( const std::shared_ptr& stream) { - std::shared_ptr reader; return ValueOrStop(arrow::ipc::RecordBatchStreamReader::Open(stream)); } // [[arrow::export]] -std::vector> ipc___RecordBatchStreamReader__batches( +cpp11::list ipc___RecordBatchStreamReader__batches( const std::shared_ptr& reader) { std::vector> res; @@ -57,7 +56,7 @@ std::vector> ipc___RecordBatchStreamReader__ res.push_back(batch); } - return res; + return arrow::r::to_r_list(res); } // -------- RecordBatchFileReader @@ -86,7 +85,6 @@ std::shared_ptr ipc___RecordBatchFileReader__ReadRecordBatch // [[arrow::export]] std::shared_ptr ipc___RecordBatchFileReader__Open( const std::shared_ptr& file) { - std::shared_ptr reader; return ValueOrStop(arrow::ipc::RecordBatchFileReader::Open(file)); } @@ -117,7 +115,7 @@ std::shared_ptr Table__from_RecordBatchStreamReader( } // [[arrow::export]] -std::vector> ipc___RecordBatchFileReader__batches( +cpp11::list ipc___RecordBatchFileReader__batches( const std::shared_ptr& reader) { auto n = reader->num_record_batches(); std::vector> res(n); @@ -126,7 +124,7 @@ std::vector> ipc___RecordBatchFileReader__ba res[i] = ValueOrStop(reader->ReadRecordBatch(i)); } - return res; + return arrow::r::to_r_list(res); } #endif diff --git a/r/src/scalar.cpp b/r/src/scalar.cpp index d9a3b569c36..2c2d291b5bf 100644 --- a/r/src/scalar.cpp +++ b/r/src/scalar.cpp @@ -22,6 +22,19 @@ #include #include #include +#include + +namespace cpp11 { + +const char* r6_class_name::get( + const std::shared_ptr& scalar) { + if (scalar->type->id() == arrow::Type::STRUCT) { + 
return "StructScalar"; + } + return "Scalar"; +} + +} // namespace cpp11 // [[arrow::export]] std::shared_ptr Array__GetScalar(const std::shared_ptr& x, diff --git a/r/src/schema.cpp b/r/src/schema.cpp index 7cb9a02eeab..d298c80154a 100644 --- a/r/src/schema.cpp +++ b/r/src/schema.cpp @@ -55,9 +55,8 @@ std::shared_ptr Schema__GetFieldByName( } // [[arrow::export]] -std::vector> Schema__fields( - const std::shared_ptr& schema) { - return schema->fields(); +cpp11::list Schema__fields(const std::shared_ptr& schema) { + return arrow::r::to_r_list(schema->fields()); } // [[arrow::export]] diff --git a/r/src/symbols.cpp b/r/src/symbols.cpp index abb9cd49576..256f9e7acce 100644 --- a/r/src/symbols.cpp +++ b/r/src/symbols.cpp @@ -31,6 +31,8 @@ SEXP symbols::ptype = Rf_install("ptype"); SEXP symbols::byte_width = Rf_install("byte_width"); SEXP symbols::list_size = Rf_install("list_size"); SEXP symbols::arrow_attributes = Rf_install("arrow_attributes"); +SEXP symbols::new_ = Rf_install("new"); +SEXP symbols::create = Rf_install("create"); // persistently protect `x` and return it SEXP precious(SEXP x) { diff --git a/r/src/table.cpp b/r/src/table.cpp index 97da768aa59..2c89934eb86 100644 --- a/r/src/table.cpp +++ b/r/src/table.cpp @@ -60,14 +60,13 @@ std::shared_ptr Table__field(const std::shared_ptr& } // [[arrow::export]] -std::vector> Table__columns( - const std::shared_ptr& table) { +cpp11::list Table__columns(const std::shared_ptr& table) { auto nc = table->num_columns(); std::vector> res(nc); for (int i = 0; i < nc; i++) { res[i] = table->column(i); } - return res; + return arrow::r::to_r_list(res); } // [[arrow::export]] diff --git a/r/tests/testthat/helper-roundtrip.R b/r/tests/testthat/helper-roundtrip.R index 16d002c9f2a..4aa435cd298 100644 --- a/r/tests/testthat/helper-roundtrip.R +++ b/r/tests/testthat/helper-roundtrip.R @@ -19,7 +19,7 @@ expect_array_roundtrip <- function(x, type, as = NULL) { a <- Array$create(x, type = as) expect_type_equal(a$type, type) expect_identical(length(a), length(x)) - if (!inherits(type, c("ListType", "LargeListType"))) { + if (!inherits(type, c("ListType", "LargeListType", "FixedSizeListType"))) { # TODO: revisit how missingness works with ListArrays # R list objects don't handle missingness the same way as other vectors. # Is there some vctrs thing we should do on the roundtrip back to R? 
@@ -34,7 +34,7 @@ expect_array_roundtrip <- function(x, type, as = NULL) { x_sliced <- x[-1] expect_type_equal(a_sliced$type, type) expect_identical(length(a_sliced), length(x_sliced)) - if (!inherits(type, c("ListType", "LargeListType"))) { + if (!inherits(type, c("ListType", "LargeListType", "FixedSizeListType"))) { expect_equal(as.vector(is.na(a_sliced)), is.na(x_sliced)) } expect_equivalent(as.vector(a_sliced), x_sliced) diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R index 8ff3fc36713..476b00c7854 100644 --- a/r/tests/testthat/test-data-type.R +++ b/r/tests/testthat/test-data-type.R @@ -24,8 +24,8 @@ test_that("null type works as expected",{ expect_equal(x$ToString(), "null") expect_true(x == x) expect_false(x == int8()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) }) test_that("boolean type work as expected",{ @@ -35,8 +35,8 @@ test_that("boolean type work as expected",{ expect_equal(x$ToString(), "bool") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 1L) }) @@ -47,8 +47,8 @@ test_that("int types works as expected",{ expect_equal(x$ToString(), "uint8") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 8L) x <- int8() @@ -57,8 +57,8 @@ test_that("int types works as expected",{ expect_equal(x$ToString(), "int8") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 8L) x <- uint16() @@ -67,8 +67,8 @@ test_that("int types works as expected",{ expect_equal(x$ToString(), "uint16") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 16L) x <- int16() @@ -77,8 +77,8 @@ test_that("int types works as expected",{ expect_equal(x$ToString(), "int16") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 16L) x <- uint32() @@ -87,8 +87,8 @@ test_that("int types works as expected",{ expect_equal(x$ToString(), "uint32") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 32L) x <- int32() @@ -97,8 +97,8 @@ test_that("int types works as expected",{ expect_equal(x$ToString(), "int32") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 32L) x <- uint64() @@ -107,8 +107,8 @@ test_that("int types works as expected",{ expect_equal(x$ToString(), "uint64") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) 
expect_equal(x$bit_width, 64L) x <- int64() @@ -117,8 +117,8 @@ test_that("int types works as expected",{ expect_equal(x$ToString(), "int64") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 64L) }) @@ -129,8 +129,8 @@ test_that("float types work as expected",{ expect_equal(x$ToString(), "halffloat") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 16L) x <- float32() @@ -139,8 +139,8 @@ test_that("float types work as expected",{ expect_equal(x$ToString(), "float") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 32L) x <- float64() @@ -149,8 +149,8 @@ test_that("float types work as expected",{ expect_equal(x$ToString(), "double") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 64L) }) @@ -161,8 +161,8 @@ test_that("utf8 type works as expected",{ expect_equal(x$ToString(), "string") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) }) test_that("date types work as expected", { @@ -172,8 +172,8 @@ test_that("date types work as expected", { expect_equal(x$ToString(), "date32[day]") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$unit(), unclass(DateUnit$DAY)) x <- date64() @@ -182,8 +182,8 @@ test_that("date types work as expected", { expect_equal(x$ToString(), "date64[ms]") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$unit(), unclass(DateUnit$MILLI)) }) @@ -194,8 +194,8 @@ test_that("timestamp type works as expected", { expect_equal(x$ToString(), "timestamp[s]") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 64L) expect_equal(x$timezone(), "") expect_equal(x$unit(), unclass(TimeUnit$SECOND)) @@ -206,8 +206,8 @@ test_that("timestamp type works as expected", { expect_equal(x$ToString(), "timestamp[ms]") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 64L) expect_equal(x$timezone(), "") expect_equal(x$unit(), unclass(TimeUnit$MILLI)) @@ -218,8 +218,8 @@ test_that("timestamp type works as expected", { expect_equal(x$ToString(), "timestamp[us]") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 
64L) expect_equal(x$timezone(), "") expect_equal(x$unit(), unclass(TimeUnit$MICRO)) @@ -230,8 +230,8 @@ test_that("timestamp type works as expected", { expect_equal(x$ToString(), "timestamp[ns]") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 64L) expect_equal(x$timezone(), "") expect_equal(x$unit(), unclass(TimeUnit$NANO)) @@ -248,8 +248,8 @@ test_that("time32 types work as expected", { expect_equal(x$ToString(), "time32[s]") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 32L) expect_equal(x$unit(), unclass(TimeUnit$SECOND)) @@ -259,8 +259,8 @@ test_that("time32 types work as expected", { expect_equal(x$ToString(), "time32[ms]") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 32L) expect_equal(x$unit(), unclass(TimeUnit$MILLI)) }) @@ -272,8 +272,8 @@ test_that("time64 types work as expected", { expect_equal(x$ToString(), "time64[us]") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 64L) expect_equal(x$unit(), unclass(TimeUnit$MICRO)) @@ -283,8 +283,8 @@ test_that("time64 types work as expected", { expect_equal(x$ToString(), "time64[ns]") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 0L) - expect_equal(x$children(), list()) + expect_equal(x$num_fields(), 0L) + expect_equal(x$fields(), list()) expect_equal(x$bit_width, 64L) expect_equal(x$unit(), unclass(TimeUnit$NANO)) }) @@ -329,9 +329,9 @@ test_that("list type works as expected", { expect_equal(x$ToString(), "list") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 1L) + expect_equal(x$num_fields(), 1L) expect_equal( - x$children(), + x$fields(), list(field("item", int32())) ) expect_equal(x$value_type, int32()) @@ -345,9 +345,9 @@ test_that("struct type works as expected", { expect_equal(x$ToString(), "struct") expect_true(x == x) expect_false(x == null()) - expect_equal(x$num_children(), 2L) + expect_equal(x$num_fields(), 2L) expect_equal( - x$children(), + x$fields(), list(field("x", int32()), field("y", boolean())) ) expect_equal(x$GetFieldIndex("x"), 0L) diff --git a/r/tests/testthat/test-read-record-batch.R b/r/tests/testthat/test-read-record-batch.R index 8eb196a1eab..9383c476588 100644 --- a/r/tests/testthat/test-read-record-batch.R +++ b/r/tests/testthat/test-read-record-batch.R @@ -34,7 +34,7 @@ test_that("RecordBatchFileWriter / RecordBatchFileReader roundtrips", { stream <- FileOutputStream$create(tf) writer <- RecordBatchFileWriter$create(stream, tab$schema) - expect_is(writer, "RecordBatchFileWriter") + expect_is(writer, "RecordBatchWriter") writer$write_table(tab) writer$close() stream$close() diff --git a/r/tests/testthat/test-record-batch-reader.R b/r/tests/testthat/test-record-batch-reader.R index d9c34068425..533d53e7ffb 100644 --- a/r/tests/testthat/test-record-batch-reader.R +++ b/r/tests/testthat/test-record-batch-reader.R @@ -28,7 +28,7 @@ test_that("RecordBatchStreamReader / Writer", { sink 
<- BufferOutputStream$create() expect_equal(sink$tell(), 0) writer <- RecordBatchStreamWriter$create(sink, batch$schema) - expect_is(writer, "RecordBatchStreamWriter") + expect_is(writer, "RecordBatchWriter") writer$write(batch) writer$write(tab) writer$write(tbl) @@ -56,7 +56,7 @@ test_that("RecordBatchStreamReader / Writer", { test_that("RecordBatchFileReader / Writer", { sink <- BufferOutputStream$create() writer <- RecordBatchFileWriter$create(sink, batch$schema) - expect_is(writer, "RecordBatchFileWriter") + expect_is(writer, "RecordBatchWriter") writer$write(batch) writer$write(tab) writer$write(tbl) diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R index 23b08da5457..2730cb50839 100644 --- a/r/tests/testthat/test-schema.R +++ b/r/tests/testthat/test-schema.R @@ -76,7 +76,7 @@ test_that("reading schema from Buffer", { stream <- BufferOutputStream$create() writer <- RecordBatchStreamWriter$create(stream, batch$schema) - expect_is(writer, "RecordBatchStreamWriter") + expect_is(writer, "RecordBatchWriter") writer$close() buffer <- stream$finish()
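The test churn above is mechanical fallout from the C++ rename: DataType::num_children()/children() became num_fields()/fields(), and the R6 bindings follow suit. For reference, the equivalent calls through the Arrow C++ API, assuming Arrow C++ is installed:

#include <iostream>
#include <arrow/api.h>

int main() {
  // struct<x: int32, y: bool> has two child fields.
  auto type = arrow::struct_({arrow::field("x", arrow::int32()),
                              arrow::field("y", arrow::boolean())});
  std::cout << type->num_fields() << "\n";  // 2
  for (const auto& f : type->fields()) {
    std::cout << f->ToString() << "\n";     // x: int32, then y: bool
  }
}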