diff --git a/r/NAMESPACE b/r/NAMESPACE index d0ab7c6fdf4..82e7e4c59f8 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -186,6 +186,9 @@ export(int16) export(int32) export(int64) export(int8) +export(large_binary) +export(large_list_of) +export(large_utf8) export(last_col) export(list_of) export(map_batches) diff --git a/r/R/array.R b/r/R/array.R index 194cdeb42f8..5d4d8c66aad 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -93,6 +93,8 @@ Array <- R6Class("Array", shared_ptr(StructArray, self$pointer()) } else if (type_id == Type$LIST) { shared_ptr(ListArray, self$pointer()) + } else if (type_id == Type$LARGE_LIST){ + shared_ptr(LargeListArray, self$pointer()) } else { self } @@ -230,6 +232,22 @@ ListArray <- R6Class("ListArray", inherit = Array, ) ) +#' @rdname array +#' @usage NULL +#' @format NULL +#' @export +LargeListArray <- R6Class("LargeListArray", inherit = Array, + public = list( + values = function() Array$create(LargeListArray__values(self)), + value_length = function(i) LargeListArray__value_length(self, i), + value_offset = function(i) LargeListArray__value_offset(self, i), + raw_value_offsets = function() LargeListArray__raw_value_offsets(self) + ), + active = list( + value_type = function() DataType$create(LargeListArray__value_type(self)) + ) +) + #' @export length.Array <- function(x) x$length() diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 54fadcb8055..37059c70fb2 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -92,22 +92,42 @@ ListArray__value_type <- function(array){ .Call(`_arrow_ListArray__value_type` , array) } +LargeListArray__value_type <- function(array){ + .Call(`_arrow_LargeListArray__value_type` , array) +} + ListArray__values <- function(array){ .Call(`_arrow_ListArray__values` , array) } +LargeListArray__values <- function(array){ + .Call(`_arrow_LargeListArray__values` , array) +} + ListArray__value_length <- function(array, i){ .Call(`_arrow_ListArray__value_length` , array, i) } +LargeListArray__value_length <- function(array, i){ + .Call(`_arrow_LargeListArray__value_length` , array, i) +} + ListArray__value_offset <- function(array, i){ .Call(`_arrow_ListArray__value_offset` , array, i) } +LargeListArray__value_offset <- function(array, i){ + .Call(`_arrow_LargeListArray__value_offset` , array, i) +} + ListArray__raw_value_offsets <- function(array){ .Call(`_arrow_ListArray__raw_value_offsets` , array) } +LargeListArray__raw_value_offsets <- function(array){ + .Call(`_arrow_LargeListArray__raw_value_offsets` , array) +} + Array__infer_type <- function(x){ .Call(`_arrow_Array__infer_type` , x) } @@ -484,10 +504,18 @@ Utf8__initialize <- function(){ .Call(`_arrow_Utf8__initialize` ) } +LargeUtf8__initialize <- function(){ + .Call(`_arrow_LargeUtf8__initialize` ) +} + Binary__initialize <- function(){ .Call(`_arrow_Binary__initialize` ) } +LargeBinary__initialize <- function(){ + .Call(`_arrow_LargeBinary__initialize` ) +} + Date32__initialize <- function(){ .Call(`_arrow_Date32__initialize` ) } @@ -524,6 +552,10 @@ list__ <- function(x){ .Call(`_arrow_list__` , x) } +large_list__ <- function(x){ + .Call(`_arrow_large_list__` , x) +} + struct_ <- function(fields){ .Call(`_arrow_struct_` , fields) } @@ -620,6 +652,14 @@ ListType__value_type <- function(type){ .Call(`_arrow_ListType__value_type` , type) } +LargeListType__value_field <- function(type){ + .Call(`_arrow_LargeListType__value_field` , type) +} + +LargeListType__value_type <- function(type){ + .Call(`_arrow_LargeListType__value_type` , type) +} + dataset___expr__field_ref <- function(name){ .Call(`_arrow_dataset___expr__field_ref` , name) } diff --git a/r/R/enums.R b/r/R/enums.R index 223d5a799a4..e9420ce4562 100644 --- a/r/R/enums.R +++ b/r/R/enums.R @@ -71,7 +71,13 @@ Type <- enum("Type::type", SPARSE_UNION = 26L, DENSE_UNION = 27L, DICTIONARY = 28L, - MAP = 29L + MAP = 29L, + EXTENSION = 30L, + FIXED_SIZE_LIST = 31L, + DURATION = 32L, + LARGE_STRING = 33L, + LARGE_BINARY = 34L, + LARGE_LIST = 35L ) #' @rdname enums diff --git a/r/R/list.R b/r/R/list.R index 31c72bb3034..ada47e5bc65 100644 --- a/r/R/list.R +++ b/r/R/list.R @@ -28,3 +28,15 @@ ListType <- R6Class("ListType", #' @rdname data-type #' @export list_of <- function(type) shared_ptr(ListType, list__(type)) + +LargeListType <- R6Class("LargeListType", + inherit = NestedType, + active = list( + value_field = function() shared_ptr(Field, LargeListType__value_field(self)), + value_type = function() DataType$create(LargeListType__value_type(self)) + ) +) + +#' @rdname data-type +#' @export +large_list_of <- function(type) shared_ptr(LargeListType, large_list__(type)) diff --git a/r/R/type.R b/r/R/type.R index ab30524ed3f..430440d5779 100644 --- a/r/R/type.R +++ b/r/R/type.R @@ -60,7 +60,7 @@ DataType <- R6Class("DataType", FLOAT = float32(), DOUBLE = float64(), STRING = utf8(), - BINARY = stop("Type BINARY not implemented yet"), + BINARY = binary(), DATE32 = date32(), DATE64 = date64(), TIMESTAMP = shared_ptr(Timestamp, self$pointer()), @@ -73,7 +73,13 @@ DataType <- R6Class("DataType", SPARSE_UNION = stop("Type SPARSE_UNION not implemented yet"), DENSE_UNION = stop("Type DENSE_UNION not implemented yet"), DICTIONARY = shared_ptr(DictionaryType, self$pointer()), - MAP = stop("Type MAP not implemented yet") + MAP = stop("Type MAP not implemented yet"), + EXTENSION = stop("Type EXTENSION not implemented yet"), + FIXED_SIZE_LIST = stop("Type FIXED_SIZE_LIST not implemented yet"), + DURATION = stop("Type DURATION not implemented yet"), + LARGE_STRING = large_utf8(), + LARGE_BINARY = large_binary(), + LARGE_LIST = shared_ptr(LargeListType, self$pointer()) ) } ), @@ -141,8 +147,10 @@ Float32 <- R6Class("Float32", inherit = FixedWidthType) Float64 <- R6Class("Float64", inherit = FixedWidthType) Boolean <- R6Class("Boolean", inherit = FixedWidthType) Utf8 <- R6Class("Utf8", inherit = DataType) +LargeUtf8 <- R6Class("LargeUtf8", inherit = DataType) Binary <- R6Class("Binary", inherit = DataType) FixedSizeBinary <- R6Class("FixedSizeBinary", inherit = FixedWidthType) +LargeBinary <- R6Class("LargeBinary", inherit = DataType) DateType <- R6Class("DateType", inherit = FixedWidthType, @@ -286,6 +294,10 @@ bool <- boolean #' @export utf8 <- function() shared_ptr(Utf8, Utf8__initialize()) +#' @rdname data-type +#' @export +large_utf8 <- function() shared_ptr(LargeUtf8, LargeUtf8__initialize()) + #' @rdname data-type #' @export binary <- function(byte_width = NULL) { @@ -296,6 +308,12 @@ binary <- function(byte_width = NULL) { } } +#' @rdname data-type +#' @export +large_binary <- function() { + shared_ptr(LargeBinary, LargeBinary__initialize()) +} + #' @rdname data-type #' @export string <- utf8 diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd index 6881b9603b2..6f71140c863 100644 --- a/r/man/data-type.Rd +++ b/r/man/data-type.Rd @@ -18,7 +18,9 @@ \alias{boolean} \alias{bool} \alias{utf8} +\alias{large_utf8} \alias{binary} +\alias{large_binary} \alias{string} \alias{date32} \alias{date64} @@ -28,6 +30,7 @@ \alias{timestamp} \alias{decimal} \alias{list_of} +\alias{large_list_of} \alias{struct} \title{Apache Arrow data types} \usage{ @@ -63,8 +66,12 @@ bool() utf8() +large_utf8() + binary(byte_width = NULL) +large_binary() + string() date32() @@ -83,6 +90,8 @@ decimal(precision, scale) list_of(type) +large_list_of(type) + struct(...) } \arguments{ diff --git a/r/man/enums.Rd b/r/man/enums.Rd index 0b47b66cc23..b592af916bb 100644 --- a/r/man/enums.Rd +++ b/r/man/enums.Rd @@ -18,7 +18,7 @@ An object of class \code{TimeUnit::type} (inherits from \code{arrow-enum}) of le An object of class \code{DateUnit} (inherits from \code{arrow-enum}) of length 2. -An object of class \code{Type::type} (inherits from \code{arrow-enum}) of length 30. +An object of class \code{Type::type} (inherits from \code{arrow-enum}) of length 36. An object of class \code{StatusCode} (inherits from \code{arrow-enum}) of length 17. diff --git a/r/src/array.cpp b/r/src/array.cpp index 9ada75d9b51..da980602bce 100644 --- a/r/src/array.cpp +++ b/r/src/array.cpp @@ -206,24 +206,48 @@ std::shared_ptr ListArray__value_type( return array->value_type(); } +// [[arrow::export]] +std::shared_ptr LargeListArray__value_type( + const std::shared_ptr& array) { + return array->value_type(); +} + // [[arrow::export]] std::shared_ptr ListArray__values( const std::shared_ptr& array) { return array->values(); } +// [[arrow::export]] +std::shared_ptr LargeListArray__values( + const std::shared_ptr& array) { + return array->values(); +} + // [[arrow::export]] int32_t ListArray__value_length(const std::shared_ptr& array, int64_t i) { return array->value_length(i); } +// [[arrow::export]] +int64_t LargeListArray__value_length(const std::shared_ptr& array, + int64_t i) { + return array->value_length(i); +} + // [[arrow::export]] int32_t ListArray__value_offset(const std::shared_ptr& array, int64_t i) { return array->value_offset(i); } +// [[arrow::export]] +int64_t LargeListArray__value_offset(const std::shared_ptr& array, + int64_t i) { + return array->value_offset(i); +} + // [[arrow::export]] Rcpp::IntegerVector ListArray__raw_value_offsets( const std::shared_ptr& array) { @@ -231,4 +255,11 @@ Rcpp::IntegerVector ListArray__raw_value_offsets( return Rcpp::IntegerVector(offsets, offsets + array->length()); } +// [[arrow::export]] +Rcpp::IntegerVector LargeListArray__raw_value_offsets( + const std::shared_ptr& array) { + auto offsets = array->raw_value_offsets(); + return Rcpp::IntegerVector(offsets, offsets + array->length()); +} + #endif diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp index 7035dd3e170..19322309a6b 100644 --- a/r/src/array_from_vector.cpp +++ b/r/src/array_from_vector.cpp @@ -142,6 +142,15 @@ struct VectorToArrayConverter { return Status::OK(); } + Status Visit(const arrow::BinaryType& type) { + if (!(Rf_inherits(x, "vctrs_list_of") && + TYPEOF(Rf_getAttrib(x, symbols::ptype)) == RAWSXP)) { + return Status::RError("Expecting a list of raw vectors"); + } + + return Status::OK(); + } + template arrow::enable_if_base_binary Visit(const T& type) { using BuilderType = typename TypeTraits::BuilderType; @@ -182,7 +191,7 @@ struct VectorToArrayConverter { RETURN_NOT_OK(builder->Reserve(n)); for (R_xlen_t i = 0; i < n; i++) { SEXP vector = VECTOR_ELT(x, i); - if (vector == R_NilValue) { + if (Rf_isNull(vector)) { RETURN_NOT_OK(list_builder->AppendNull()); continue; } @@ -364,10 +373,6 @@ std::shared_ptr MakeStructArray(SEXP df, const std::shared_ptr& return std::make_shared(type, rows, children); } -std::shared_ptr MakeListArray(SEXP x, const std::shared_ptr& type) { - return VectorToArrayConverter::Visit(x, type); -} - template int64_t time_cast(T value); @@ -916,6 +921,99 @@ class Time64Converter : public TimeConverter { } }; +template +class BinaryVectorConverter : public VectorConverter { + public: + ~BinaryVectorConverter() {} + + Status Init(ArrayBuilder* builder) { + typed_builder_ = checked_cast(builder); + return Status::OK(); + } + + Status Ingest(SEXP obj) { + ARROW_RETURN_IF(TYPEOF(obj) != VECSXP, Status::RError("Expecting a list")); + R_xlen_t n = XLENGTH(obj); + + // Reserve enough space before appending + int64_t size = 0; + for (R_xlen_t i = 0; i < n; i++) { + SEXP obj_i = VECTOR_ELT(obj, i); + if (!Rf_isNull(obj_i)) { + ARROW_RETURN_IF(TYPEOF(obj_i) != RAWSXP, + Status::RError("Expecting a raw vector")); + size += XLENGTH(obj_i); + } + } + RETURN_NOT_OK(typed_builder_->Reserve(size)); + + // append + for (R_xlen_t i = 0; i < n; i++) { + SEXP obj_i = VECTOR_ELT(obj, i); + if (Rf_isNull(obj_i)) { + RETURN_NOT_OK(typed_builder_->AppendNull()); + } else { + RETURN_NOT_OK(typed_builder_->Append(RAW(obj_i), XLENGTH(obj_i))); + } + } + return Status::OK(); + } + + Status GetResult(std::shared_ptr* result) { + return typed_builder_->Finish(result); + } + + private: + Builder* typed_builder_; +}; + +template +class StringVectorConverter : public VectorConverter { + public: + ~StringVectorConverter() {} + + Status Init(ArrayBuilder* builder) { + typed_builder_ = checked_cast(builder); + return Status::OK(); + } + + Status Ingest(SEXP obj) { + ARROW_RETURN_IF(TYPEOF(obj) != STRSXP, + Status::RError("Expecting a character vector")); + R_xlen_t n = XLENGTH(obj); + + // Reserve enough space before appending + int64_t size = 0; + for (R_xlen_t i = 0; i < n; i++) { + SEXP string_i = STRING_ELT(obj, i); + if (string_i != NA_STRING) { + size += XLENGTH(Rf_mkCharCE(Rf_translateCharUTF8(string_i), CE_UTF8)); + } + } + RETURN_NOT_OK(typed_builder_->Reserve(size)); + + // append + for (R_xlen_t i = 0; i < n; i++) { + SEXP string_i = STRING_ELT(obj, i); + if (string_i == NA_STRING) { + RETURN_NOT_OK(typed_builder_->AppendNull()); + } else { + // Make sure we're ingesting UTF-8 + string_i = Rf_mkCharCE(Rf_translateCharUTF8(string_i), CE_UTF8); + RETURN_NOT_OK(typed_builder_->Append(CHAR(string_i), XLENGTH(string_i))); + } + } + return Status::OK(); + } + + Status GetResult(std::shared_ptr* result) { + return typed_builder_->Finish(result); + } + + private: + Builder* typed_builder_; +}; + #define NUMERIC_CONVERTER(TYPE_ENUM, TYPE) \ case Type::TYPE_ENUM: \ *out = \ @@ -936,7 +1034,11 @@ class Time64Converter : public TimeConverter { Status GetConverter(const std::shared_ptr& type, std::unique_ptr* out) { switch (type->id()) { + SIMPLE_CONVERTER_CASE(BINARY, BinaryVectorConverter); + SIMPLE_CONVERTER_CASE(LARGE_BINARY, BinaryVectorConverter); SIMPLE_CONVERTER_CASE(BOOL, BooleanVectorConverter); + SIMPLE_CONVERTER_CASE(STRING, StringVectorConverter); + SIMPLE_CONVERTER_CASE(LARGE_STRING, StringVectorConverter); NUMERIC_CONVERTER(INT8, Int8Type); NUMERIC_CONVERTER(INT16, Int16Type); NUMERIC_CONVERTER(INT32, Int32Type); @@ -1066,12 +1168,22 @@ std::shared_ptr InferArrowTypeFromVector(SEXP x) { if (Rf_inherits(x, "data.frame") || Rf_inherits(x, "POSIXlt")) { return InferArrowTypeFromDataFrame(x); } else { - if (XLENGTH(x) == 0) { - Rcpp::stop( - "Requires at least one element to infer the values' type of a list vector"); + SEXP ptype = Rf_getAttrib(x, symbols::ptype); + if (Rf_isNull(ptype)) { + if (XLENGTH(x) == 0) { + Rcpp::stop( + "Requires at least one element to infer the values' type of a list vector"); + } + + ptype = VECTOR_ELT(x, 0); } - return arrow::list(InferArrowType(VECTOR_ELT(x, 0))); + // special case list(raw()) -> BinaryArray + if (TYPEOF(ptype) == RAWSXP) { + return arrow::binary(); + } + + return arrow::list(InferArrowType(ptype)); } } @@ -1235,11 +1347,6 @@ std::shared_ptr Array__from_vector( return arrow::r::Array__from_vector_reuse_memory(x); } - // treat strings separately for now - if (type->id() == Type::STRING) { - return VectorToArrayConverter::Visit(x, type); - } - // factors only when type has been inferred if (type->id() == Type::DICTIONARY) { if (type_inferred || arrow::r::CheckCompatibleFactor(x, type)) { @@ -1253,8 +1360,8 @@ std::shared_ptr Array__from_vector( Rcpp::stop("Object incompatible with dictionary type"); } - if (type->id() == Type::LIST) { - return arrow::r::MakeListArray(x, type); + if (type->id() == Type::LIST || type->id() == Type::LARGE_LIST) { + return VectorToArrayConverter::Visit(x, type); } // struct types @@ -1285,7 +1392,6 @@ std::shared_ptr Array__from_vector( StopIfNotOk(converter->Ingest(x)); std::shared_ptr result; StopIfNotOk(converter->GetResult(&result)); - return result; } diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index ebcdb184821..9586c839970 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -19,6 +19,7 @@ #if defined(ARROW_R_WITH_ARROW) #include +#include #include #include #include @@ -186,6 +187,7 @@ class Converter_Date32 : public Converter_SimpleArray { } }; +template struct Converter_String : public Converter { public: explicit Converter_String(const ArrayVector& arrays) : Converter(arrays) {} @@ -220,7 +222,7 @@ struct Converter_String : public Converter { return Status::OK(); } - arrow::StringArray* string_array = static_cast(array.get()); + StringArrayType* string_array = static_cast(array.get()); if (array->null_count()) { // need to watch for nulls arrow::internal::BitmapReader null_reader(array->null_bitmap_data(), @@ -285,6 +287,61 @@ class Converter_Boolean : public Converter { } }; +template +class Converter_Binary : public Converter { + public: + using offset_type = typename ArrayType::offset_type; + explicit Converter_Binary(const ArrayVector& arrays) : Converter(arrays) {} + + SEXP Allocate(R_xlen_t n) const { + SEXP res = PROTECT(Rf_allocVector(VECSXP, n)); + Rf_setAttrib(res, R_ClassSymbol, data::classes_vctrs_list_of); + Rf_setAttrib(res, symbols::ptype, data::empty_raw); + UNPROTECT(1); + return res; + } + + Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const { + return Status::OK(); + } + + Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, + R_xlen_t start, R_xlen_t n) const { + const ArrayType* binary_array = checked_cast(array.get()); + + auto ingest_one = [&](R_xlen_t i) { + offset_type ni; + auto value = binary_array->GetValue(i, &ni); + if (ni > R_XLEN_T_MAX) { + return Status::RError("Array too big to be represented as a raw vector"); + } + SEXP raw = PROTECT(Rf_allocVector(RAWSXP, ni)); + std::copy(value, value + ni, RAW(raw)); + + SET_VECTOR_ELT(data, i, raw); + UNPROTECT(1); + + return Status::OK(); + }; + + if (array->null_count()) { + internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), array->offset(), + n); + + for (R_xlen_t i = 0; i < n; i++, bitmap_reader.Next()) { + if (bitmap_reader.IsSet()) RETURN_NOT_OK(ingest_one(i)); + } + + } else { + for (R_xlen_t i = 0; i < n; i++) { + RETURN_NOT_OK(ingest_one(i)); + } + } + + return Status::OK(); + } +}; + class Converter_Dictionary : public Converter { public: explicit Converter_Dictionary(const ArrayVector& arrays) : Converter(arrays) {} @@ -579,11 +636,33 @@ class Converter_Decimal : public Converter { } }; +template class Converter_List : public Converter { + private: + std::shared_ptr value_type_; + public: - explicit Converter_List(const ArrayVector& arrays) : Converter(arrays) {} + explicit Converter_List(const ArrayVector& arrays, + const std::shared_ptr& value_type) + : Converter(arrays), value_type_(value_type) {} - SEXP Allocate(R_xlen_t n) const { return Rcpp::List(no_init(n)); } + SEXP Allocate(R_xlen_t n) const { + Rcpp::List res(no_init(n)); + Rf_setAttrib(res, R_ClassSymbol, arrow::r::data::classes_vctrs_list_of); + + // Build an empty array to match value_type + std::unique_ptr builder; + StopIfNotOk(arrow::MakeBuilder(arrow::default_memory_pool(), value_type_, &builder)); + + std::shared_ptr array; + StopIfNotOk(builder->Finish(&array)); + + // convert to an R object to store as the list' ptype + SEXP ptype = Array__as_vector(array); + Rf_setAttrib(res, arrow::r::symbols::ptype, ptype); + + return res; + } Status Ingest_all_nulls(SEXP data, R_xlen_t start, R_xlen_t n) const { // nothing to do, list contain NULL by default @@ -592,7 +671,7 @@ class Converter_List : public Converter { Status Ingest_some_nulls(SEXP data, const std::shared_ptr& array, R_xlen_t start, R_xlen_t n) const { - auto list_array = checked_cast(array.get()); + auto list_array = checked_cast(array.get()); auto values_array = list_array->values(); auto ingest_one = [&](R_xlen_t i) { @@ -711,9 +790,22 @@ std::shared_ptr Converter::Make(const std::shared_ptr& type case Type::BOOL: return std::make_shared(std::move(arrays)); + case Type::BINARY: + return std::make_shared>( + std::move(arrays)); + + case Type::LARGE_BINARY: + return std::make_shared>( + std::move(arrays)); + // handle memory dense strings case Type::STRING: - return std::make_shared(std::move(arrays)); + return std::make_shared>( + std::move(arrays)); + + case Type::LARGE_STRING: + return std::make_shared>( + std::move(arrays)); case Type::DICTIONARY: return std::make_shared(std::move(arrays)); @@ -796,7 +888,14 @@ std::shared_ptr Converter::Make(const std::shared_ptr& type return std::make_shared(std::move(arrays)); case Type::LIST: - return std::make_shared(std::move(arrays)); + return std::make_shared>( + std::move(arrays), + checked_cast(type.get())->value_type()); + + case Type::LARGE_LIST: + return std::make_shared>( + std::move(arrays), + checked_cast(type.get())->value_type()); case Type::NA: return std::make_shared(std::move(arrays)); diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 3fa77c24adc..63b6524976c 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -364,6 +364,21 @@ RcppExport SEXP _arrow_ListArray__value_type(SEXP array_sexp){ } #endif +// array.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr LargeListArray__value_type(const std::shared_ptr& array); +RcppExport SEXP _arrow_LargeListArray__value_type(SEXP array_sexp){ +BEGIN_RCPP + Rcpp::traits::input_parameter&>::type array(array_sexp); + return Rcpp::wrap(LargeListArray__value_type(array)); +END_RCPP +} +#else +RcppExport SEXP _arrow_LargeListArray__value_type(SEXP array_sexp){ + Rf_error("Cannot call LargeListArray__value_type(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + // array.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr ListArray__values(const std::shared_ptr& array); @@ -379,6 +394,21 @@ RcppExport SEXP _arrow_ListArray__values(SEXP array_sexp){ } #endif +// array.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr LargeListArray__values(const std::shared_ptr& array); +RcppExport SEXP _arrow_LargeListArray__values(SEXP array_sexp){ +BEGIN_RCPP + Rcpp::traits::input_parameter&>::type array(array_sexp); + return Rcpp::wrap(LargeListArray__values(array)); +END_RCPP +} +#else +RcppExport SEXP _arrow_LargeListArray__values(SEXP array_sexp){ + Rf_error("Cannot call LargeListArray__values(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + // array.cpp #if defined(ARROW_R_WITH_ARROW) int32_t ListArray__value_length(const std::shared_ptr& array, int64_t i); @@ -395,6 +425,22 @@ RcppExport SEXP _arrow_ListArray__value_length(SEXP array_sexp, SEXP i_sexp){ } #endif +// array.cpp +#if defined(ARROW_R_WITH_ARROW) +int64_t LargeListArray__value_length(const std::shared_ptr& array, int64_t i); +RcppExport SEXP _arrow_LargeListArray__value_length(SEXP array_sexp, SEXP i_sexp){ +BEGIN_RCPP + Rcpp::traits::input_parameter&>::type array(array_sexp); + Rcpp::traits::input_parameter::type i(i_sexp); + return Rcpp::wrap(LargeListArray__value_length(array, i)); +END_RCPP +} +#else +RcppExport SEXP _arrow_LargeListArray__value_length(SEXP array_sexp, SEXP i_sexp){ + Rf_error("Cannot call LargeListArray__value_length(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + // array.cpp #if defined(ARROW_R_WITH_ARROW) int32_t ListArray__value_offset(const std::shared_ptr& array, int64_t i); @@ -411,6 +457,22 @@ RcppExport SEXP _arrow_ListArray__value_offset(SEXP array_sexp, SEXP i_sexp){ } #endif +// array.cpp +#if defined(ARROW_R_WITH_ARROW) +int64_t LargeListArray__value_offset(const std::shared_ptr& array, int64_t i); +RcppExport SEXP _arrow_LargeListArray__value_offset(SEXP array_sexp, SEXP i_sexp){ +BEGIN_RCPP + Rcpp::traits::input_parameter&>::type array(array_sexp); + Rcpp::traits::input_parameter::type i(i_sexp); + return Rcpp::wrap(LargeListArray__value_offset(array, i)); +END_RCPP +} +#else +RcppExport SEXP _arrow_LargeListArray__value_offset(SEXP array_sexp, SEXP i_sexp){ + Rf_error("Cannot call LargeListArray__value_offset(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + // array.cpp #if defined(ARROW_R_WITH_ARROW) Rcpp::IntegerVector ListArray__raw_value_offsets(const std::shared_ptr& array); @@ -426,6 +488,21 @@ RcppExport SEXP _arrow_ListArray__raw_value_offsets(SEXP array_sexp){ } #endif +// array.cpp +#if defined(ARROW_R_WITH_ARROW) +Rcpp::IntegerVector LargeListArray__raw_value_offsets(const std::shared_ptr& array); +RcppExport SEXP _arrow_LargeListArray__raw_value_offsets(SEXP array_sexp){ +BEGIN_RCPP + Rcpp::traits::input_parameter&>::type array(array_sexp); + return Rcpp::wrap(LargeListArray__raw_value_offsets(array)); +END_RCPP +} +#else +RcppExport SEXP _arrow_LargeListArray__raw_value_offsets(SEXP array_sexp){ + Rf_error("Cannot call LargeListArray__raw_value_offsets(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + // array_from_vector.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr Array__infer_type(SEXP x); @@ -1877,6 +1954,20 @@ RcppExport SEXP _arrow_Utf8__initialize(){ } #endif +// datatype.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr LargeUtf8__initialize(); +RcppExport SEXP _arrow_LargeUtf8__initialize(){ +BEGIN_RCPP + return Rcpp::wrap(LargeUtf8__initialize()); +END_RCPP +} +#else +RcppExport SEXP _arrow_LargeUtf8__initialize(){ + Rf_error("Cannot call LargeUtf8__initialize(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + // datatype.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr Binary__initialize(); @@ -1891,6 +1982,20 @@ RcppExport SEXP _arrow_Binary__initialize(){ } #endif +// datatype.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr LargeBinary__initialize(); +RcppExport SEXP _arrow_LargeBinary__initialize(){ +BEGIN_RCPP + return Rcpp::wrap(LargeBinary__initialize()); +END_RCPP +} +#else +RcppExport SEXP _arrow_LargeBinary__initialize(){ + Rf_error("Cannot call LargeBinary__initialize(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + // datatype.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr Date32__initialize(); @@ -2025,6 +2130,21 @@ RcppExport SEXP _arrow_list__(SEXP x_sexp){ } #endif +// datatype.cpp +#if defined(ARROW_R_WITH_ARROW) +SEXP large_list__(SEXP x); +RcppExport SEXP _arrow_large_list__(SEXP x_sexp){ +BEGIN_RCPP + Rcpp::traits::input_parameter::type x(x_sexp); + return Rcpp::wrap(large_list__(x)); +END_RCPP +} +#else +RcppExport SEXP _arrow_large_list__(SEXP x_sexp){ + Rf_error("Cannot call large_list__(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + // datatype.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr struct_(List fields); @@ -2390,6 +2510,36 @@ RcppExport SEXP _arrow_ListType__value_type(SEXP type_sexp){ } #endif +// datatype.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr LargeListType__value_field(const std::shared_ptr& type); +RcppExport SEXP _arrow_LargeListType__value_field(SEXP type_sexp){ +BEGIN_RCPP + Rcpp::traits::input_parameter&>::type type(type_sexp); + return Rcpp::wrap(LargeListType__value_field(type)); +END_RCPP +} +#else +RcppExport SEXP _arrow_LargeListType__value_field(SEXP type_sexp){ + Rf_error("Cannot call LargeListType__value_field(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + +// datatype.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr LargeListType__value_type(const std::shared_ptr& type); +RcppExport SEXP _arrow_LargeListType__value_type(SEXP type_sexp){ +BEGIN_RCPP + Rcpp::traits::input_parameter&>::type type(type_sexp); + return Rcpp::wrap(LargeListType__value_type(type)); +END_RCPP +} +#else +RcppExport SEXP _arrow_LargeListType__value_type(SEXP type_sexp){ + Rf_error("Cannot call LargeListType__value_type(). Please use arrow::install_arrow() to install required runtime libraries. "); +} +#endif + // expression.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr dataset___expr__field_ref(std::string name); @@ -5688,10 +5838,15 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_StructArray__GetFieldByName", (DL_FUNC) &_arrow_StructArray__GetFieldByName, 2}, { "_arrow_StructArray__Flatten", (DL_FUNC) &_arrow_StructArray__Flatten, 1}, { "_arrow_ListArray__value_type", (DL_FUNC) &_arrow_ListArray__value_type, 1}, + { "_arrow_LargeListArray__value_type", (DL_FUNC) &_arrow_LargeListArray__value_type, 1}, { "_arrow_ListArray__values", (DL_FUNC) &_arrow_ListArray__values, 1}, + { "_arrow_LargeListArray__values", (DL_FUNC) &_arrow_LargeListArray__values, 1}, { "_arrow_ListArray__value_length", (DL_FUNC) &_arrow_ListArray__value_length, 2}, + { "_arrow_LargeListArray__value_length", (DL_FUNC) &_arrow_LargeListArray__value_length, 2}, { "_arrow_ListArray__value_offset", (DL_FUNC) &_arrow_ListArray__value_offset, 2}, + { "_arrow_LargeListArray__value_offset", (DL_FUNC) &_arrow_LargeListArray__value_offset, 2}, { "_arrow_ListArray__raw_value_offsets", (DL_FUNC) &_arrow_ListArray__raw_value_offsets, 1}, + { "_arrow_LargeListArray__raw_value_offsets", (DL_FUNC) &_arrow_LargeListArray__raw_value_offsets, 1}, { "_arrow_Array__infer_type", (DL_FUNC) &_arrow_Array__infer_type, 1}, { "_arrow_Array__from_vector", (DL_FUNC) &_arrow_Array__from_vector, 2}, { "_arrow_ChunkedArray__from_list", (DL_FUNC) &_arrow_ChunkedArray__from_list, 2}, @@ -5786,7 +5941,9 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_Float64__initialize", (DL_FUNC) &_arrow_Float64__initialize, 0}, { "_arrow_Boolean__initialize", (DL_FUNC) &_arrow_Boolean__initialize, 0}, { "_arrow_Utf8__initialize", (DL_FUNC) &_arrow_Utf8__initialize, 0}, + { "_arrow_LargeUtf8__initialize", (DL_FUNC) &_arrow_LargeUtf8__initialize, 0}, { "_arrow_Binary__initialize", (DL_FUNC) &_arrow_Binary__initialize, 0}, + { "_arrow_LargeBinary__initialize", (DL_FUNC) &_arrow_LargeBinary__initialize, 0}, { "_arrow_Date32__initialize", (DL_FUNC) &_arrow_Date32__initialize, 0}, { "_arrow_Date64__initialize", (DL_FUNC) &_arrow_Date64__initialize, 0}, { "_arrow_Null__initialize", (DL_FUNC) &_arrow_Null__initialize, 0}, @@ -5796,6 +5953,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_Time32__initialize", (DL_FUNC) &_arrow_Time32__initialize, 1}, { "_arrow_Time64__initialize", (DL_FUNC) &_arrow_Time64__initialize, 1}, { "_arrow_list__", (DL_FUNC) &_arrow_list__, 1}, + { "_arrow_large_list__", (DL_FUNC) &_arrow_large_list__, 1}, { "_arrow_struct_", (DL_FUNC) &_arrow_struct_, 1}, { "_arrow_DataType__ToString", (DL_FUNC) &_arrow_DataType__ToString, 1}, { "_arrow_DataType__name", (DL_FUNC) &_arrow_DataType__name, 1}, @@ -5820,6 +5978,8 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_StructType__GetFieldIndex", (DL_FUNC) &_arrow_StructType__GetFieldIndex, 2}, { "_arrow_ListType__value_field", (DL_FUNC) &_arrow_ListType__value_field, 1}, { "_arrow_ListType__value_type", (DL_FUNC) &_arrow_ListType__value_type, 1}, + { "_arrow_LargeListType__value_field", (DL_FUNC) &_arrow_LargeListType__value_field, 1}, + { "_arrow_LargeListType__value_type", (DL_FUNC) &_arrow_LargeListType__value_type, 1}, { "_arrow_dataset___expr__field_ref", (DL_FUNC) &_arrow_dataset___expr__field_ref, 1}, { "_arrow_dataset___expr__equal", (DL_FUNC) &_arrow_dataset___expr__equal, 2}, { "_arrow_dataset___expr__not_equal", (DL_FUNC) &_arrow_dataset___expr__not_equal, 2}, diff --git a/r/src/arrow_rcpp.h b/r/src/arrow_rcpp.h index e66f14e70a8..9a038bc1096 100644 --- a/r/src/arrow_rcpp.h +++ b/r/src/arrow_rcpp.h @@ -34,6 +34,7 @@ struct symbols { static SEXP row_names; static SEXP serialize_arrow_r_metadata; static SEXP as_list; + static SEXP ptype; }; struct data { @@ -41,6 +42,8 @@ struct data { static SEXP classes_metadata_r; static SEXP names_metadata; + static SEXP classes_vctrs_list_of; + static SEXP empty_raw; }; struct ns { diff --git a/r/src/datatype.cpp b/r/src/datatype.cpp index 9f505986d84..c22afd1a1b0 100644 --- a/r/src/datatype.cpp +++ b/r/src/datatype.cpp @@ -80,9 +80,17 @@ std::shared_ptr Boolean__initialize() { return arrow::boolean() // [[arrow::export]] std::shared_ptr Utf8__initialize() { return arrow::utf8(); } +// [[arrow::export]] +std::shared_ptr LargeUtf8__initialize() { return arrow::large_utf8(); } + // [[arrow::export]] std::shared_ptr Binary__initialize() { return arrow::binary(); } +// [[arrow::export]] +std::shared_ptr LargeBinary__initialize() { + return arrow::large_binary(); +} + // [[arrow::export]] std::shared_ptr Date32__initialize() { return arrow::date32(); } @@ -142,6 +150,22 @@ SEXP list__(SEXP x) { return R_NilValue; } +// [[arrow::export]] +SEXP large_list__(SEXP x) { + if (Rf_inherits(x, "Field")) { + Rcpp::ConstReferenceSmartPtrInputParameter> field(x); + return wrap(arrow::large_list(field)); + } + + if (Rf_inherits(x, "DataType")) { + Rcpp::ConstReferenceSmartPtrInputParameter> type(x); + return wrap(arrow::large_list(type)); + } + + stop("incompatible"); + return R_NilValue; +} + // [[arrow::export]] std::shared_ptr struct_(List fields) { return arrow::struct_(arrow::r::List_to_shared_ptr_vector(fields)); @@ -272,4 +296,16 @@ std::shared_ptr ListType__value_type( return type->value_type(); } +// [[arrow::export]] +std::shared_ptr LargeListType__value_field( + const std::shared_ptr& type) { + return type->value_field(); +} + +// [[arrow::export]] +std::shared_ptr LargeListType__value_type( + const std::shared_ptr& type) { + return type->value_type(); +} + #endif diff --git a/r/src/symbols.cpp b/r/src/symbols.cpp index be06fdfb533..001a2dd941c 100644 --- a/r/src/symbols.cpp +++ b/r/src/symbols.cpp @@ -27,6 +27,7 @@ SEXP symbols::inspect = Rf_install("inspect"); SEXP symbols::row_names = Rf_install("row.names"); SEXP symbols::serialize_arrow_r_metadata = Rf_install(".serialize_arrow_r_metadata"); SEXP symbols::as_list = Rf_install("as.list"); +SEXP symbols::ptype = Rf_install("ptype"); SEXP get_classes_POSIXct() { SEXP classes = Rf_allocVector(STRSXP, 2); @@ -50,9 +51,26 @@ SEXP get_names_metadata() { return names; } +SEXP get_classes_vctrs_list_of() { + SEXP classes = Rf_allocVector(STRSXP, 3); + R_PreserveObject(classes); + SET_STRING_ELT(classes, 0, Rf_mkChar("vctrs_list_of")); + SET_STRING_ELT(classes, 1, Rf_mkChar("vctrs_vctr")); + SET_STRING_ELT(classes, 2, Rf_mkChar("list")); + return classes; +} + +SEXP get_empty_raw() { + SEXP res = Rf_allocVector(RAWSXP, 0); + R_PreserveObject(res); + return res; +} + SEXP data::classes_POSIXct = get_classes_POSIXct(); SEXP data::classes_metadata_r = get_classes_metadata_r(); SEXP data::names_metadata = get_names_metadata(); +SEXP data::classes_vctrs_list_of = get_classes_vctrs_list_of(); +SEXP data::empty_raw = get_empty_raw(); void inspect(SEXP obj) { Rcpp::Shield call_inspect(Rf_lang2(symbols::inspect, obj)); diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index 2ae6505cc2d..66908325b9e 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -17,17 +17,17 @@ context("Array") -expect_array_roundtrip <- function(x, type) { - a <- Array$create(x) +expect_array_roundtrip <- function(x, type, as = NULL) { + a <- Array$create(x, type = as) expect_type_equal(a$type, type) expect_identical(length(a), length(x)) - if (!inherits(type, "ListType")) { + if (!inherits(type, c("ListType", "LargeListType"))) { # TODO: revisit how missingness works with ListArrays # R list objects don't handle missingness the same way as other vectors. # Is there some vctrs thing we should do on the roundtrip back to R? expect_identical(is.na(a), is.na(x)) } - expect_equal(as.vector(a), x) + expect_equivalent(as.vector(a), x) # Make sure the storage mode is the same on roundtrip (esp. integer vs. numeric) expect_identical(typeof(as.vector(a)), typeof(x)) @@ -36,10 +36,10 @@ expect_array_roundtrip <- function(x, type) { x_sliced <- x[-1] expect_type_equal(a_sliced$type, type) expect_identical(length(a_sliced), length(x_sliced)) - if (!inherits(type, "ListType")) { + if (!inherits(type, c("ListType", "LargeListType"))) { expect_identical(is.na(a_sliced), is.na(x_sliced)) } - expect_equal(as.vector(a_sliced), x_sliced) + expect_equivalent(as.vector(a_sliced), x_sliced) } invisible(a) } @@ -49,6 +49,16 @@ test_that("Integer Array", { x <- expect_array_roundtrip(ints, int32()) }) +test_that("binary Array", { + bin <- vctrs::list_of(as.raw(1:10), as.raw(0:255), .ptype = raw()) + expect_array_roundtrip(bin, binary()) + expect_array_roundtrip(bin, large_binary(), as = large_binary()) + + # degenerate + bin <- structure(list(1L), ptype = raw()) + expect_error(Array$create(bin)) +}) + test_that("Slice() and RangeEquals()", { ints <- c(1:10, 101:110, 201:205) x <- Array$create(ints) @@ -150,12 +160,16 @@ test_that("Array supports logical vectors (ARROW-3341)", { test_that("Array supports character vectors (ARROW-3339)", { # without NA expect_array_roundtrip(c("itsy", "bitsy", "spider"), utf8()) + expect_array_roundtrip(c("itsy", "bitsy", "spider"), large_utf8(), as = large_utf8()) + # with NA expect_array_roundtrip(c("itsy", NA, "spider"), utf8()) + expect_array_roundtrip(c("itsy", NA, "spider"), large_utf8(), as = large_utf8()) }) test_that("empty arrays are supported", { expect_array_roundtrip(character(), utf8()) + expect_array_roundtrip(character(), large_utf8(), as = large_utf8()) expect_array_roundtrip(integer(), int32()) expect_array_roundtrip(numeric(), float64()) expect_array_roundtrip(factor(character()), dictionary(int8(), utf8())) @@ -481,6 +495,51 @@ test_that("Array$create() handles vector -> list arrays (ARROW-7662)", { expect_error(Array$create(list(df))) }) +test_that("Array$create() handles vector -> large list arrays", { + # Should be able to create an empty list with a type hint. + expect_is(Array$create(list(), type = large_list_of(bool())), "LargeListArray") + + # logical + expect_array_roundtrip(list(NA), large_list_of(bool()), as = large_list_of(bool())) + expect_array_roundtrip(list(logical(0)), large_list_of(bool()), as = large_list_of(bool())) + expect_array_roundtrip(list(c(TRUE), c(FALSE), c(FALSE, TRUE)), large_list_of(bool()), as = large_list_of(bool())) + expect_array_roundtrip(list(c(TRUE), c(FALSE), NA, logical(0), c(FALSE, NA, TRUE)), large_list_of(bool()), as = large_list_of(bool())) + + # integer + expect_array_roundtrip(list(NA_integer_), large_list_of(int32()), as = large_list_of(int32())) + expect_array_roundtrip(list(integer(0)), large_list_of(int32()), as = large_list_of(int32())) + expect_array_roundtrip(list(1:2, 3:4, 12:18), large_list_of(int32()), as = large_list_of(int32())) + expect_array_roundtrip(list(c(1:2), NA_integer_, integer(0), c(12:18, NA_integer_)), large_list_of(int32()), as = large_list_of(int32())) + + # numeric + expect_array_roundtrip(list(NA_real_), large_list_of(float64()), as = large_list_of(float64())) + expect_array_roundtrip(list(numeric(0)), large_list_of(float64()), as = large_list_of(float64())) + expect_array_roundtrip(list(1, c(2, 3), 4), large_list_of(float64()), as = large_list_of(float64())) + expect_array_roundtrip(list(1, numeric(0), c(2, 3, NA_real_), 4), large_list_of(float64()), as = large_list_of(float64())) + + # character + expect_array_roundtrip(list(NA_character_), large_list_of(utf8()), as = large_list_of(utf8())) + expect_array_roundtrip(list(character(0)), large_list_of(utf8()), as = large_list_of(utf8())) + expect_array_roundtrip(list("itsy", c("bitsy", "spider"), c("is")), large_list_of(utf8()), as = large_list_of(utf8())) + expect_array_roundtrip(list("itsy", character(0), c("bitsy", "spider", NA_character_), c("is")), large_list_of(utf8()), as = large_list_of(utf8())) + + # factor + expect_array_roundtrip(list(factor(c("b", "a"), levels = c("a", "b"))), large_list_of(dictionary(int8(), utf8())), as = large_list_of(dictionary(int8(), utf8()))) + expect_array_roundtrip(list(factor(NA, levels = c("a", "b"))), large_list_of(dictionary(int8(), utf8())), as = large_list_of(dictionary(int8(), utf8()))) + + # struct + expect_array_roundtrip( + list(tibble::tibble(a = integer(0), b = integer(0), c = character(0), d = logical(0))), + large_list_of(struct(a = int32(), b = int32(), c = utf8(), d = bool())), + as = large_list_of(struct(a = int32(), b = int32(), c = utf8(), d = bool())) + ) + expect_array_roundtrip( + list(tibble::tibble(a = list(integer()))), + large_list_of(struct(a = list_of(int32()))), + as = large_list_of(struct(a = list_of(int32()))) + ) +}) + test_that("Array$create() should have helpful error on lists with type hint", { expect_error(Array$create(list(numeric(0)), list_of(bool())), regexp = "List vector expecting elements vector of type") diff --git a/r/tests/testthat/test-json.R b/r/tests/testthat/test-json.R index d4a783089bc..a35a465bf0b 100644 --- a/r/tests/testthat/test-json.R +++ b/r/tests/testthat/test-json.R @@ -133,7 +133,7 @@ test_that("Can read json file with nested columns (ARROW-5503)", { c(5, 6) ) list_array <- tab1$column(0) - expect_identical( + expect_equivalent( list_array$as_vector(), list_array_r ) diff --git a/r/vignettes/arrow.Rmd b/r/vignettes/arrow.Rmd index 8602ea260e0..b3accd00fc3 100644 --- a/r/vignettes/arrow.Rmd +++ b/r/vignettes/arrow.Rmd @@ -103,40 +103,40 @@ In the tables, entries with a `-` are not currently implemented. ### Arrow to R -| Arrow type | R type | -|-------------------|--------------------------| -| boolean | logical | -| int8 | integer | -| int16 | integer | -| int32 | integer | -| int64 | integer^++^ | -| uint8 | integer | -| uint16 | integer | -| uint32 | integer^++^ | -| uint64 | integer^++^ | -| float16 | - | -| float32 | double | -| float64 | double | -| utf8 | character | -| binary | - | -| fixed_size_binary | - | -| date32 | Date | -| date64 | POSIXct | -| time32 | hms::difftime | -| time64 | hms::difftime | -| timestamp | POSIXct | -| duration | - | -| decimal | double | -| dictionary | factor^+++^ | -| list | list | -| fixed_size_list | - | -| struct | data.frame | -| null | vctrs::vctrs_unspecified | -| map | - | -| union | - | -| large_utf8 | - | -| large_binary | - | -| large_list | - | +| Arrow type | R type | +|-------------------|---------------------------| +| boolean | logical | +| int8 | integer | +| int16 | integer | +| int32 | integer | +| int64 | integer^++^ | +| uint8 | integer | +| uint16 | integer | +| uint32 | integer^++^ | +| uint64 | integer^++^ | +| float16 | - | +| float32 | double | +| float64 | double | +| utf8 | character | +| binary | vctrs::vctrs_list_of(raw) | +| fixed_size_binary | - | +| date32 | Date | +| date64 | POSIXct | +| time32 | hms::difftime | +| time64 | hms::difftime | +| timestamp | POSIXct | +| duration | - | +| decimal | double | +| dictionary | factor^+++^ | +| list | vctrs::vctrs_list_of | +| fixed_size_list | - | +| struct | data.frame | +| null | vctrs::vctrs_unspecified | +| map | - | +| union | - | +| large_utf8 | character | +| large_binary | vctrs::vctrs_list_of(raw) | +| large_list | vctrs::vctrs_list_of | ^++^: These integer types may contain values that exceed the range of R's `integer` type (32-bit signed integer). When they do, `uint32` and `uint64` are converted to `double` ("numeric") and `int64` is converted to `bit64::integer64`.