From d3a57b652147baca6214ba8b055735818e1e57cf Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Thu, 12 Nov 2020 15:57:38 +0100 Subject: [PATCH 01/82] baby steps for converting SEXP to Array through the Converter api --- r/R/arrowExports.R | 4 + r/src/arrowExports.cpp | 820 +++++++++++++++++++++-------------------- r/src/arrow_types.h | 8 +- r/src/r_to_arrow.cpp | 173 +++++++++ 4 files changed, 602 insertions(+), 403 deletions(-) create mode 100644 r/src/r_to_arrow.cpp diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index d6a5f9356e8..421310ae4ac 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -1268,6 +1268,10 @@ ExportRecordBatch <- function(batch, array_ptr, schema_ptr){ invisible(.Call(`_arrow_ExportRecordBatch`, batch, array_ptr, schema_ptr)) } +vec_to_arrow <- function(x, s_type){ + .Call(`_arrow_vec_to_arrow` , x, s_type) +} + RecordBatch__num_columns <- function(x){ .Call(`_arrow_RecordBatch__num_columns`, x) } diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index ae90abd5adf..1eef952f5e7 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -2764,6 +2764,27 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +#else +extern "C" SEXP _arrow_ExportRecordBatch(SEXP batch_sexp, SEXP array_ptr_sexp, SEXP schema_ptr_sexp){ + Rf_error("Cannot call ExportRecordBatch(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + +// r_to_arrow.cpp +#if defined(ARROW_R_WITH_ARROW) +SEXP vec_to_arrow(SEXP x, SEXP s_type); +extern "C" SEXP _arrow_vec_to_arrow(SEXP x_sexp, SEXP s_type_sexp){ +BEGIN_CPP11 + arrow::r::Input::type x(x_sexp); + arrow::r::Input::type s_type(s_type_sexp); + return cpp11::as_sexp(vec_to_arrow(x, s_type)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_vec_to_arrow(SEXP x_sexp, SEXP s_type_sexp){ + Rf_error("Cannot call vec_to_arrow(). Please use arrow::install_arrow() to install required runtime libraries. 
"); +} +#endif // recordbatch.cpp int RecordBatch__num_columns(const std::shared_ptr& x); extern "C" SEXP _arrow_RecordBatch__num_columns(SEXP x_sexp){ @@ -3500,405 +3521,406 @@ return Rf_ScalarLogical( static const R_CallMethodDef CallEntries[] = { { "_arrow_available", (DL_FUNC)& _arrow_available, 0 }, { "_s3_available", (DL_FUNC)& _s3_available, 0 }, - { "_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2}, - { "_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3}, - { "_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2}, - { "_arrow_Array__IsValid", (DL_FUNC) &_arrow_Array__IsValid, 2}, - { "_arrow_Array__length", (DL_FUNC) &_arrow_Array__length, 1}, - { "_arrow_Array__offset", (DL_FUNC) &_arrow_Array__offset, 1}, - { "_arrow_Array__null_count", (DL_FUNC) &_arrow_Array__null_count, 1}, - { "_arrow_Array__type", (DL_FUNC) &_arrow_Array__type, 1}, - { "_arrow_Array__ToString", (DL_FUNC) &_arrow_Array__ToString, 1}, - { "_arrow_Array__type_id", (DL_FUNC) &_arrow_Array__type_id, 1}, - { "_arrow_Array__Equals", (DL_FUNC) &_arrow_Array__Equals, 2}, - { "_arrow_Array__ApproxEquals", (DL_FUNC) &_arrow_Array__ApproxEquals, 2}, - { "_arrow_Array__data", (DL_FUNC) &_arrow_Array__data, 1}, - { "_arrow_Array__RangeEquals", (DL_FUNC) &_arrow_Array__RangeEquals, 5}, - { "_arrow_Array__View", (DL_FUNC) &_arrow_Array__View, 2}, - { "_arrow_Array__Validate", (DL_FUNC) &_arrow_Array__Validate, 1}, - { "_arrow_DictionaryArray__indices", (DL_FUNC) &_arrow_DictionaryArray__indices, 1}, - { "_arrow_DictionaryArray__dictionary", (DL_FUNC) &_arrow_DictionaryArray__dictionary, 1}, - { "_arrow_StructArray__field", (DL_FUNC) &_arrow_StructArray__field, 2}, - { "_arrow_StructArray__GetFieldByName", (DL_FUNC) &_arrow_StructArray__GetFieldByName, 2}, - { "_arrow_StructArray__Flatten", (DL_FUNC) &_arrow_StructArray__Flatten, 1}, - { "_arrow_ListArray__value_type", (DL_FUNC) &_arrow_ListArray__value_type, 1}, - { "_arrow_LargeListArray__value_type", (DL_FUNC) 
&_arrow_LargeListArray__value_type, 1}, - { "_arrow_ListArray__values", (DL_FUNC) &_arrow_ListArray__values, 1}, - { "_arrow_LargeListArray__values", (DL_FUNC) &_arrow_LargeListArray__values, 1}, - { "_arrow_ListArray__value_length", (DL_FUNC) &_arrow_ListArray__value_length, 2}, - { "_arrow_LargeListArray__value_length", (DL_FUNC) &_arrow_LargeListArray__value_length, 2}, - { "_arrow_FixedSizeListArray__value_length", (DL_FUNC) &_arrow_FixedSizeListArray__value_length, 2}, - { "_arrow_ListArray__value_offset", (DL_FUNC) &_arrow_ListArray__value_offset, 2}, - { "_arrow_LargeListArray__value_offset", (DL_FUNC) &_arrow_LargeListArray__value_offset, 2}, - { "_arrow_FixedSizeListArray__value_offset", (DL_FUNC) &_arrow_FixedSizeListArray__value_offset, 2}, - { "_arrow_ListArray__raw_value_offsets", (DL_FUNC) &_arrow_ListArray__raw_value_offsets, 1}, - { "_arrow_LargeListArray__raw_value_offsets", (DL_FUNC) &_arrow_LargeListArray__raw_value_offsets, 1}, - { "_arrow_Array__infer_type", (DL_FUNC) &_arrow_Array__infer_type, 1}, - { "_arrow_Array__from_vector", (DL_FUNC) &_arrow_Array__from_vector, 2}, - { "_arrow_ChunkedArray__from_list", (DL_FUNC) &_arrow_ChunkedArray__from_list, 2}, - { "_arrow_DictionaryArray__FromArrays", (DL_FUNC) &_arrow_DictionaryArray__FromArrays, 3}, - { "_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1}, - { "_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 1}, - { "_arrow_RecordBatch__to_dataframe", (DL_FUNC) &_arrow_RecordBatch__to_dataframe, 2}, - { "_arrow_Table__to_dataframe", (DL_FUNC) &_arrow_Table__to_dataframe, 2}, - { "_arrow_ArrayData__get_type", (DL_FUNC) &_arrow_ArrayData__get_type, 1}, - { "_arrow_ArrayData__get_length", (DL_FUNC) &_arrow_ArrayData__get_length, 1}, - { "_arrow_ArrayData__get_null_count", (DL_FUNC) &_arrow_ArrayData__get_null_count, 1}, - { "_arrow_ArrayData__get_offset", (DL_FUNC) &_arrow_ArrayData__get_offset, 1}, - { "_arrow_ArrayData__buffers", (DL_FUNC) 
&_arrow_ArrayData__buffers, 1}, - { "_arrow_Buffer__is_mutable", (DL_FUNC) &_arrow_Buffer__is_mutable, 1}, - { "_arrow_Buffer__ZeroPadding", (DL_FUNC) &_arrow_Buffer__ZeroPadding, 1}, - { "_arrow_Buffer__capacity", (DL_FUNC) &_arrow_Buffer__capacity, 1}, - { "_arrow_Buffer__size", (DL_FUNC) &_arrow_Buffer__size, 1}, - { "_arrow_r___RBuffer__initialize", (DL_FUNC) &_arrow_r___RBuffer__initialize, 1}, - { "_arrow_Buffer__data", (DL_FUNC) &_arrow_Buffer__data, 1}, - { "_arrow_Buffer__Equals", (DL_FUNC) &_arrow_Buffer__Equals, 2}, - { "_arrow_ChunkedArray__length", (DL_FUNC) &_arrow_ChunkedArray__length, 1}, - { "_arrow_ChunkedArray__null_count", (DL_FUNC) &_arrow_ChunkedArray__null_count, 1}, - { "_arrow_ChunkedArray__num_chunks", (DL_FUNC) &_arrow_ChunkedArray__num_chunks, 1}, - { "_arrow_ChunkedArray__chunk", (DL_FUNC) &_arrow_ChunkedArray__chunk, 2}, - { "_arrow_ChunkedArray__chunks", (DL_FUNC) &_arrow_ChunkedArray__chunks, 1}, - { "_arrow_ChunkedArray__type", (DL_FUNC) &_arrow_ChunkedArray__type, 1}, - { "_arrow_ChunkedArray__Slice1", (DL_FUNC) &_arrow_ChunkedArray__Slice1, 2}, - { "_arrow_ChunkedArray__Slice2", (DL_FUNC) &_arrow_ChunkedArray__Slice2, 3}, - { "_arrow_ChunkedArray__View", (DL_FUNC) &_arrow_ChunkedArray__View, 2}, - { "_arrow_ChunkedArray__Validate", (DL_FUNC) &_arrow_ChunkedArray__Validate, 1}, - { "_arrow_ChunkedArray__Equals", (DL_FUNC) &_arrow_ChunkedArray__Equals, 2}, - { "_arrow_ChunkedArray__ToString", (DL_FUNC) &_arrow_ChunkedArray__ToString, 1}, - { "_arrow_util___Codec__Create", (DL_FUNC) &_arrow_util___Codec__Create, 2}, - { "_arrow_util___Codec__name", (DL_FUNC) &_arrow_util___Codec__name, 1}, - { "_arrow_util___Codec__IsAvailable", (DL_FUNC) &_arrow_util___Codec__IsAvailable, 1}, - { "_arrow_io___CompressedOutputStream__Make", (DL_FUNC) &_arrow_io___CompressedOutputStream__Make, 2}, - { "_arrow_io___CompressedInputStream__Make", (DL_FUNC) &_arrow_io___CompressedInputStream__Make, 2}, - { "_arrow_RecordBatch__cast", (DL_FUNC) 
&_arrow_RecordBatch__cast, 3}, - { "_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3}, - { "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3}, - { "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, - { "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, - { "_arrow_csv___ReadOptions__column_names", (DL_FUNC) &_arrow_csv___ReadOptions__column_names, 1}, - { "_arrow_csv___ConvertOptions__initialize", (DL_FUNC) &_arrow_csv___ConvertOptions__initialize, 1}, - { "_arrow_csv___TableReader__Make", (DL_FUNC) &_arrow_csv___TableReader__Make, 4}, - { "_arrow_csv___TableReader__Read", (DL_FUNC) &_arrow_csv___TableReader__Read, 1}, - { "_arrow_TimestampParser__kind", (DL_FUNC) &_arrow_TimestampParser__kind, 1}, - { "_arrow_TimestampParser__format", (DL_FUNC) &_arrow_TimestampParser__format, 1}, - { "_arrow_TimestampParser__MakeStrptime", (DL_FUNC) &_arrow_TimestampParser__MakeStrptime, 1}, - { "_arrow_TimestampParser__MakeISO8601", (DL_FUNC) &_arrow_TimestampParser__MakeISO8601, 0}, - { "_arrow_dataset___Dataset__NewScan", (DL_FUNC) &_arrow_dataset___Dataset__NewScan, 1}, - { "_arrow_dataset___Dataset__schema", (DL_FUNC) &_arrow_dataset___Dataset__schema, 1}, - { "_arrow_dataset___Dataset__type_name", (DL_FUNC) &_arrow_dataset___Dataset__type_name, 1}, - { "_arrow_dataset___Dataset__ReplaceSchema", (DL_FUNC) &_arrow_dataset___Dataset__ReplaceSchema, 2}, - { "_arrow_dataset___UnionDataset__create", (DL_FUNC) &_arrow_dataset___UnionDataset__create, 2}, - { "_arrow_dataset___InMemoryDataset__create", (DL_FUNC) &_arrow_dataset___InMemoryDataset__create, 1}, - { "_arrow_dataset___UnionDataset__children", (DL_FUNC) &_arrow_dataset___UnionDataset__children, 1}, - { "_arrow_dataset___FileSystemDataset__format", (DL_FUNC) &_arrow_dataset___FileSystemDataset__format, 1}, - { "_arrow_dataset___FileSystemDataset__filesystem", (DL_FUNC) 
&_arrow_dataset___FileSystemDataset__filesystem, 1}, - { "_arrow_dataset___FileSystemDataset__files", (DL_FUNC) &_arrow_dataset___FileSystemDataset__files, 1}, - { "_arrow_dataset___DatasetFactory__Finish1", (DL_FUNC) &_arrow_dataset___DatasetFactory__Finish1, 2}, - { "_arrow_dataset___DatasetFactory__Finish2", (DL_FUNC) &_arrow_dataset___DatasetFactory__Finish2, 2}, - { "_arrow_dataset___DatasetFactory__Inspect", (DL_FUNC) &_arrow_dataset___DatasetFactory__Inspect, 2}, - { "_arrow_dataset___UnionDatasetFactory__Make", (DL_FUNC) &_arrow_dataset___UnionDatasetFactory__Make, 1}, - { "_arrow_dataset___FileSystemDatasetFactory__Make2", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make2, 4}, - { "_arrow_dataset___FileSystemDatasetFactory__Make1", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make1, 3}, - { "_arrow_dataset___FileSystemDatasetFactory__Make3", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make3, 4}, - { "_arrow_dataset___FileFormat__type_name", (DL_FUNC) &_arrow_dataset___FileFormat__type_name, 1}, - { "_arrow_dataset___FileFormat__DefaultWriteOptions", (DL_FUNC) &_arrow_dataset___FileFormat__DefaultWriteOptions, 1}, - { "_arrow_dataset___ParquetFileFormat__Make", (DL_FUNC) &_arrow_dataset___ParquetFileFormat__Make, 3}, - { "_arrow_dataset___FileWriteOptions__type_name", (DL_FUNC) &_arrow_dataset___FileWriteOptions__type_name, 1}, - { "_arrow_dataset___ParquetFileWriteOptions__update", (DL_FUNC) &_arrow_dataset___ParquetFileWriteOptions__update, 3}, - { "_arrow_dataset___IpcFileWriteOptions__update2", (DL_FUNC) &_arrow_dataset___IpcFileWriteOptions__update2, 4}, - { "_arrow_dataset___IpcFileWriteOptions__update1", (DL_FUNC) &_arrow_dataset___IpcFileWriteOptions__update1, 3}, - { "_arrow_dataset___IpcFileFormat__Make", (DL_FUNC) &_arrow_dataset___IpcFileFormat__Make, 0}, - { "_arrow_dataset___CsvFileFormat__Make", (DL_FUNC) &_arrow_dataset___CsvFileFormat__Make, 1}, - { "_arrow_dataset___DirectoryPartitioning", (DL_FUNC) 
&_arrow_dataset___DirectoryPartitioning, 1}, - { "_arrow_dataset___DirectoryPartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___DirectoryPartitioning__MakeFactory, 1}, - { "_arrow_dataset___HivePartitioning", (DL_FUNC) &_arrow_dataset___HivePartitioning, 1}, - { "_arrow_dataset___HivePartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___HivePartitioning__MakeFactory, 0}, - { "_arrow_dataset___ScannerBuilder__Project", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Project, 2}, - { "_arrow_dataset___ScannerBuilder__Filter", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Filter, 2}, - { "_arrow_dataset___ScannerBuilder__UseThreads", (DL_FUNC) &_arrow_dataset___ScannerBuilder__UseThreads, 2}, - { "_arrow_dataset___ScannerBuilder__BatchSize", (DL_FUNC) &_arrow_dataset___ScannerBuilder__BatchSize, 2}, - { "_arrow_dataset___ScannerBuilder__schema", (DL_FUNC) &_arrow_dataset___ScannerBuilder__schema, 1}, - { "_arrow_dataset___ScannerBuilder__Finish", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Finish, 1}, - { "_arrow_dataset___Scanner__ToTable", (DL_FUNC) &_arrow_dataset___Scanner__ToTable, 1}, - { "_arrow_dataset___Scanner__head", (DL_FUNC) &_arrow_dataset___Scanner__head, 2}, - { "_arrow_dataset___Scanner__Scan", (DL_FUNC) &_arrow_dataset___Scanner__Scan, 1}, - { "_arrow_dataset___Scanner__schema", (DL_FUNC) &_arrow_dataset___Scanner__schema, 1}, - { "_arrow_dataset___ScanTask__get_batches", (DL_FUNC) &_arrow_dataset___ScanTask__get_batches, 1}, - { "_arrow_dataset___Dataset__Write", (DL_FUNC) &_arrow_dataset___Dataset__Write, 6}, - { "_arrow_Int8__initialize", (DL_FUNC) &_arrow_Int8__initialize, 0}, - { "_arrow_Int16__initialize", (DL_FUNC) &_arrow_Int16__initialize, 0}, - { "_arrow_Int32__initialize", (DL_FUNC) &_arrow_Int32__initialize, 0}, - { "_arrow_Int64__initialize", (DL_FUNC) &_arrow_Int64__initialize, 0}, - { "_arrow_UInt8__initialize", (DL_FUNC) &_arrow_UInt8__initialize, 0}, - { "_arrow_UInt16__initialize", (DL_FUNC) &_arrow_UInt16__initialize, 0}, - { 
"_arrow_UInt32__initialize", (DL_FUNC) &_arrow_UInt32__initialize, 0}, - { "_arrow_UInt64__initialize", (DL_FUNC) &_arrow_UInt64__initialize, 0}, - { "_arrow_Float16__initialize", (DL_FUNC) &_arrow_Float16__initialize, 0}, - { "_arrow_Float32__initialize", (DL_FUNC) &_arrow_Float32__initialize, 0}, - { "_arrow_Float64__initialize", (DL_FUNC) &_arrow_Float64__initialize, 0}, - { "_arrow_Boolean__initialize", (DL_FUNC) &_arrow_Boolean__initialize, 0}, - { "_arrow_Utf8__initialize", (DL_FUNC) &_arrow_Utf8__initialize, 0}, - { "_arrow_LargeUtf8__initialize", (DL_FUNC) &_arrow_LargeUtf8__initialize, 0}, - { "_arrow_Binary__initialize", (DL_FUNC) &_arrow_Binary__initialize, 0}, - { "_arrow_LargeBinary__initialize", (DL_FUNC) &_arrow_LargeBinary__initialize, 0}, - { "_arrow_Date32__initialize", (DL_FUNC) &_arrow_Date32__initialize, 0}, - { "_arrow_Date64__initialize", (DL_FUNC) &_arrow_Date64__initialize, 0}, - { "_arrow_Null__initialize", (DL_FUNC) &_arrow_Null__initialize, 0}, - { "_arrow_Decimal128Type__initialize", (DL_FUNC) &_arrow_Decimal128Type__initialize, 2}, - { "_arrow_FixedSizeBinary__initialize", (DL_FUNC) &_arrow_FixedSizeBinary__initialize, 1}, - { "_arrow_Timestamp__initialize", (DL_FUNC) &_arrow_Timestamp__initialize, 2}, - { "_arrow_Time32__initialize", (DL_FUNC) &_arrow_Time32__initialize, 1}, - { "_arrow_Time64__initialize", (DL_FUNC) &_arrow_Time64__initialize, 1}, - { "_arrow_list__", (DL_FUNC) &_arrow_list__, 1}, - { "_arrow_large_list__", (DL_FUNC) &_arrow_large_list__, 1}, - { "_arrow_fixed_size_list__", (DL_FUNC) &_arrow_fixed_size_list__, 2}, - { "_arrow_struct__", (DL_FUNC) &_arrow_struct__, 1}, - { "_arrow_DataType__ToString", (DL_FUNC) &_arrow_DataType__ToString, 1}, - { "_arrow_DataType__name", (DL_FUNC) &_arrow_DataType__name, 1}, - { "_arrow_DataType__Equals", (DL_FUNC) &_arrow_DataType__Equals, 2}, - { "_arrow_DataType__num_fields", (DL_FUNC) &_arrow_DataType__num_fields, 1}, - { "_arrow_DataType__fields", (DL_FUNC) 
&_arrow_DataType__fields, 1}, - { "_arrow_DataType__id", (DL_FUNC) &_arrow_DataType__id, 1}, - { "_arrow_ListType__ToString", (DL_FUNC) &_arrow_ListType__ToString, 1}, - { "_arrow_FixedWidthType__bit_width", (DL_FUNC) &_arrow_FixedWidthType__bit_width, 1}, - { "_arrow_DateType__unit", (DL_FUNC) &_arrow_DateType__unit, 1}, - { "_arrow_TimeType__unit", (DL_FUNC) &_arrow_TimeType__unit, 1}, - { "_arrow_DecimalType__precision", (DL_FUNC) &_arrow_DecimalType__precision, 1}, - { "_arrow_DecimalType__scale", (DL_FUNC) &_arrow_DecimalType__scale, 1}, - { "_arrow_TimestampType__timezone", (DL_FUNC) &_arrow_TimestampType__timezone, 1}, - { "_arrow_TimestampType__unit", (DL_FUNC) &_arrow_TimestampType__unit, 1}, - { "_arrow_DictionaryType__initialize", (DL_FUNC) &_arrow_DictionaryType__initialize, 3}, - { "_arrow_DictionaryType__index_type", (DL_FUNC) &_arrow_DictionaryType__index_type, 1}, - { "_arrow_DictionaryType__value_type", (DL_FUNC) &_arrow_DictionaryType__value_type, 1}, - { "_arrow_DictionaryType__name", (DL_FUNC) &_arrow_DictionaryType__name, 1}, - { "_arrow_DictionaryType__ordered", (DL_FUNC) &_arrow_DictionaryType__ordered, 1}, - { "_arrow_StructType__GetFieldByName", (DL_FUNC) &_arrow_StructType__GetFieldByName, 2}, - { "_arrow_StructType__GetFieldIndex", (DL_FUNC) &_arrow_StructType__GetFieldIndex, 2}, - { "_arrow_StructType__field_names", (DL_FUNC) &_arrow_StructType__field_names, 1}, - { "_arrow_ListType__value_field", (DL_FUNC) &_arrow_ListType__value_field, 1}, - { "_arrow_ListType__value_type", (DL_FUNC) &_arrow_ListType__value_type, 1}, - { "_arrow_LargeListType__value_field", (DL_FUNC) &_arrow_LargeListType__value_field, 1}, - { "_arrow_LargeListType__value_type", (DL_FUNC) &_arrow_LargeListType__value_type, 1}, - { "_arrow_FixedSizeListType__value_field", (DL_FUNC) &_arrow_FixedSizeListType__value_field, 1}, - { "_arrow_FixedSizeListType__value_type", (DL_FUNC) &_arrow_FixedSizeListType__value_type, 1}, - { "_arrow_FixedSizeListType__list_size", 
(DL_FUNC) &_arrow_FixedSizeListType__list_size, 1}, - { "_arrow_dataset___expr__call", (DL_FUNC) &_arrow_dataset___expr__call, 3}, - { "_arrow_dataset___expr__field_ref", (DL_FUNC) &_arrow_dataset___expr__field_ref, 1}, - { "_arrow_dataset___expr__scalar", (DL_FUNC) &_arrow_dataset___expr__scalar, 1}, - { "_arrow_dataset___expr__ToString", (DL_FUNC) &_arrow_dataset___expr__ToString, 1}, - { "_arrow_ipc___WriteFeather__Table", (DL_FUNC) &_arrow_ipc___WriteFeather__Table, 6}, - { "_arrow_ipc___feather___Reader__version", (DL_FUNC) &_arrow_ipc___feather___Reader__version, 1}, - { "_arrow_ipc___feather___Reader__Read", (DL_FUNC) &_arrow_ipc___feather___Reader__Read, 2}, - { "_arrow_ipc___feather___Reader__Open", (DL_FUNC) &_arrow_ipc___feather___Reader__Open, 1}, - { "_arrow_ipc___feather___Reader__column_names", (DL_FUNC) &_arrow_ipc___feather___Reader__column_names, 1}, - { "_arrow_Field__initialize", (DL_FUNC) &_arrow_Field__initialize, 3}, - { "_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 1}, - { "_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1}, - { "_arrow_Field__Equals", (DL_FUNC) &_arrow_Field__Equals, 2}, - { "_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1}, - { "_arrow_Field__type", (DL_FUNC) &_arrow_Field__type, 1}, - { "_arrow_fs___FileInfo__type", (DL_FUNC) &_arrow_fs___FileInfo__type, 1}, - { "_arrow_fs___FileInfo__set_type", (DL_FUNC) &_arrow_fs___FileInfo__set_type, 2}, - { "_arrow_fs___FileInfo__path", (DL_FUNC) &_arrow_fs___FileInfo__path, 1}, - { "_arrow_fs___FileInfo__set_path", (DL_FUNC) &_arrow_fs___FileInfo__set_path, 2}, - { "_arrow_fs___FileInfo__size", (DL_FUNC) &_arrow_fs___FileInfo__size, 1}, - { "_arrow_fs___FileInfo__set_size", (DL_FUNC) &_arrow_fs___FileInfo__set_size, 2}, - { "_arrow_fs___FileInfo__base_name", (DL_FUNC) &_arrow_fs___FileInfo__base_name, 1}, - { "_arrow_fs___FileInfo__extension", (DL_FUNC) &_arrow_fs___FileInfo__extension, 1}, - { "_arrow_fs___FileInfo__mtime", (DL_FUNC) 
&_arrow_fs___FileInfo__mtime, 1}, - { "_arrow_fs___FileInfo__set_mtime", (DL_FUNC) &_arrow_fs___FileInfo__set_mtime, 2}, - { "_arrow_fs___FileSelector__base_dir", (DL_FUNC) &_arrow_fs___FileSelector__base_dir, 1}, - { "_arrow_fs___FileSelector__allow_not_found", (DL_FUNC) &_arrow_fs___FileSelector__allow_not_found, 1}, - { "_arrow_fs___FileSelector__recursive", (DL_FUNC) &_arrow_fs___FileSelector__recursive, 1}, - { "_arrow_fs___FileSelector__create", (DL_FUNC) &_arrow_fs___FileSelector__create, 3}, - { "_arrow_fs___FileSystem__GetTargetInfos_Paths", (DL_FUNC) &_arrow_fs___FileSystem__GetTargetInfos_Paths, 2}, - { "_arrow_fs___FileSystem__GetTargetInfos_FileSelector", (DL_FUNC) &_arrow_fs___FileSystem__GetTargetInfos_FileSelector, 2}, - { "_arrow_fs___FileSystem__CreateDir", (DL_FUNC) &_arrow_fs___FileSystem__CreateDir, 3}, - { "_arrow_fs___FileSystem__DeleteDir", (DL_FUNC) &_arrow_fs___FileSystem__DeleteDir, 2}, - { "_arrow_fs___FileSystem__DeleteDirContents", (DL_FUNC) &_arrow_fs___FileSystem__DeleteDirContents, 2}, - { "_arrow_fs___FileSystem__DeleteFile", (DL_FUNC) &_arrow_fs___FileSystem__DeleteFile, 2}, - { "_arrow_fs___FileSystem__DeleteFiles", (DL_FUNC) &_arrow_fs___FileSystem__DeleteFiles, 2}, - { "_arrow_fs___FileSystem__Move", (DL_FUNC) &_arrow_fs___FileSystem__Move, 3}, - { "_arrow_fs___FileSystem__CopyFile", (DL_FUNC) &_arrow_fs___FileSystem__CopyFile, 3}, - { "_arrow_fs___FileSystem__OpenInputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenInputStream, 2}, - { "_arrow_fs___FileSystem__OpenInputFile", (DL_FUNC) &_arrow_fs___FileSystem__OpenInputFile, 2}, - { "_arrow_fs___FileSystem__OpenOutputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenOutputStream, 2}, - { "_arrow_fs___FileSystem__OpenAppendStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenAppendStream, 2}, - { "_arrow_fs___FileSystem__type_name", (DL_FUNC) &_arrow_fs___FileSystem__type_name, 1}, - { "_arrow_fs___LocalFileSystem__create", (DL_FUNC) &_arrow_fs___LocalFileSystem__create, 0}, - { 
"_arrow_fs___SubTreeFileSystem__create", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__create, 2}, - { "_arrow_fs___SubTreeFileSystem__base_fs", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_fs, 1}, - { "_arrow_fs___SubTreeFileSystem__base_path", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_path, 1}, - { "_arrow_fs___FileSystemFromUri", (DL_FUNC) &_arrow_fs___FileSystemFromUri, 1}, - { "_arrow_fs___CopyFiles", (DL_FUNC) &_arrow_fs___CopyFiles, 6}, - { "_arrow_fs___S3FileSystem__create", (DL_FUNC) &_arrow_fs___S3FileSystem__create, 12}, - { "_arrow_fs___S3FileSystem__region", (DL_FUNC) &_arrow_fs___S3FileSystem__region, 1}, - { "_arrow_io___Readable__Read", (DL_FUNC) &_arrow_io___Readable__Read, 2}, - { "_arrow_io___InputStream__Close", (DL_FUNC) &_arrow_io___InputStream__Close, 1}, - { "_arrow_io___OutputStream__Close", (DL_FUNC) &_arrow_io___OutputStream__Close, 1}, - { "_arrow_io___RandomAccessFile__GetSize", (DL_FUNC) &_arrow_io___RandomAccessFile__GetSize, 1}, - { "_arrow_io___RandomAccessFile__supports_zero_copy", (DL_FUNC) &_arrow_io___RandomAccessFile__supports_zero_copy, 1}, - { "_arrow_io___RandomAccessFile__Seek", (DL_FUNC) &_arrow_io___RandomAccessFile__Seek, 2}, - { "_arrow_io___RandomAccessFile__Tell", (DL_FUNC) &_arrow_io___RandomAccessFile__Tell, 1}, - { "_arrow_io___RandomAccessFile__Read0", (DL_FUNC) &_arrow_io___RandomAccessFile__Read0, 1}, - { "_arrow_io___RandomAccessFile__ReadAt", (DL_FUNC) &_arrow_io___RandomAccessFile__ReadAt, 3}, - { "_arrow_io___MemoryMappedFile__Create", (DL_FUNC) &_arrow_io___MemoryMappedFile__Create, 2}, - { "_arrow_io___MemoryMappedFile__Open", (DL_FUNC) &_arrow_io___MemoryMappedFile__Open, 2}, - { "_arrow_io___MemoryMappedFile__Resize", (DL_FUNC) &_arrow_io___MemoryMappedFile__Resize, 2}, - { "_arrow_io___ReadableFile__Open", (DL_FUNC) &_arrow_io___ReadableFile__Open, 1}, - { "_arrow_io___BufferReader__initialize", (DL_FUNC) &_arrow_io___BufferReader__initialize, 1}, - { "_arrow_io___Writable__write", (DL_FUNC) 
&_arrow_io___Writable__write, 2}, - { "_arrow_io___OutputStream__Tell", (DL_FUNC) &_arrow_io___OutputStream__Tell, 1}, - { "_arrow_io___FileOutputStream__Open", (DL_FUNC) &_arrow_io___FileOutputStream__Open, 1}, - { "_arrow_io___BufferOutputStream__Create", (DL_FUNC) &_arrow_io___BufferOutputStream__Create, 1}, - { "_arrow_io___BufferOutputStream__capacity", (DL_FUNC) &_arrow_io___BufferOutputStream__capacity, 1}, - { "_arrow_io___BufferOutputStream__Finish", (DL_FUNC) &_arrow_io___BufferOutputStream__Finish, 1}, - { "_arrow_io___BufferOutputStream__Tell", (DL_FUNC) &_arrow_io___BufferOutputStream__Tell, 1}, - { "_arrow_io___BufferOutputStream__Write", (DL_FUNC) &_arrow_io___BufferOutputStream__Write, 2}, - { "_arrow_json___ReadOptions__initialize", (DL_FUNC) &_arrow_json___ReadOptions__initialize, 2}, - { "_arrow_json___ParseOptions__initialize", (DL_FUNC) &_arrow_json___ParseOptions__initialize, 1}, - { "_arrow_json___TableReader__Make", (DL_FUNC) &_arrow_json___TableReader__Make, 3}, - { "_arrow_json___TableReader__Read", (DL_FUNC) &_arrow_json___TableReader__Read, 1}, - { "_arrow_MemoryPool__default", (DL_FUNC) &_arrow_MemoryPool__default, 0}, - { "_arrow_MemoryPool__bytes_allocated", (DL_FUNC) &_arrow_MemoryPool__bytes_allocated, 1}, - { "_arrow_MemoryPool__max_memory", (DL_FUNC) &_arrow_MemoryPool__max_memory, 1}, - { "_arrow_MemoryPool__backend_name", (DL_FUNC) &_arrow_MemoryPool__backend_name, 1}, - { "_arrow_supported_memory_backends", (DL_FUNC) &_arrow_supported_memory_backends, 0}, - { "_arrow_ipc___Message__body_length", (DL_FUNC) &_arrow_ipc___Message__body_length, 1}, - { "_arrow_ipc___Message__metadata", (DL_FUNC) &_arrow_ipc___Message__metadata, 1}, - { "_arrow_ipc___Message__body", (DL_FUNC) &_arrow_ipc___Message__body, 1}, - { "_arrow_ipc___Message__Verify", (DL_FUNC) &_arrow_ipc___Message__Verify, 1}, - { "_arrow_ipc___Message__type", (DL_FUNC) &_arrow_ipc___Message__type, 1}, - { "_arrow_ipc___Message__Equals", (DL_FUNC) 
&_arrow_ipc___Message__Equals, 2}, - { "_arrow_ipc___ReadRecordBatch__Message__Schema", (DL_FUNC) &_arrow_ipc___ReadRecordBatch__Message__Schema, 2}, - { "_arrow_ipc___ReadSchema_InputStream", (DL_FUNC) &_arrow_ipc___ReadSchema_InputStream, 1}, - { "_arrow_ipc___ReadSchema_Message", (DL_FUNC) &_arrow_ipc___ReadSchema_Message, 1}, - { "_arrow_ipc___MessageReader__Open", (DL_FUNC) &_arrow_ipc___MessageReader__Open, 1}, - { "_arrow_ipc___MessageReader__ReadNextMessage", (DL_FUNC) &_arrow_ipc___MessageReader__ReadNextMessage, 1}, - { "_arrow_ipc___ReadMessage", (DL_FUNC) &_arrow_ipc___ReadMessage, 1}, - { "_arrow_parquet___arrow___ArrowReaderProperties__Make", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__Make, 1}, - { "_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads, 2}, - { "_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads, 2}, - { "_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary, 2}, - { "_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary, 3}, - { "_arrow_parquet___arrow___FileReader__OpenFile", (DL_FUNC) &_arrow_parquet___arrow___FileReader__OpenFile, 2}, - { "_arrow_parquet___arrow___FileReader__ReadTable1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable1, 1}, - { "_arrow_parquet___arrow___FileReader__ReadTable2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable2, 2}, - { "_arrow_parquet___arrow___FileReader__ReadRowGroup1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroup1, 2}, - { "_arrow_parquet___arrow___FileReader__ReadRowGroup2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroup2, 3}, - { 
"_arrow_parquet___arrow___FileReader__ReadRowGroups1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroups1, 2}, - { "_arrow_parquet___arrow___FileReader__ReadRowGroups2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroups2, 3}, - { "_arrow_parquet___arrow___FileReader__num_rows", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_rows, 1}, - { "_arrow_parquet___arrow___FileReader__num_columns", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_columns, 1}, - { "_arrow_parquet___arrow___FileReader__num_row_groups", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_row_groups, 1}, - { "_arrow_parquet___arrow___FileReader__ReadColumn", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadColumn, 2}, - { "_arrow_parquet___ArrowWriterProperties___create", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___create, 3}, - { "_arrow_parquet___WriterProperties___Builder__create", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__create, 0}, - { "_arrow_parquet___WriterProperties___Builder__version", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__version, 2}, - { "_arrow_parquet___ArrowWriterProperties___Builder__set_compressions", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_compressions, 3}, - { "_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels, 3}, - { "_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary, 3}, - { "_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics, 3}, - { "_arrow_parquet___ArrowWriterProperties___Builder__data_page_size", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__data_page_size, 2}, - { "_arrow_parquet___WriterProperties___Builder__build", (DL_FUNC) 
&_arrow_parquet___WriterProperties___Builder__build, 1}, - { "_arrow_parquet___arrow___ParquetFileWriter__Open", (DL_FUNC) &_arrow_parquet___arrow___ParquetFileWriter__Open, 4}, - { "_arrow_parquet___arrow___FileWriter__WriteTable", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__WriteTable, 3}, - { "_arrow_parquet___arrow___FileWriter__Close", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__Close, 1}, - { "_arrow_parquet___arrow___WriteTable", (DL_FUNC) &_arrow_parquet___arrow___WriteTable, 4}, - { "_arrow_parquet___arrow___FileReader__GetSchema", (DL_FUNC) &_arrow_parquet___arrow___FileReader__GetSchema, 1}, - { "_arrow_ImportArray", (DL_FUNC) &_arrow_ImportArray, 2}, - { "_arrow_ImportRecordBatch", (DL_FUNC) &_arrow_ImportRecordBatch, 2}, - { "_arrow_ImportSchema", (DL_FUNC) &_arrow_ImportSchema, 1}, - { "_arrow_allocate_arrow_schema", (DL_FUNC) &_arrow_allocate_arrow_schema, 0}, - { "_arrow_delete_arrow_schema", (DL_FUNC) &_arrow_delete_arrow_schema, 1}, - { "_arrow_allocate_arrow_array", (DL_FUNC) &_arrow_allocate_arrow_array, 0}, - { "_arrow_delete_arrow_array", (DL_FUNC) &_arrow_delete_arrow_array, 1}, - { "_arrow_ExportType", (DL_FUNC) &_arrow_ExportType, 2}, - { "_arrow_ExportSchema", (DL_FUNC) &_arrow_ExportSchema, 2}, - { "_arrow_ExportArray", (DL_FUNC) &_arrow_ExportArray, 3}, - { "_arrow_ExportRecordBatch", (DL_FUNC) &_arrow_ExportRecordBatch, 3}, - { "_arrow_RecordBatch__num_columns", (DL_FUNC) &_arrow_RecordBatch__num_columns, 1}, - { "_arrow_RecordBatch__num_rows", (DL_FUNC) &_arrow_RecordBatch__num_rows, 1}, - { "_arrow_RecordBatch__schema", (DL_FUNC) &_arrow_RecordBatch__schema, 1}, - { "_arrow_RecordBatch__RenameColumns", (DL_FUNC) &_arrow_RecordBatch__RenameColumns, 2}, - { "_arrow_RecordBatch__ReplaceSchemaMetadata", (DL_FUNC) &_arrow_RecordBatch__ReplaceSchemaMetadata, 2}, - { "_arrow_RecordBatch__columns", (DL_FUNC) &_arrow_RecordBatch__columns, 1}, - { "_arrow_RecordBatch__column", (DL_FUNC) &_arrow_RecordBatch__column, 2}, - { 
"_arrow_RecordBatch__GetColumnByName", (DL_FUNC) &_arrow_RecordBatch__GetColumnByName, 2}, - { "_arrow_RecordBatch__SelectColumns", (DL_FUNC) &_arrow_RecordBatch__SelectColumns, 2}, - { "_arrow_RecordBatch__Equals", (DL_FUNC) &_arrow_RecordBatch__Equals, 3}, - { "_arrow_RecordBatch__AddColumn", (DL_FUNC) &_arrow_RecordBatch__AddColumn, 4}, - { "_arrow_RecordBatch__SetColumn", (DL_FUNC) &_arrow_RecordBatch__SetColumn, 4}, - { "_arrow_RecordBatch__RemoveColumn", (DL_FUNC) &_arrow_RecordBatch__RemoveColumn, 2}, - { "_arrow_RecordBatch__column_name", (DL_FUNC) &_arrow_RecordBatch__column_name, 2}, - { "_arrow_RecordBatch__names", (DL_FUNC) &_arrow_RecordBatch__names, 1}, - { "_arrow_RecordBatch__Slice1", (DL_FUNC) &_arrow_RecordBatch__Slice1, 2}, - { "_arrow_RecordBatch__Slice2", (DL_FUNC) &_arrow_RecordBatch__Slice2, 3}, - { "_arrow_ipc___SerializeRecordBatch__Raw", (DL_FUNC) &_arrow_ipc___SerializeRecordBatch__Raw, 1}, - { "_arrow_ipc___ReadRecordBatch__InputStream__Schema", (DL_FUNC) &_arrow_ipc___ReadRecordBatch__InputStream__Schema, 2}, - { "_arrow_RecordBatch__from_arrays", (DL_FUNC) &_arrow_RecordBatch__from_arrays, 2}, - { "_arrow_RecordBatchReader__schema", (DL_FUNC) &_arrow_RecordBatchReader__schema, 1}, - { "_arrow_RecordBatchReader__ReadNext", (DL_FUNC) &_arrow_RecordBatchReader__ReadNext, 1}, - { "_arrow_ipc___RecordBatchStreamReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamReader__Open, 1}, - { "_arrow_ipc___RecordBatchStreamReader__batches", (DL_FUNC) &_arrow_ipc___RecordBatchStreamReader__batches, 1}, - { "_arrow_ipc___RecordBatchFileReader__schema", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__schema, 1}, - { "_arrow_ipc___RecordBatchFileReader__num_record_batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__num_record_batches, 1}, - { "_arrow_ipc___RecordBatchFileReader__ReadRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__ReadRecordBatch, 2}, - { "_arrow_ipc___RecordBatchFileReader__Open", (DL_FUNC) 
&_arrow_ipc___RecordBatchFileReader__Open, 1}, - { "_arrow_Table__from_RecordBatchReader", (DL_FUNC) &_arrow_Table__from_RecordBatchReader, 1}, - { "_arrow_Table__from_RecordBatchFileReader", (DL_FUNC) &_arrow_Table__from_RecordBatchFileReader, 1}, - { "_arrow_ipc___RecordBatchFileReader__batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__batches, 1}, - { "_arrow_ipc___RecordBatchWriter__WriteRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteRecordBatch, 2}, - { "_arrow_ipc___RecordBatchWriter__WriteTable", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteTable, 2}, - { "_arrow_ipc___RecordBatchWriter__Close", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__Close, 1}, - { "_arrow_ipc___RecordBatchFileWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileWriter__Open, 4}, - { "_arrow_ipc___RecordBatchStreamWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamWriter__Open, 4}, - { "_arrow_Array__GetScalar", (DL_FUNC) &_arrow_Array__GetScalar, 2}, - { "_arrow_Scalar__ToString", (DL_FUNC) &_arrow_Scalar__ToString, 1}, - { "_arrow_StructScalar__field", (DL_FUNC) &_arrow_StructScalar__field, 2}, - { "_arrow_StructScalar__GetFieldByName", (DL_FUNC) &_arrow_StructScalar__GetFieldByName, 2}, - { "_arrow_Scalar__as_vector", (DL_FUNC) &_arrow_Scalar__as_vector, 1}, - { "_arrow_Scalar__is_valid", (DL_FUNC) &_arrow_Scalar__is_valid, 1}, - { "_arrow_Scalar__type", (DL_FUNC) &_arrow_Scalar__type, 1}, - { "_arrow_schema_", (DL_FUNC) &_arrow_schema_, 1}, - { "_arrow_Schema__ToString", (DL_FUNC) &_arrow_Schema__ToString, 1}, - { "_arrow_Schema__num_fields", (DL_FUNC) &_arrow_Schema__num_fields, 1}, - { "_arrow_Schema__field", (DL_FUNC) &_arrow_Schema__field, 2}, - { "_arrow_Schema__GetFieldByName", (DL_FUNC) &_arrow_Schema__GetFieldByName, 2}, - { "_arrow_Schema__fields", (DL_FUNC) &_arrow_Schema__fields, 1}, - { "_arrow_Schema__field_names", (DL_FUNC) &_arrow_Schema__field_names, 1}, - { "_arrow_Schema__HasMetadata", (DL_FUNC) &_arrow_Schema__HasMetadata, 1}, - { 
"_arrow_Schema__metadata", (DL_FUNC) &_arrow_Schema__metadata, 1}, - { "_arrow_Schema__WithMetadata", (DL_FUNC) &_arrow_Schema__WithMetadata, 2}, - { "_arrow_Schema__serialize", (DL_FUNC) &_arrow_Schema__serialize, 1}, - { "_arrow_Schema__Equals", (DL_FUNC) &_arrow_Schema__Equals, 3}, - { "_arrow_arrow__UnifySchemas", (DL_FUNC) &_arrow_arrow__UnifySchemas, 1}, - { "_arrow_Table__num_columns", (DL_FUNC) &_arrow_Table__num_columns, 1}, - { "_arrow_Table__num_rows", (DL_FUNC) &_arrow_Table__num_rows, 1}, - { "_arrow_Table__schema", (DL_FUNC) &_arrow_Table__schema, 1}, - { "_arrow_Table__ReplaceSchemaMetadata", (DL_FUNC) &_arrow_Table__ReplaceSchemaMetadata, 2}, - { "_arrow_Table__column", (DL_FUNC) &_arrow_Table__column, 2}, - { "_arrow_Table__field", (DL_FUNC) &_arrow_Table__field, 2}, - { "_arrow_Table__columns", (DL_FUNC) &_arrow_Table__columns, 1}, - { "_arrow_Table__ColumnNames", (DL_FUNC) &_arrow_Table__ColumnNames, 1}, - { "_arrow_Table__RenameColumns", (DL_FUNC) &_arrow_Table__RenameColumns, 2}, - { "_arrow_Table__Slice1", (DL_FUNC) &_arrow_Table__Slice1, 2}, - { "_arrow_Table__Slice2", (DL_FUNC) &_arrow_Table__Slice2, 3}, - { "_arrow_Table__Equals", (DL_FUNC) &_arrow_Table__Equals, 3}, - { "_arrow_Table__Validate", (DL_FUNC) &_arrow_Table__Validate, 1}, - { "_arrow_Table__ValidateFull", (DL_FUNC) &_arrow_Table__ValidateFull, 1}, - { "_arrow_Table__GetColumnByName", (DL_FUNC) &_arrow_Table__GetColumnByName, 2}, - { "_arrow_Table__RemoveColumn", (DL_FUNC) &_arrow_Table__RemoveColumn, 2}, - { "_arrow_Table__AddColumn", (DL_FUNC) &_arrow_Table__AddColumn, 4}, - { "_arrow_Table__SetColumn", (DL_FUNC) &_arrow_Table__SetColumn, 4}, - { "_arrow_Table__SelectColumns", (DL_FUNC) &_arrow_Table__SelectColumns, 2}, - { "_arrow_all_record_batches", (DL_FUNC) &_arrow_all_record_batches, 1}, - { "_arrow_Table__from_record_batches", (DL_FUNC) &_arrow_Table__from_record_batches, 2}, - { "_arrow_Table__from_dots", (DL_FUNC) &_arrow_Table__from_dots, 2}, - { 
"_arrow_GetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_GetCpuThreadPoolCapacity, 0}, - { "_arrow_SetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_SetCpuThreadPoolCapacity, 1}, - { "_arrow_Table__Reset", (DL_FUNC) &_arrow_Table__Reset, 1}, - { "_arrow_RecordBatch__Reset", (DL_FUNC) &_arrow_RecordBatch__Reset, 1}, + { "_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2}, + { "_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3}, + { "_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2}, + { "_arrow_Array__IsValid", (DL_FUNC) &_arrow_Array__IsValid, 2}, + { "_arrow_Array__length", (DL_FUNC) &_arrow_Array__length, 1}, + { "_arrow_Array__offset", (DL_FUNC) &_arrow_Array__offset, 1}, + { "_arrow_Array__null_count", (DL_FUNC) &_arrow_Array__null_count, 1}, + { "_arrow_Array__type", (DL_FUNC) &_arrow_Array__type, 1}, + { "_arrow_Array__ToString", (DL_FUNC) &_arrow_Array__ToString, 1}, + { "_arrow_Array__type_id", (DL_FUNC) &_arrow_Array__type_id, 1}, + { "_arrow_Array__Equals", (DL_FUNC) &_arrow_Array__Equals, 2}, + { "_arrow_Array__ApproxEquals", (DL_FUNC) &_arrow_Array__ApproxEquals, 2}, + { "_arrow_Array__data", (DL_FUNC) &_arrow_Array__data, 1}, + { "_arrow_Array__RangeEquals", (DL_FUNC) &_arrow_Array__RangeEquals, 5}, + { "_arrow_Array__View", (DL_FUNC) &_arrow_Array__View, 2}, + { "_arrow_Array__Validate", (DL_FUNC) &_arrow_Array__Validate, 1}, + { "_arrow_DictionaryArray__indices", (DL_FUNC) &_arrow_DictionaryArray__indices, 1}, + { "_arrow_DictionaryArray__dictionary", (DL_FUNC) &_arrow_DictionaryArray__dictionary, 1}, + { "_arrow_StructArray__field", (DL_FUNC) &_arrow_StructArray__field, 2}, + { "_arrow_StructArray__GetFieldByName", (DL_FUNC) &_arrow_StructArray__GetFieldByName, 2}, + { "_arrow_StructArray__Flatten", (DL_FUNC) &_arrow_StructArray__Flatten, 1}, + { "_arrow_ListArray__value_type", (DL_FUNC) &_arrow_ListArray__value_type, 1}, + { "_arrow_LargeListArray__value_type", (DL_FUNC) &_arrow_LargeListArray__value_type, 1}, + { 
"_arrow_ListArray__values", (DL_FUNC) &_arrow_ListArray__values, 1}, + { "_arrow_LargeListArray__values", (DL_FUNC) &_arrow_LargeListArray__values, 1}, + { "_arrow_ListArray__value_length", (DL_FUNC) &_arrow_ListArray__value_length, 2}, + { "_arrow_LargeListArray__value_length", (DL_FUNC) &_arrow_LargeListArray__value_length, 2}, + { "_arrow_FixedSizeListArray__value_length", (DL_FUNC) &_arrow_FixedSizeListArray__value_length, 2}, + { "_arrow_ListArray__value_offset", (DL_FUNC) &_arrow_ListArray__value_offset, 2}, + { "_arrow_LargeListArray__value_offset", (DL_FUNC) &_arrow_LargeListArray__value_offset, 2}, + { "_arrow_FixedSizeListArray__value_offset", (DL_FUNC) &_arrow_FixedSizeListArray__value_offset, 2}, + { "_arrow_ListArray__raw_value_offsets", (DL_FUNC) &_arrow_ListArray__raw_value_offsets, 1}, + { "_arrow_LargeListArray__raw_value_offsets", (DL_FUNC) &_arrow_LargeListArray__raw_value_offsets, 1}, + { "_arrow_Array__infer_type", (DL_FUNC) &_arrow_Array__infer_type, 1}, + { "_arrow_Array__from_vector", (DL_FUNC) &_arrow_Array__from_vector, 2}, + { "_arrow_ChunkedArray__from_list", (DL_FUNC) &_arrow_ChunkedArray__from_list, 2}, + { "_arrow_DictionaryArray__FromArrays", (DL_FUNC) &_arrow_DictionaryArray__FromArrays, 3}, + { "_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1}, + { "_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 1}, + { "_arrow_RecordBatch__to_dataframe", (DL_FUNC) &_arrow_RecordBatch__to_dataframe, 2}, + { "_arrow_Table__to_dataframe", (DL_FUNC) &_arrow_Table__to_dataframe, 2}, + { "_arrow_ArrayData__get_type", (DL_FUNC) &_arrow_ArrayData__get_type, 1}, + { "_arrow_ArrayData__get_length", (DL_FUNC) &_arrow_ArrayData__get_length, 1}, + { "_arrow_ArrayData__get_null_count", (DL_FUNC) &_arrow_ArrayData__get_null_count, 1}, + { "_arrow_ArrayData__get_offset", (DL_FUNC) &_arrow_ArrayData__get_offset, 1}, + { "_arrow_ArrayData__buffers", (DL_FUNC) &_arrow_ArrayData__buffers, 1}, + { 
"_arrow_Buffer__is_mutable", (DL_FUNC) &_arrow_Buffer__is_mutable, 1}, + { "_arrow_Buffer__ZeroPadding", (DL_FUNC) &_arrow_Buffer__ZeroPadding, 1}, + { "_arrow_Buffer__capacity", (DL_FUNC) &_arrow_Buffer__capacity, 1}, + { "_arrow_Buffer__size", (DL_FUNC) &_arrow_Buffer__size, 1}, + { "_arrow_r___RBuffer__initialize", (DL_FUNC) &_arrow_r___RBuffer__initialize, 1}, + { "_arrow_Buffer__data", (DL_FUNC) &_arrow_Buffer__data, 1}, + { "_arrow_Buffer__Equals", (DL_FUNC) &_arrow_Buffer__Equals, 2}, + { "_arrow_ChunkedArray__length", (DL_FUNC) &_arrow_ChunkedArray__length, 1}, + { "_arrow_ChunkedArray__null_count", (DL_FUNC) &_arrow_ChunkedArray__null_count, 1}, + { "_arrow_ChunkedArray__num_chunks", (DL_FUNC) &_arrow_ChunkedArray__num_chunks, 1}, + { "_arrow_ChunkedArray__chunk", (DL_FUNC) &_arrow_ChunkedArray__chunk, 2}, + { "_arrow_ChunkedArray__chunks", (DL_FUNC) &_arrow_ChunkedArray__chunks, 1}, + { "_arrow_ChunkedArray__type", (DL_FUNC) &_arrow_ChunkedArray__type, 1}, + { "_arrow_ChunkedArray__Slice1", (DL_FUNC) &_arrow_ChunkedArray__Slice1, 2}, + { "_arrow_ChunkedArray__Slice2", (DL_FUNC) &_arrow_ChunkedArray__Slice2, 3}, + { "_arrow_ChunkedArray__View", (DL_FUNC) &_arrow_ChunkedArray__View, 2}, + { "_arrow_ChunkedArray__Validate", (DL_FUNC) &_arrow_ChunkedArray__Validate, 1}, + { "_arrow_ChunkedArray__Equals", (DL_FUNC) &_arrow_ChunkedArray__Equals, 2}, + { "_arrow_ChunkedArray__ToString", (DL_FUNC) &_arrow_ChunkedArray__ToString, 1}, + { "_arrow_util___Codec__Create", (DL_FUNC) &_arrow_util___Codec__Create, 2}, + { "_arrow_util___Codec__name", (DL_FUNC) &_arrow_util___Codec__name, 1}, + { "_arrow_util___Codec__IsAvailable", (DL_FUNC) &_arrow_util___Codec__IsAvailable, 1}, + { "_arrow_io___CompressedOutputStream__Make", (DL_FUNC) &_arrow_io___CompressedOutputStream__Make, 2}, + { "_arrow_io___CompressedInputStream__Make", (DL_FUNC) &_arrow_io___CompressedInputStream__Make, 2}, + { "_arrow_RecordBatch__cast", (DL_FUNC) &_arrow_RecordBatch__cast, 3}, + { 
"_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3}, + { "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3}, + { "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, + { "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, + { "_arrow_csv___ReadOptions__column_names", (DL_FUNC) &_arrow_csv___ReadOptions__column_names, 1}, + { "_arrow_csv___ConvertOptions__initialize", (DL_FUNC) &_arrow_csv___ConvertOptions__initialize, 1}, + { "_arrow_csv___TableReader__Make", (DL_FUNC) &_arrow_csv___TableReader__Make, 4}, + { "_arrow_csv___TableReader__Read", (DL_FUNC) &_arrow_csv___TableReader__Read, 1}, + { "_arrow_TimestampParser__kind", (DL_FUNC) &_arrow_TimestampParser__kind, 1}, + { "_arrow_TimestampParser__format", (DL_FUNC) &_arrow_TimestampParser__format, 1}, + { "_arrow_TimestampParser__MakeStrptime", (DL_FUNC) &_arrow_TimestampParser__MakeStrptime, 1}, + { "_arrow_TimestampParser__MakeISO8601", (DL_FUNC) &_arrow_TimestampParser__MakeISO8601, 0}, + { "_arrow_dataset___Dataset__NewScan", (DL_FUNC) &_arrow_dataset___Dataset__NewScan, 1}, + { "_arrow_dataset___Dataset__schema", (DL_FUNC) &_arrow_dataset___Dataset__schema, 1}, + { "_arrow_dataset___Dataset__type_name", (DL_FUNC) &_arrow_dataset___Dataset__type_name, 1}, + { "_arrow_dataset___Dataset__ReplaceSchema", (DL_FUNC) &_arrow_dataset___Dataset__ReplaceSchema, 2}, + { "_arrow_dataset___UnionDataset__create", (DL_FUNC) &_arrow_dataset___UnionDataset__create, 2}, + { "_arrow_dataset___InMemoryDataset__create", (DL_FUNC) &_arrow_dataset___InMemoryDataset__create, 1}, + { "_arrow_dataset___UnionDataset__children", (DL_FUNC) &_arrow_dataset___UnionDataset__children, 1}, + { "_arrow_dataset___FileSystemDataset__format", (DL_FUNC) &_arrow_dataset___FileSystemDataset__format, 1}, + { "_arrow_dataset___FileSystemDataset__filesystem", (DL_FUNC) &_arrow_dataset___FileSystemDataset__filesystem, 1}, + { 
"_arrow_dataset___FileSystemDataset__files", (DL_FUNC) &_arrow_dataset___FileSystemDataset__files, 1}, + { "_arrow_dataset___DatasetFactory__Finish1", (DL_FUNC) &_arrow_dataset___DatasetFactory__Finish1, 2}, + { "_arrow_dataset___DatasetFactory__Finish2", (DL_FUNC) &_arrow_dataset___DatasetFactory__Finish2, 2}, + { "_arrow_dataset___DatasetFactory__Inspect", (DL_FUNC) &_arrow_dataset___DatasetFactory__Inspect, 2}, + { "_arrow_dataset___UnionDatasetFactory__Make", (DL_FUNC) &_arrow_dataset___UnionDatasetFactory__Make, 1}, + { "_arrow_dataset___FileSystemDatasetFactory__Make2", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make2, 4}, + { "_arrow_dataset___FileSystemDatasetFactory__Make1", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make1, 3}, + { "_arrow_dataset___FileSystemDatasetFactory__Make3", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make3, 4}, + { "_arrow_dataset___FileFormat__type_name", (DL_FUNC) &_arrow_dataset___FileFormat__type_name, 1}, + { "_arrow_dataset___FileFormat__DefaultWriteOptions", (DL_FUNC) &_arrow_dataset___FileFormat__DefaultWriteOptions, 1}, + { "_arrow_dataset___ParquetFileFormat__Make", (DL_FUNC) &_arrow_dataset___ParquetFileFormat__Make, 3}, + { "_arrow_dataset___FileWriteOptions__type_name", (DL_FUNC) &_arrow_dataset___FileWriteOptions__type_name, 1}, + { "_arrow_dataset___ParquetFileWriteOptions__update", (DL_FUNC) &_arrow_dataset___ParquetFileWriteOptions__update, 3}, + { "_arrow_dataset___IpcFileWriteOptions__update2", (DL_FUNC) &_arrow_dataset___IpcFileWriteOptions__update2, 4}, + { "_arrow_dataset___IpcFileWriteOptions__update1", (DL_FUNC) &_arrow_dataset___IpcFileWriteOptions__update1, 3}, + { "_arrow_dataset___IpcFileFormat__Make", (DL_FUNC) &_arrow_dataset___IpcFileFormat__Make, 0}, + { "_arrow_dataset___CsvFileFormat__Make", (DL_FUNC) &_arrow_dataset___CsvFileFormat__Make, 1}, + { "_arrow_dataset___DirectoryPartitioning", (DL_FUNC) &_arrow_dataset___DirectoryPartitioning, 1}, + { 
"_arrow_dataset___DirectoryPartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___DirectoryPartitioning__MakeFactory, 1}, + { "_arrow_dataset___HivePartitioning", (DL_FUNC) &_arrow_dataset___HivePartitioning, 1}, + { "_arrow_dataset___HivePartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___HivePartitioning__MakeFactory, 0}, + { "_arrow_dataset___ScannerBuilder__Project", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Project, 2}, + { "_arrow_dataset___ScannerBuilder__Filter", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Filter, 2}, + { "_arrow_dataset___ScannerBuilder__UseThreads", (DL_FUNC) &_arrow_dataset___ScannerBuilder__UseThreads, 2}, + { "_arrow_dataset___ScannerBuilder__BatchSize", (DL_FUNC) &_arrow_dataset___ScannerBuilder__BatchSize, 2}, + { "_arrow_dataset___ScannerBuilder__schema", (DL_FUNC) &_arrow_dataset___ScannerBuilder__schema, 1}, + { "_arrow_dataset___ScannerBuilder__Finish", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Finish, 1}, + { "_arrow_dataset___Scanner__ToTable", (DL_FUNC) &_arrow_dataset___Scanner__ToTable, 1}, + { "_arrow_dataset___Scanner__head", (DL_FUNC) &_arrow_dataset___Scanner__head, 2}, + { "_arrow_dataset___Scanner__Scan", (DL_FUNC) &_arrow_dataset___Scanner__Scan, 1}, + { "_arrow_dataset___Scanner__schema", (DL_FUNC) &_arrow_dataset___Scanner__schema, 1}, + { "_arrow_dataset___ScanTask__get_batches", (DL_FUNC) &_arrow_dataset___ScanTask__get_batches, 1}, + { "_arrow_dataset___Dataset__Write", (DL_FUNC) &_arrow_dataset___Dataset__Write, 6}, + { "_arrow_Int8__initialize", (DL_FUNC) &_arrow_Int8__initialize, 0}, + { "_arrow_Int16__initialize", (DL_FUNC) &_arrow_Int16__initialize, 0}, + { "_arrow_Int32__initialize", (DL_FUNC) &_arrow_Int32__initialize, 0}, + { "_arrow_Int64__initialize", (DL_FUNC) &_arrow_Int64__initialize, 0}, + { "_arrow_UInt8__initialize", (DL_FUNC) &_arrow_UInt8__initialize, 0}, + { "_arrow_UInt16__initialize", (DL_FUNC) &_arrow_UInt16__initialize, 0}, + { "_arrow_UInt32__initialize", (DL_FUNC) 
&_arrow_UInt32__initialize, 0}, + { "_arrow_UInt64__initialize", (DL_FUNC) &_arrow_UInt64__initialize, 0}, + { "_arrow_Float16__initialize", (DL_FUNC) &_arrow_Float16__initialize, 0}, + { "_arrow_Float32__initialize", (DL_FUNC) &_arrow_Float32__initialize, 0}, + { "_arrow_Float64__initialize", (DL_FUNC) &_arrow_Float64__initialize, 0}, + { "_arrow_Boolean__initialize", (DL_FUNC) &_arrow_Boolean__initialize, 0}, + { "_arrow_Utf8__initialize", (DL_FUNC) &_arrow_Utf8__initialize, 0}, + { "_arrow_LargeUtf8__initialize", (DL_FUNC) &_arrow_LargeUtf8__initialize, 0}, + { "_arrow_Binary__initialize", (DL_FUNC) &_arrow_Binary__initialize, 0}, + { "_arrow_LargeBinary__initialize", (DL_FUNC) &_arrow_LargeBinary__initialize, 0}, + { "_arrow_Date32__initialize", (DL_FUNC) &_arrow_Date32__initialize, 0}, + { "_arrow_Date64__initialize", (DL_FUNC) &_arrow_Date64__initialize, 0}, + { "_arrow_Null__initialize", (DL_FUNC) &_arrow_Null__initialize, 0}, + { "_arrow_Decimal128Type__initialize", (DL_FUNC) &_arrow_Decimal128Type__initialize, 2}, + { "_arrow_FixedSizeBinary__initialize", (DL_FUNC) &_arrow_FixedSizeBinary__initialize, 1}, + { "_arrow_Timestamp__initialize", (DL_FUNC) &_arrow_Timestamp__initialize, 2}, + { "_arrow_Time32__initialize", (DL_FUNC) &_arrow_Time32__initialize, 1}, + { "_arrow_Time64__initialize", (DL_FUNC) &_arrow_Time64__initialize, 1}, + { "_arrow_list__", (DL_FUNC) &_arrow_list__, 1}, + { "_arrow_large_list__", (DL_FUNC) &_arrow_large_list__, 1}, + { "_arrow_fixed_size_list__", (DL_FUNC) &_arrow_fixed_size_list__, 2}, + { "_arrow_struct__", (DL_FUNC) &_arrow_struct__, 1}, + { "_arrow_DataType__ToString", (DL_FUNC) &_arrow_DataType__ToString, 1}, + { "_arrow_DataType__name", (DL_FUNC) &_arrow_DataType__name, 1}, + { "_arrow_DataType__Equals", (DL_FUNC) &_arrow_DataType__Equals, 2}, + { "_arrow_DataType__num_fields", (DL_FUNC) &_arrow_DataType__num_fields, 1}, + { "_arrow_DataType__fields", (DL_FUNC) &_arrow_DataType__fields, 1}, + { "_arrow_DataType__id", 
(DL_FUNC) &_arrow_DataType__id, 1}, + { "_arrow_ListType__ToString", (DL_FUNC) &_arrow_ListType__ToString, 1}, + { "_arrow_FixedWidthType__bit_width", (DL_FUNC) &_arrow_FixedWidthType__bit_width, 1}, + { "_arrow_DateType__unit", (DL_FUNC) &_arrow_DateType__unit, 1}, + { "_arrow_TimeType__unit", (DL_FUNC) &_arrow_TimeType__unit, 1}, + { "_arrow_DecimalType__precision", (DL_FUNC) &_arrow_DecimalType__precision, 1}, + { "_arrow_DecimalType__scale", (DL_FUNC) &_arrow_DecimalType__scale, 1}, + { "_arrow_TimestampType__timezone", (DL_FUNC) &_arrow_TimestampType__timezone, 1}, + { "_arrow_TimestampType__unit", (DL_FUNC) &_arrow_TimestampType__unit, 1}, + { "_arrow_DictionaryType__initialize", (DL_FUNC) &_arrow_DictionaryType__initialize, 3}, + { "_arrow_DictionaryType__index_type", (DL_FUNC) &_arrow_DictionaryType__index_type, 1}, + { "_arrow_DictionaryType__value_type", (DL_FUNC) &_arrow_DictionaryType__value_type, 1}, + { "_arrow_DictionaryType__name", (DL_FUNC) &_arrow_DictionaryType__name, 1}, + { "_arrow_DictionaryType__ordered", (DL_FUNC) &_arrow_DictionaryType__ordered, 1}, + { "_arrow_StructType__GetFieldByName", (DL_FUNC) &_arrow_StructType__GetFieldByName, 2}, + { "_arrow_StructType__GetFieldIndex", (DL_FUNC) &_arrow_StructType__GetFieldIndex, 2}, + { "_arrow_StructType__field_names", (DL_FUNC) &_arrow_StructType__field_names, 1}, + { "_arrow_ListType__value_field", (DL_FUNC) &_arrow_ListType__value_field, 1}, + { "_arrow_ListType__value_type", (DL_FUNC) &_arrow_ListType__value_type, 1}, + { "_arrow_LargeListType__value_field", (DL_FUNC) &_arrow_LargeListType__value_field, 1}, + { "_arrow_LargeListType__value_type", (DL_FUNC) &_arrow_LargeListType__value_type, 1}, + { "_arrow_FixedSizeListType__value_field", (DL_FUNC) &_arrow_FixedSizeListType__value_field, 1}, + { "_arrow_FixedSizeListType__value_type", (DL_FUNC) &_arrow_FixedSizeListType__value_type, 1}, + { "_arrow_FixedSizeListType__list_size", (DL_FUNC) &_arrow_FixedSizeListType__list_size, 1}, + { 
"_arrow_dataset___expr__call", (DL_FUNC) &_arrow_dataset___expr__call, 3}, + { "_arrow_dataset___expr__field_ref", (DL_FUNC) &_arrow_dataset___expr__field_ref, 1}, + { "_arrow_dataset___expr__scalar", (DL_FUNC) &_arrow_dataset___expr__scalar, 1}, + { "_arrow_dataset___expr__ToString", (DL_FUNC) &_arrow_dataset___expr__ToString, 1}, + { "_arrow_ipc___WriteFeather__Table", (DL_FUNC) &_arrow_ipc___WriteFeather__Table, 6}, + { "_arrow_ipc___feather___Reader__version", (DL_FUNC) &_arrow_ipc___feather___Reader__version, 1}, + { "_arrow_ipc___feather___Reader__Read", (DL_FUNC) &_arrow_ipc___feather___Reader__Read, 2}, + { "_arrow_ipc___feather___Reader__Open", (DL_FUNC) &_arrow_ipc___feather___Reader__Open, 1}, + { "_arrow_ipc___feather___Reader__column_names", (DL_FUNC) &_arrow_ipc___feather___Reader__column_names, 1}, + { "_arrow_Field__initialize", (DL_FUNC) &_arrow_Field__initialize, 3}, + { "_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 1}, + { "_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1}, + { "_arrow_Field__Equals", (DL_FUNC) &_arrow_Field__Equals, 2}, + { "_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1}, + { "_arrow_Field__type", (DL_FUNC) &_arrow_Field__type, 1}, + { "_arrow_fs___FileInfo__type", (DL_FUNC) &_arrow_fs___FileInfo__type, 1}, + { "_arrow_fs___FileInfo__set_type", (DL_FUNC) &_arrow_fs___FileInfo__set_type, 2}, + { "_arrow_fs___FileInfo__path", (DL_FUNC) &_arrow_fs___FileInfo__path, 1}, + { "_arrow_fs___FileInfo__set_path", (DL_FUNC) &_arrow_fs___FileInfo__set_path, 2}, + { "_arrow_fs___FileInfo__size", (DL_FUNC) &_arrow_fs___FileInfo__size, 1}, + { "_arrow_fs___FileInfo__set_size", (DL_FUNC) &_arrow_fs___FileInfo__set_size, 2}, + { "_arrow_fs___FileInfo__base_name", (DL_FUNC) &_arrow_fs___FileInfo__base_name, 1}, + { "_arrow_fs___FileInfo__extension", (DL_FUNC) &_arrow_fs___FileInfo__extension, 1}, + { "_arrow_fs___FileInfo__mtime", (DL_FUNC) &_arrow_fs___FileInfo__mtime, 1}, + { 
"_arrow_fs___FileInfo__set_mtime", (DL_FUNC) &_arrow_fs___FileInfo__set_mtime, 2}, + { "_arrow_fs___FileSelector__base_dir", (DL_FUNC) &_arrow_fs___FileSelector__base_dir, 1}, + { "_arrow_fs___FileSelector__allow_not_found", (DL_FUNC) &_arrow_fs___FileSelector__allow_not_found, 1}, + { "_arrow_fs___FileSelector__recursive", (DL_FUNC) &_arrow_fs___FileSelector__recursive, 1}, + { "_arrow_fs___FileSelector__create", (DL_FUNC) &_arrow_fs___FileSelector__create, 3}, + { "_arrow_fs___FileSystem__GetTargetInfos_Paths", (DL_FUNC) &_arrow_fs___FileSystem__GetTargetInfos_Paths, 2}, + { "_arrow_fs___FileSystem__GetTargetInfos_FileSelector", (DL_FUNC) &_arrow_fs___FileSystem__GetTargetInfos_FileSelector, 2}, + { "_arrow_fs___FileSystem__CreateDir", (DL_FUNC) &_arrow_fs___FileSystem__CreateDir, 3}, + { "_arrow_fs___FileSystem__DeleteDir", (DL_FUNC) &_arrow_fs___FileSystem__DeleteDir, 2}, + { "_arrow_fs___FileSystem__DeleteDirContents", (DL_FUNC) &_arrow_fs___FileSystem__DeleteDirContents, 2}, + { "_arrow_fs___FileSystem__DeleteFile", (DL_FUNC) &_arrow_fs___FileSystem__DeleteFile, 2}, + { "_arrow_fs___FileSystem__DeleteFiles", (DL_FUNC) &_arrow_fs___FileSystem__DeleteFiles, 2}, + { "_arrow_fs___FileSystem__Move", (DL_FUNC) &_arrow_fs___FileSystem__Move, 3}, + { "_arrow_fs___FileSystem__CopyFile", (DL_FUNC) &_arrow_fs___FileSystem__CopyFile, 3}, + { "_arrow_fs___FileSystem__OpenInputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenInputStream, 2}, + { "_arrow_fs___FileSystem__OpenInputFile", (DL_FUNC) &_arrow_fs___FileSystem__OpenInputFile, 2}, + { "_arrow_fs___FileSystem__OpenOutputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenOutputStream, 2}, + { "_arrow_fs___FileSystem__OpenAppendStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenAppendStream, 2}, + { "_arrow_fs___FileSystem__type_name", (DL_FUNC) &_arrow_fs___FileSystem__type_name, 1}, + { "_arrow_fs___LocalFileSystem__create", (DL_FUNC) &_arrow_fs___LocalFileSystem__create, 0}, + { "_arrow_fs___SubTreeFileSystem__create", 
(DL_FUNC) &_arrow_fs___SubTreeFileSystem__create, 2}, + { "_arrow_fs___SubTreeFileSystem__base_fs", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_fs, 1}, + { "_arrow_fs___SubTreeFileSystem__base_path", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_path, 1}, + { "_arrow_fs___FileSystemFromUri", (DL_FUNC) &_arrow_fs___FileSystemFromUri, 1}, + { "_arrow_fs___CopyFiles", (DL_FUNC) &_arrow_fs___CopyFiles, 6}, + { "_arrow_fs___S3FileSystem__create", (DL_FUNC) &_arrow_fs___S3FileSystem__create, 12}, + { "_arrow_fs___S3FileSystem__region", (DL_FUNC) &_arrow_fs___S3FileSystem__region, 1}, + { "_arrow_io___Readable__Read", (DL_FUNC) &_arrow_io___Readable__Read, 2}, + { "_arrow_io___InputStream__Close", (DL_FUNC) &_arrow_io___InputStream__Close, 1}, + { "_arrow_io___OutputStream__Close", (DL_FUNC) &_arrow_io___OutputStream__Close, 1}, + { "_arrow_io___RandomAccessFile__GetSize", (DL_FUNC) &_arrow_io___RandomAccessFile__GetSize, 1}, + { "_arrow_io___RandomAccessFile__supports_zero_copy", (DL_FUNC) &_arrow_io___RandomAccessFile__supports_zero_copy, 1}, + { "_arrow_io___RandomAccessFile__Seek", (DL_FUNC) &_arrow_io___RandomAccessFile__Seek, 2}, + { "_arrow_io___RandomAccessFile__Tell", (DL_FUNC) &_arrow_io___RandomAccessFile__Tell, 1}, + { "_arrow_io___RandomAccessFile__Read0", (DL_FUNC) &_arrow_io___RandomAccessFile__Read0, 1}, + { "_arrow_io___RandomAccessFile__ReadAt", (DL_FUNC) &_arrow_io___RandomAccessFile__ReadAt, 3}, + { "_arrow_io___MemoryMappedFile__Create", (DL_FUNC) &_arrow_io___MemoryMappedFile__Create, 2}, + { "_arrow_io___MemoryMappedFile__Open", (DL_FUNC) &_arrow_io___MemoryMappedFile__Open, 2}, + { "_arrow_io___MemoryMappedFile__Resize", (DL_FUNC) &_arrow_io___MemoryMappedFile__Resize, 2}, + { "_arrow_io___ReadableFile__Open", (DL_FUNC) &_arrow_io___ReadableFile__Open, 1}, + { "_arrow_io___BufferReader__initialize", (DL_FUNC) &_arrow_io___BufferReader__initialize, 1}, + { "_arrow_io___Writable__write", (DL_FUNC) &_arrow_io___Writable__write, 2}, + { 
"_arrow_io___OutputStream__Tell", (DL_FUNC) &_arrow_io___OutputStream__Tell, 1}, + { "_arrow_io___FileOutputStream__Open", (DL_FUNC) &_arrow_io___FileOutputStream__Open, 1}, + { "_arrow_io___BufferOutputStream__Create", (DL_FUNC) &_arrow_io___BufferOutputStream__Create, 1}, + { "_arrow_io___BufferOutputStream__capacity", (DL_FUNC) &_arrow_io___BufferOutputStream__capacity, 1}, + { "_arrow_io___BufferOutputStream__Finish", (DL_FUNC) &_arrow_io___BufferOutputStream__Finish, 1}, + { "_arrow_io___BufferOutputStream__Tell", (DL_FUNC) &_arrow_io___BufferOutputStream__Tell, 1}, + { "_arrow_io___BufferOutputStream__Write", (DL_FUNC) &_arrow_io___BufferOutputStream__Write, 2}, + { "_arrow_json___ReadOptions__initialize", (DL_FUNC) &_arrow_json___ReadOptions__initialize, 2}, + { "_arrow_json___ParseOptions__initialize", (DL_FUNC) &_arrow_json___ParseOptions__initialize, 1}, + { "_arrow_json___TableReader__Make", (DL_FUNC) &_arrow_json___TableReader__Make, 3}, + { "_arrow_json___TableReader__Read", (DL_FUNC) &_arrow_json___TableReader__Read, 1}, + { "_arrow_MemoryPool__default", (DL_FUNC) &_arrow_MemoryPool__default, 0}, + { "_arrow_MemoryPool__bytes_allocated", (DL_FUNC) &_arrow_MemoryPool__bytes_allocated, 1}, + { "_arrow_MemoryPool__max_memory", (DL_FUNC) &_arrow_MemoryPool__max_memory, 1}, + { "_arrow_MemoryPool__backend_name", (DL_FUNC) &_arrow_MemoryPool__backend_name, 1}, + { "_arrow_supported_memory_backends", (DL_FUNC) &_arrow_supported_memory_backends, 0}, + { "_arrow_ipc___Message__body_length", (DL_FUNC) &_arrow_ipc___Message__body_length, 1}, + { "_arrow_ipc___Message__metadata", (DL_FUNC) &_arrow_ipc___Message__metadata, 1}, + { "_arrow_ipc___Message__body", (DL_FUNC) &_arrow_ipc___Message__body, 1}, + { "_arrow_ipc___Message__Verify", (DL_FUNC) &_arrow_ipc___Message__Verify, 1}, + { "_arrow_ipc___Message__type", (DL_FUNC) &_arrow_ipc___Message__type, 1}, + { "_arrow_ipc___Message__Equals", (DL_FUNC) &_arrow_ipc___Message__Equals, 2}, + { 
"_arrow_ipc___ReadRecordBatch__Message__Schema", (DL_FUNC) &_arrow_ipc___ReadRecordBatch__Message__Schema, 2}, + { "_arrow_ipc___ReadSchema_InputStream", (DL_FUNC) &_arrow_ipc___ReadSchema_InputStream, 1}, + { "_arrow_ipc___ReadSchema_Message", (DL_FUNC) &_arrow_ipc___ReadSchema_Message, 1}, + { "_arrow_ipc___MessageReader__Open", (DL_FUNC) &_arrow_ipc___MessageReader__Open, 1}, + { "_arrow_ipc___MessageReader__ReadNextMessage", (DL_FUNC) &_arrow_ipc___MessageReader__ReadNextMessage, 1}, + { "_arrow_ipc___ReadMessage", (DL_FUNC) &_arrow_ipc___ReadMessage, 1}, + { "_arrow_parquet___arrow___ArrowReaderProperties__Make", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__Make, 1}, + { "_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads, 2}, + { "_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads, 2}, + { "_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary, 2}, + { "_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary, 3}, + { "_arrow_parquet___arrow___FileReader__OpenFile", (DL_FUNC) &_arrow_parquet___arrow___FileReader__OpenFile, 2}, + { "_arrow_parquet___arrow___FileReader__ReadTable1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable1, 1}, + { "_arrow_parquet___arrow___FileReader__ReadTable2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable2, 2}, + { "_arrow_parquet___arrow___FileReader__ReadRowGroup1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroup1, 2}, + { "_arrow_parquet___arrow___FileReader__ReadRowGroup2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroup2, 3}, + { "_arrow_parquet___arrow___FileReader__ReadRowGroups1", (DL_FUNC) 
&_arrow_parquet___arrow___FileReader__ReadRowGroups1, 2}, + { "_arrow_parquet___arrow___FileReader__ReadRowGroups2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroups2, 3}, + { "_arrow_parquet___arrow___FileReader__num_rows", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_rows, 1}, + { "_arrow_parquet___arrow___FileReader__num_columns", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_columns, 1}, + { "_arrow_parquet___arrow___FileReader__num_row_groups", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_row_groups, 1}, + { "_arrow_parquet___arrow___FileReader__ReadColumn", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadColumn, 2}, + { "_arrow_parquet___ArrowWriterProperties___create", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___create, 3}, + { "_arrow_parquet___WriterProperties___Builder__create", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__create, 0}, + { "_arrow_parquet___WriterProperties___Builder__version", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__version, 2}, + { "_arrow_parquet___ArrowWriterProperties___Builder__set_compressions", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_compressions, 3}, + { "_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels, 3}, + { "_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary, 3}, + { "_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics, 3}, + { "_arrow_parquet___ArrowWriterProperties___Builder__data_page_size", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__data_page_size, 2}, + { "_arrow_parquet___WriterProperties___Builder__build", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__build, 1}, + { 
"_arrow_parquet___arrow___ParquetFileWriter__Open", (DL_FUNC) &_arrow_parquet___arrow___ParquetFileWriter__Open, 4}, + { "_arrow_parquet___arrow___FileWriter__WriteTable", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__WriteTable, 3}, + { "_arrow_parquet___arrow___FileWriter__Close", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__Close, 1}, + { "_arrow_parquet___arrow___WriteTable", (DL_FUNC) &_arrow_parquet___arrow___WriteTable, 4}, + { "_arrow_parquet___arrow___FileReader__GetSchema", (DL_FUNC) &_arrow_parquet___arrow___FileReader__GetSchema, 1}, + { "_arrow_ImportArray", (DL_FUNC) &_arrow_ImportArray, 2}, + { "_arrow_ImportRecordBatch", (DL_FUNC) &_arrow_ImportRecordBatch, 2}, + { "_arrow_ImportSchema", (DL_FUNC) &_arrow_ImportSchema, 1}, + { "_arrow_allocate_arrow_schema", (DL_FUNC) &_arrow_allocate_arrow_schema, 0}, + { "_arrow_delete_arrow_schema", (DL_FUNC) &_arrow_delete_arrow_schema, 1}, + { "_arrow_allocate_arrow_array", (DL_FUNC) &_arrow_allocate_arrow_array, 0}, + { "_arrow_delete_arrow_array", (DL_FUNC) &_arrow_delete_arrow_array, 1}, + { "_arrow_ExportType", (DL_FUNC) &_arrow_ExportType, 2}, + { "_arrow_ExportSchema", (DL_FUNC) &_arrow_ExportSchema, 2}, + { "_arrow_ExportArray", (DL_FUNC) &_arrow_ExportArray, 3}, + { "_arrow_ExportRecordBatch", (DL_FUNC) &_arrow_ExportRecordBatch, 3}, + { "_arrow_vec_to_arrow", (DL_FUNC) &_arrow_vec_to_arrow, 2}, + { "_arrow_RecordBatch__num_columns", (DL_FUNC) &_arrow_RecordBatch__num_columns, 1}, + { "_arrow_RecordBatch__num_rows", (DL_FUNC) &_arrow_RecordBatch__num_rows, 1}, + { "_arrow_RecordBatch__schema", (DL_FUNC) &_arrow_RecordBatch__schema, 1}, + { "_arrow_RecordBatch__RenameColumns", (DL_FUNC) &_arrow_RecordBatch__RenameColumns, 2}, + { "_arrow_RecordBatch__ReplaceSchemaMetadata", (DL_FUNC) &_arrow_RecordBatch__ReplaceSchemaMetadata, 2}, + { "_arrow_RecordBatch__columns", (DL_FUNC) &_arrow_RecordBatch__columns, 1}, + { "_arrow_RecordBatch__column", (DL_FUNC) &_arrow_RecordBatch__column, 2}, + { 
"_arrow_RecordBatch__GetColumnByName", (DL_FUNC) &_arrow_RecordBatch__GetColumnByName, 2}, + { "_arrow_RecordBatch__SelectColumns", (DL_FUNC) &_arrow_RecordBatch__SelectColumns, 2}, + { "_arrow_RecordBatch__Equals", (DL_FUNC) &_arrow_RecordBatch__Equals, 3}, + { "_arrow_RecordBatch__AddColumn", (DL_FUNC) &_arrow_RecordBatch__AddColumn, 4}, + { "_arrow_RecordBatch__SetColumn", (DL_FUNC) &_arrow_RecordBatch__SetColumn, 4}, + { "_arrow_RecordBatch__RemoveColumn", (DL_FUNC) &_arrow_RecordBatch__RemoveColumn, 2}, + { "_arrow_RecordBatch__column_name", (DL_FUNC) &_arrow_RecordBatch__column_name, 2}, + { "_arrow_RecordBatch__names", (DL_FUNC) &_arrow_RecordBatch__names, 1}, + { "_arrow_RecordBatch__Slice1", (DL_FUNC) &_arrow_RecordBatch__Slice1, 2}, + { "_arrow_RecordBatch__Slice2", (DL_FUNC) &_arrow_RecordBatch__Slice2, 3}, + { "_arrow_ipc___SerializeRecordBatch__Raw", (DL_FUNC) &_arrow_ipc___SerializeRecordBatch__Raw, 1}, + { "_arrow_ipc___ReadRecordBatch__InputStream__Schema", (DL_FUNC) &_arrow_ipc___ReadRecordBatch__InputStream__Schema, 2}, + { "_arrow_RecordBatch__from_arrays", (DL_FUNC) &_arrow_RecordBatch__from_arrays, 2}, + { "_arrow_RecordBatchReader__schema", (DL_FUNC) &_arrow_RecordBatchReader__schema, 1}, + { "_arrow_RecordBatchReader__ReadNext", (DL_FUNC) &_arrow_RecordBatchReader__ReadNext, 1}, + { "_arrow_ipc___RecordBatchStreamReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamReader__Open, 1}, + { "_arrow_ipc___RecordBatchStreamReader__batches", (DL_FUNC) &_arrow_ipc___RecordBatchStreamReader__batches, 1}, + { "_arrow_ipc___RecordBatchFileReader__schema", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__schema, 1}, + { "_arrow_ipc___RecordBatchFileReader__num_record_batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__num_record_batches, 1}, + { "_arrow_ipc___RecordBatchFileReader__ReadRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__ReadRecordBatch, 2}, + { "_arrow_ipc___RecordBatchFileReader__Open", (DL_FUNC) 
&_arrow_ipc___RecordBatchFileReader__Open, 1}, + { "_arrow_Table__from_RecordBatchReader", (DL_FUNC) &_arrow_Table__from_RecordBatchReader, 1}, + { "_arrow_Table__from_RecordBatchFileReader", (DL_FUNC) &_arrow_Table__from_RecordBatchFileReader, 1}, + { "_arrow_ipc___RecordBatchFileReader__batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__batches, 1}, + { "_arrow_ipc___RecordBatchWriter__WriteRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteRecordBatch, 2}, + { "_arrow_ipc___RecordBatchWriter__WriteTable", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteTable, 2}, + { "_arrow_ipc___RecordBatchWriter__Close", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__Close, 1}, + { "_arrow_ipc___RecordBatchFileWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileWriter__Open, 4}, + { "_arrow_ipc___RecordBatchStreamWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamWriter__Open, 4}, + { "_arrow_Array__GetScalar", (DL_FUNC) &_arrow_Array__GetScalar, 2}, + { "_arrow_Scalar__ToString", (DL_FUNC) &_arrow_Scalar__ToString, 1}, + { "_arrow_StructScalar__field", (DL_FUNC) &_arrow_StructScalar__field, 2}, + { "_arrow_StructScalar__GetFieldByName", (DL_FUNC) &_arrow_StructScalar__GetFieldByName, 2}, + { "_arrow_Scalar__as_vector", (DL_FUNC) &_arrow_Scalar__as_vector, 1}, + { "_arrow_Scalar__is_valid", (DL_FUNC) &_arrow_Scalar__is_valid, 1}, + { "_arrow_Scalar__type", (DL_FUNC) &_arrow_Scalar__type, 1}, + { "_arrow_schema_", (DL_FUNC) &_arrow_schema_, 1}, + { "_arrow_Schema__ToString", (DL_FUNC) &_arrow_Schema__ToString, 1}, + { "_arrow_Schema__num_fields", (DL_FUNC) &_arrow_Schema__num_fields, 1}, + { "_arrow_Schema__field", (DL_FUNC) &_arrow_Schema__field, 2}, + { "_arrow_Schema__GetFieldByName", (DL_FUNC) &_arrow_Schema__GetFieldByName, 2}, + { "_arrow_Schema__fields", (DL_FUNC) &_arrow_Schema__fields, 1}, + { "_arrow_Schema__field_names", (DL_FUNC) &_arrow_Schema__field_names, 1}, + { "_arrow_Schema__HasMetadata", (DL_FUNC) &_arrow_Schema__HasMetadata, 1}, + { 
"_arrow_Schema__metadata", (DL_FUNC) &_arrow_Schema__metadata, 1}, + { "_arrow_Schema__WithMetadata", (DL_FUNC) &_arrow_Schema__WithMetadata, 2}, + { "_arrow_Schema__serialize", (DL_FUNC) &_arrow_Schema__serialize, 1}, + { "_arrow_Schema__Equals", (DL_FUNC) &_arrow_Schema__Equals, 3}, + { "_arrow_arrow__UnifySchemas", (DL_FUNC) &_arrow_arrow__UnifySchemas, 1}, + { "_arrow_Table__num_columns", (DL_FUNC) &_arrow_Table__num_columns, 1}, + { "_arrow_Table__num_rows", (DL_FUNC) &_arrow_Table__num_rows, 1}, + { "_arrow_Table__schema", (DL_FUNC) &_arrow_Table__schema, 1}, + { "_arrow_Table__ReplaceSchemaMetadata", (DL_FUNC) &_arrow_Table__ReplaceSchemaMetadata, 2}, + { "_arrow_Table__column", (DL_FUNC) &_arrow_Table__column, 2}, + { "_arrow_Table__field", (DL_FUNC) &_arrow_Table__field, 2}, + { "_arrow_Table__columns", (DL_FUNC) &_arrow_Table__columns, 1}, + { "_arrow_Table__ColumnNames", (DL_FUNC) &_arrow_Table__ColumnNames, 1}, + { "_arrow_Table__RenameColumns", (DL_FUNC) &_arrow_Table__RenameColumns, 2}, + { "_arrow_Table__Slice1", (DL_FUNC) &_arrow_Table__Slice1, 2}, + { "_arrow_Table__Slice2", (DL_FUNC) &_arrow_Table__Slice2, 3}, + { "_arrow_Table__Equals", (DL_FUNC) &_arrow_Table__Equals, 3}, + { "_arrow_Table__Validate", (DL_FUNC) &_arrow_Table__Validate, 1}, + { "_arrow_Table__ValidateFull", (DL_FUNC) &_arrow_Table__ValidateFull, 1}, + { "_arrow_Table__GetColumnByName", (DL_FUNC) &_arrow_Table__GetColumnByName, 2}, + { "_arrow_Table__RemoveColumn", (DL_FUNC) &_arrow_Table__RemoveColumn, 2}, + { "_arrow_Table__AddColumn", (DL_FUNC) &_arrow_Table__AddColumn, 4}, + { "_arrow_Table__SetColumn", (DL_FUNC) &_arrow_Table__SetColumn, 4}, + { "_arrow_Table__SelectColumns", (DL_FUNC) &_arrow_Table__SelectColumns, 2}, + { "_arrow_all_record_batches", (DL_FUNC) &_arrow_all_record_batches, 1}, + { "_arrow_Table__from_record_batches", (DL_FUNC) &_arrow_Table__from_record_batches, 2}, + { "_arrow_Table__from_dots", (DL_FUNC) &_arrow_Table__from_dots, 2}, + { 
"_arrow_GetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_GetCpuThreadPoolCapacity, 0}, + { "_arrow_SetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_SetCpuThreadPoolCapacity, 1}, + { "_arrow_Table__Reset", (DL_FUNC) &_arrow_Table__Reset, 1}, + { "_arrow_RecordBatch__Reset", (DL_FUNC) &_arrow_RecordBatch__Reset, 1}, {NULL, NULL, 0} }; diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 909ccfb217a..07d490f664d 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -23,14 +23,14 @@ #if defined(ARROW_R_WITH_ARROW) -#include -#include -#include - #include // for RBuffer definition below #include #include +#include +#include +#include + // forward declaration-only headers #include #include diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp new file mode 100644 index 00000000000..517d53d26a0 --- /dev/null +++ b/r/src/r_to_arrow.cpp @@ -0,0 +1,173 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include "./arrow_types.h" +#include "./arrow_vctrs.h" + +#if defined(ARROW_R_WITH_ARROW) +#include +#include +#include +#include +#include +#include +#include +#include + +namespace arrow { + +using internal::checked_cast; +using internal::checked_pointer_cast; + +using internal::Converter; +using internal::DictionaryConverter; +using internal::ListConverter; +using internal::PrimitiveConverter; +using internal::StructConverter; + +using internal::MakeChunker; +using internal::MakeConverter; + +namespace r { + +struct RConversionOptions { + RConversionOptions() = default; + + std::shared_ptr type; + bool strict; + int64_t size; +}; + +using RConverter = Converter; + +// TODO: this needs various versions as what python does: +// class PyPrimitiveConverter> +// +// class PyPrimitiveConverter< +// T, enable_if_t::value || is_number_type::value || +// is_decimal_type::value || is_date_type::value || +// is_time_type::value>> : public PrimitiveConverter { +// +// class PyPrimitiveConverter< +// T, enable_if_t::value || is_number_type::value || +// is_decimal_type::value || is_date_type::value || +// is_time_type::value>> : public PrimitiveConverter { +// +// class PyPrimitiveConverter> +// : public PrimitiveConverter { +// +// class PyPrimitiveConverter::value>> +// : public PrimitiveConverter { +// +// class PyPrimitiveConverter> +// : public PrimitiveConverter { + +template +class RPrimitiveConverter : public PrimitiveConverter { + public: + Status Append(SEXP value) { return Status::OK(); } +}; + +template +class RListConverter; + +// TODO: replace by various versions. 
The python code has 2 versions: +// +// template +// class PyDictionaryConverter> +// : public DictionaryConverter { +// +// template +// class PyDictionaryConverter> +// : public DictionaryConverter { +// +template +class RDictionaryConverter : public DictionaryConverter { + public: + Status Append(SEXP value) { return Status::OK(); } +}; + +class RStructConverter; + +template +struct RConverterTrait; + +template +struct RConverterTrait< + T, enable_if_t::value && !is_interval_type::value && + !is_extension_type::value>> { + using type = RPrimitiveConverter; +}; + +template +struct RConverterTrait> { + using type = RListConverter; +}; + +template +class RListConverter : public ListConverter { + public: + Status Append(SEXP value) { return Status::OK(); } +}; + +template <> +struct RConverterTrait { + using type = RStructConverter; +}; + +class RStructConverter : public StructConverter { + public: + Status Append(SEXP value) { return Status::OK(); } +}; + +template <> +struct RConverterTrait { + template + using dictionary_type = RDictionaryConverter; +}; + +std::shared_ptr vec_to_arrow(SEXP x, SEXP s_type) { + RConversionOptions options; + options.strict = !Rf_isNull(s_type); + + std::shared_ptr type; + if (options.strict) { + options.type = cpp11::as_cpp>(s_type); + } else { + options.type = arrow::r::InferArrowType(x); + } + + options.size = vctrs::short_vec_size(x); + + auto converter = ValueOrStop(MakeConverter( + options.type, options, gc_memory_pool())); + + StopIfNotOk(converter->Reserve(options.size)); + StopIfNotOk(converter->Append(x)); + return ValueOrStop(converter->ToArray()); +} + +} // namespace r +} // namespace arrow + +// [[arrow::export]] +SEXP vec_to_arrow(SEXP x, SEXP s_type) { + if (Rf_inherits(x, "Array")) return x; + return cpp11::to_r6(arrow::r::vec_to_arrow(x, s_type)); +} + +#endif From f4bc2f0126bf61a13fcbcd8495b685eb8c43d48d Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 13 Nov 2020 11:07:37 +0100 Subject: [PATCH 02/82] 
using RConverter = Converter --- r/src/r_to_arrow.cpp | 28 +++++++++++++++++++++------- 1 file changed, 21 insertions(+), 7 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 517d53d26a0..038846a5b86 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -52,7 +52,9 @@ struct RConversionOptions { int64_t size; }; -using RConverter = Converter; +struct RObject {}; + +using RConverter = Converter; // TODO: this needs various versions as what python does: // class PyPrimitiveConverter> @@ -79,7 +81,10 @@ using RConverter = Converter; template class RPrimitiveConverter : public PrimitiveConverter { public: - Status Append(SEXP value) { return Status::OK(); } + Status Append(RObject* value) { + Rprintf("T = %s\n", arrow::util::nameof().c_str()); + return Status::OK(); + } }; template @@ -98,7 +103,7 @@ class RListConverter; template class RDictionaryConverter : public DictionaryConverter { public: - Status Append(SEXP value) { return Status::OK(); } + Status Append(RObject* value) { return Status::OK(); } }; class RStructConverter; @@ -121,7 +126,7 @@ struct RConverterTrait> { template class RListConverter : public ListConverter { public: - Status Append(SEXP value) { return Status::OK(); } + Status Append(RObject* value) { return Status::OK(); } }; template <> @@ -130,8 +135,16 @@ struct RConverterTrait { }; class RStructConverter : public StructConverter { - public: - Status Append(SEXP value) { return Status::OK(); } +public: + Status Append(RObject* value) override { + return Status::OK(); + } + +protected: +Status Init(MemoryPool* pool) override { + RETURN_NOT_OK((StructConverter::Init(pool))); + return Status::OK(); +} }; template <> @@ -157,7 +170,8 @@ std::shared_ptr vec_to_arrow(SEXP x, SEXP s_type) { options.type, options, gc_memory_pool())); StopIfNotOk(converter->Reserve(options.size)); - StopIfNotOk(converter->Append(x)); + // TODO: iterate and call Append on each value + // StopIfNotOk(converter->Append(x)); return 
ValueOrStop(converter->ToArray()); } From 3967f46c951769ec6b4133be4eb52aa493dce422 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 13 Nov 2020 14:42:02 +0100 Subject: [PATCH 03/82] going a little further with an initial conversion from INTSXP -> int32 --- r/src/r_to_arrow.cpp | 212 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 182 insertions(+), 30 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 038846a5b86..b72942c6b6f 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -52,41 +52,195 @@ struct RConversionOptions { int64_t size; }; -struct RObject {}; +enum RVectorType { + BOOLEAN, + UINT8, + INT32, + FLOAT64, + INTEGER64, + COMPLEX, + STRING, + DATAFRAME, + DATE, + TIME, + TIMESTAMP, + + OTHER +}; + +RVectorType GetVectorType(SEXP x) { + switch (TYPEOF(x)) { + case LGLSXP: + return BOOLEAN; + case RAWSXP: + return UINT8; + case INTSXP: + return INT32; + case STRSXP: + return STRING; + case CPLXSXP: + return COMPLEX; + case REALSXP: { + if (Rf_inherits(x, "integer64")) { + return INTEGER64; + } else if (Rf_inherits(x, "POSIXct")) { + return TIMESTAMP; + } else if (Rf_inherits(x, "difftime")) { + return TIME; + } else { + return FLOAT64; + } + } + case VECSXP: { + if (Rf_inherits(x, "data.frame")) { + return DATAFRAME; + } + // TODO: binary, list, POSIXlt + break; + } + default: + break; + } + return OTHER; +} + +struct RObject { + RVectorType rtype; + void* data; + bool null; +}; + +class RValue { + public: + static bool IsNull(RObject* obj) { return obj->null; } + + // TODO: generalise + static Result Convert(const Int32Type*, const RConversionOptions&, + RObject* value) { + // TODO: handle conversion from other types + if (value->rtype == INT32) { + return *reinterpret_cast(value->data); + } + + // TODO: improve error + return Status::Invalid("invalid conversion"); + } +}; + +template +inline Status VisitVector_Int32(SEXP x, R_xlen_t size, VisitorFunc&& func) { + RObject obj{INT32, nullptr, false}; 
+ cpp11::integers values(x); + for (int value : values) { + obj.data = reinterpret_cast(&value); + obj.null = value == NA_INTEGER; + RETURN_NOT_OK(func(&obj)); + } + return Status::OK(); +} + +template +inline Status VisitVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { + RVectorType rtype = GetVectorType(x); + + switch (rtype) { + case INT32: + return VisitVector_Int32(x, size, std::forward(func)); + default: + break; + } + + return Status::OK(); +} using RConverter = Converter; +template +class RPrimitiveConverter; + // TODO: this needs various versions as what python does: -// class PyPrimitiveConverter> -// -// class PyPrimitiveConverter< -// T, enable_if_t::value || is_number_type::value || -// is_decimal_type::value || is_date_type::value || -// is_time_type::value>> : public PrimitiveConverter { -// + // class PyPrimitiveConverter< // T, enable_if_t::value || is_number_type::value || // is_decimal_type::value || is_date_type::value || // is_time_type::value>> : public PrimitiveConverter { -// -// class PyPrimitiveConverter> -// : public PrimitiveConverter { -// -// class PyPrimitiveConverter::value>> -// : public PrimitiveConverter { -// -// class PyPrimitiveConverter> -// : public PrimitiveConverter { -template -class RPrimitiveConverter : public PrimitiveConverter { +template +class RPrimitiveConverter> + : public PrimitiveConverter { + public: + Status Append(RObject* value) override { + return this->primitive_builder_->AppendNull(); + } +}; + +// Temporary (this only handles int32 for now) +template +class RPrimitiveConverter< + T, enable_if_t::value && + (is_boolean_type::value || is_number_type::value || + is_decimal_type::value || is_date_type::value || + is_time_type::value)>> : public PrimitiveConverter { + public: + Status Append(RObject* value) { + return Status::NotImplemented("conversion to fixed size binary not yet implemented"); + } +}; + +template +class RPrimitiveConverter::value>> + : public PrimitiveConverter { public: Status 
Append(RObject* value) { - Rprintf("T = %s\n", arrow::util::nameof().c_str()); + if (RValue::IsNull(value)) { + return this->primitive_builder_->AppendNull(); + } else { + ARROW_ASSIGN_OR_RAISE( + auto converted, RValue::Convert(this->primitive_type_, this->options_, value)); + return this->primitive_builder_->Append(converted); + } return Status::OK(); } }; +template +class RPrimitiveConverter> + : public PrimitiveConverter { + public: + Status Append(RObject* value) { + return Status::NotImplemented("conversion to binary not yet implemented"); + } +}; + +template +class RPrimitiveConverter::value>> + : public PrimitiveConverter { + public: + Status Append(RObject* value) { + return Status::NotImplemented("conversion to fixed size binary not yet implemented"); + } +}; + +template +class RPrimitiveConverter> + : public PrimitiveConverter { + public: + Status Append(RObject* value) { + return Status::NotImplemented("conversion to string not yet implemented"); + } +}; + +template +class RPrimitiveConverter< + T, enable_if_t::value || is_duration_type::value>> + : public PrimitiveConverter { + public: + Status Append(RObject* value) { + return Status::NotImplemented( + "conversion to timestamp or duration not yet implemented"); + } +}; + template class RListConverter; @@ -135,16 +289,14 @@ struct RConverterTrait { }; class RStructConverter : public StructConverter { -public: - Status Append(RObject* value) override { + public: + Status Append(RObject* value) override { return Status::OK(); } + + protected: + Status Init(MemoryPool* pool) override { + RETURN_NOT_OK((StructConverter::Init(pool))); return Status::OK(); } - -protected: -Status Init(MemoryPool* pool) override { - RETURN_NOT_OK((StructConverter::Init(pool))); - return Status::OK(); -} }; template <> @@ -170,8 +322,8 @@ std::shared_ptr vec_to_arrow(SEXP x, SEXP s_type) { options.type, options, gc_memory_pool())); StopIfNotOk(converter->Reserve(options.size)); - // TODO: iterate and call Append on each value 
- // StopIfNotOk(converter->Append(x)); + StopIfNotOk(VisitVector(x, options.size, + [&converter](RObject* obj) { return converter->Append(obj); })); return ValueOrStop(converter->ToArray()); } From 1b2cfcc7506180a23e9e2dff26b75d14b9532d7a Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 13 Nov 2020 15:17:53 +0100 Subject: [PATCH 04/82] handle double --- r/src/r_to_arrow.cpp | 56 ++++++++++++++++++++++++++++++++++---------- 1 file changed, 44 insertions(+), 12 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index b72942c6b6f..fa54bfb2a6a 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -115,6 +115,7 @@ class RValue { static bool IsNull(RObject* obj) { return obj->null; } // TODO: generalise + static Result Convert(const Int32Type*, const RConversionOptions&, RObject* value) { // TODO: handle conversion from other types @@ -125,15 +126,39 @@ class RValue { // TODO: improve error return Status::Invalid("invalid conversion"); } + + static Result Convert(const DoubleType*, const RConversionOptions&, + RObject* value) { + // TODO: handle conversion from other types + if (value->rtype == FLOAT64) { + return *reinterpret_cast(value->data); + } + + // TODO: improve error + return Status::Invalid("invalid conversion"); + } }; -template -inline Status VisitVector_Int32(SEXP x, R_xlen_t size, VisitorFunc&& func) { - RObject obj{INT32, nullptr, false}; - cpp11::integers values(x); - for (int value : values) { +template +bool is_NA(T value); + +template <> +bool is_NA(int value) { + return value == NA_INTEGER; +} + +template <> +bool is_NA(double value) { + return ISNA(value); +} + +template +inline Status VisitRPrimitiveVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { + RObject obj{rtype, nullptr, false}; + cpp11::r_vector values(x); + for (T value : values) { obj.data = reinterpret_cast(&value); - obj.null = value == NA_INTEGER; + obj.null = is_NA(value); RETURN_NOT_OK(func(&obj)); } return Status::OK(); @@ -145,7 +170,11 @@ 
inline Status VisitVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { switch (rtype) { case INT32: - return VisitVector_Int32(x, size, std::forward(func)); + return VisitRPrimitiveVector( + x, size, std::forward(func)); + case FLOAT64: + return VisitRPrimitiveVector( + x, size, std::forward(func)); default: break; } @@ -177,10 +206,12 @@ class RPrimitiveConverter> // Temporary (this only handles int32 for now) template class RPrimitiveConverter< - T, enable_if_t::value && - (is_boolean_type::value || is_number_type::value || - is_decimal_type::value || is_date_type::value || - is_time_type::value)>> : public PrimitiveConverter { + T, + enable_if_t< + !std::is_same::value && !std::is_same::value && + (is_boolean_type::value || is_number_type::value || + is_decimal_type::value || is_date_type::value || is_time_type::value)>> + : public PrimitiveConverter { public: Status Append(RObject* value) { return Status::NotImplemented("conversion to fixed size binary not yet implemented"); @@ -188,7 +219,8 @@ class RPrimitiveConverter< }; template -class RPrimitiveConverter::value>> +class RPrimitiveConverter::value || + std::is_same::value>> : public PrimitiveConverter { public: Status Append(RObject* value) { From 89e77f5ce6e9278d0f07ae4b61d9784b91811b18 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 13 Nov 2020 15:28:54 +0100 Subject: [PATCH 05/82] + raw --- r/src/r_to_arrow.cpp | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index fa54bfb2a6a..e6690fd66bb 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -137,6 +137,17 @@ class RValue { // TODO: improve error return Status::Invalid("invalid conversion"); } + + static Result Convert(const UInt8Type*, const RConversionOptions&, + RObject* value) { + // TODO: handle conversion from other types + if (value->rtype == UINT8) { + return *reinterpret_cast(value->data); + } + + // TODO: improve error + return 
Status::Invalid("invalid conversion"); + } }; template @@ -152,6 +163,11 @@ bool is_NA(double value) { return ISNA(value); } +template <> +bool is_NA(uint8_t value) { + return false; +} + template inline Status VisitRPrimitiveVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { RObject obj{rtype, nullptr, false}; @@ -169,6 +185,9 @@ inline Status VisitVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { RVectorType rtype = GetVectorType(x); switch (rtype) { + case UINT8: + return VisitRPrimitiveVector( + x, size, std::forward(func)); case INT32: return VisitRPrimitiveVector( x, size, std::forward(func)); @@ -209,6 +228,7 @@ class RPrimitiveConverter< T, enable_if_t< !std::is_same::value && !std::is_same::value && + !std::is_same::value && (is_boolean_type::value || is_number_type::value || is_decimal_type::value || is_date_type::value || is_time_type::value)>> : public PrimitiveConverter { @@ -220,7 +240,8 @@ class RPrimitiveConverter< template class RPrimitiveConverter::value || - std::is_same::value>> + std::is_same::value || + std::is_same::value>> : public PrimitiveConverter { public: Status Append(RObject* value) { From 5d9adf5b9145073d5a2d9e4c33f45582965de3c2 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 23 Nov 2020 12:07:51 +0100 Subject: [PATCH 06/82] rename RObject -> RScalar, + handling of bool --- r/src/r_to_arrow.cpp | 73 +++++++++++++++++++++++++++----------------- 1 file changed, 45 insertions(+), 28 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index e6690fd66bb..b6854eeb020 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -104,7 +104,7 @@ RVectorType GetVectorType(SEXP x) { return OTHER; } -struct RObject { +struct RScalar { RVectorType rtype; void* data; bool null; @@ -112,12 +112,12 @@ struct RObject { class RValue { public: - static bool IsNull(RObject* obj) { return obj->null; } + static bool IsNull(RScalar* obj) { return obj->null; } // TODO: generalise static Result Convert(const Int32Type*, 
const RConversionOptions&, - RObject* value) { + RScalar* value) { // TODO: handle conversion from other types if (value->rtype == INT32) { return *reinterpret_cast(value->data); @@ -127,8 +127,18 @@ class RValue { return Status::Invalid("invalid conversion"); } + static Result Convert(const BooleanType*, const RConversionOptions&, + RScalar* value) { + if (value->rtype == BOOLEAN) { + return *reinterpret_cast(value->data); + } + + // TODO: improve error + return Status::Invalid("invalid conversion"); + } + static Result Convert(const DoubleType*, const RConversionOptions&, - RObject* value) { + RScalar* value) { // TODO: handle conversion from other types if (value->rtype == FLOAT64) { return *reinterpret_cast(value->data); @@ -139,7 +149,7 @@ class RValue { } static Result Convert(const UInt8Type*, const RConversionOptions&, - RObject* value) { + RScalar* value) { // TODO: handle conversion from other types if (value->rtype == UINT8) { return *reinterpret_cast(value->data); @@ -168,9 +178,14 @@ bool is_NA(uint8_t value) { return false; } +template <> +bool is_NA(cpp11::r_bool value) { + return false; +} + template inline Status VisitRPrimitiveVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { - RObject obj{rtype, nullptr, false}; + RScalar obj{rtype, nullptr, false}; cpp11::r_vector values(x); for (T value : values) { obj.data = reinterpret_cast(&value); @@ -185,6 +200,9 @@ inline Status VisitVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { RVectorType rtype = GetVectorType(x); switch (rtype) { + case BOOLEAN: + return VisitRPrimitiveVector( + x, size, std::forward(func)); case UINT8: return VisitRPrimitiveVector( x, size, std::forward(func)); @@ -201,7 +219,7 @@ inline Status VisitVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { return Status::OK(); } -using RConverter = Converter; +using RConverter = Converter; template class RPrimitiveConverter; @@ -217,34 +235,33 @@ template class RPrimitiveConverter> : public PrimitiveConverter { public: - Status 
Append(RObject* value) override { + Status Append(RScalar* value) override { return this->primitive_builder_->AppendNull(); } }; -// Temporary (this only handles int32 for now) template class RPrimitiveConverter< - T, - enable_if_t< - !std::is_same::value && !std::is_same::value && - !std::is_same::value && - (is_boolean_type::value || is_number_type::value || - is_decimal_type::value || is_date_type::value || is_time_type::value)>> + T, enable_if_t::value && + !std::is_same::value && + !std::is_same::value && !is_boolean_type::value && + (is_number_type::value || is_decimal_type::value || + is_date_type::value || is_time_type::value)>> : public PrimitiveConverter { public: - Status Append(RObject* value) { + Status Append(RScalar* value) { return Status::NotImplemented("conversion to fixed size binary not yet implemented"); } }; template -class RPrimitiveConverter::value || - std::is_same::value || - std::is_same::value>> +class RPrimitiveConverter< + T, + enable_if_t::value || std::is_same::value || + std::is_same::value || is_boolean_type::value>> : public PrimitiveConverter { public: - Status Append(RObject* value) { + Status Append(RScalar* value) { if (RValue::IsNull(value)) { return this->primitive_builder_->AppendNull(); } else { @@ -260,7 +277,7 @@ template class RPrimitiveConverter> : public PrimitiveConverter { public: - Status Append(RObject* value) { + Status Append(RScalar* value) { return Status::NotImplemented("conversion to binary not yet implemented"); } }; @@ -269,7 +286,7 @@ template class RPrimitiveConverter::value>> : public PrimitiveConverter { public: - Status Append(RObject* value) { + Status Append(RScalar* value) { return Status::NotImplemented("conversion to fixed size binary not yet implemented"); } }; @@ -278,7 +295,7 @@ template class RPrimitiveConverter> : public PrimitiveConverter { public: - Status Append(RObject* value) { + Status Append(RScalar* value) { return Status::NotImplemented("conversion to string not yet implemented"); } 
}; @@ -288,7 +305,7 @@ class RPrimitiveConverter< T, enable_if_t::value || is_duration_type::value>> : public PrimitiveConverter { public: - Status Append(RObject* value) { + Status Append(RScalar* value) { return Status::NotImplemented( "conversion to timestamp or duration not yet implemented"); } @@ -310,7 +327,7 @@ class RListConverter; template class RDictionaryConverter : public DictionaryConverter { public: - Status Append(RObject* value) { return Status::OK(); } + Status Append(RScalar* value) { return Status::OK(); } }; class RStructConverter; @@ -333,7 +350,7 @@ struct RConverterTrait> { template class RListConverter : public ListConverter { public: - Status Append(RObject* value) { return Status::OK(); } + Status Append(RScalar* value) { return Status::OK(); } }; template <> @@ -343,7 +360,7 @@ struct RConverterTrait { class RStructConverter : public StructConverter { public: - Status Append(RObject* value) override { return Status::OK(); } + Status Append(RScalar* value) override { return Status::OK(); } protected: Status Init(MemoryPool* pool) override { @@ -376,7 +393,7 @@ std::shared_ptr vec_to_arrow(SEXP x, SEXP s_type) { StopIfNotOk(converter->Reserve(options.size)); StopIfNotOk(VisitVector(x, options.size, - [&converter](RObject* obj) { return converter->Append(obj); })); + [&converter](RScalar* obj) { return converter->Append(obj); })); return ValueOrStop(converter->ToArray()); } From eeb50c7fa0e951af40989150143bdc6f031b825f Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 23 Nov 2020 13:42:24 +0100 Subject: [PATCH 07/82] skeleton for RPrimitiveConverter for cases: is_number_type::value is_boolean_type::value is_date_type::value is_time_type::value is_decimal_type::value> Most RScalar::Convert() not implemented yet --- r/src/r_to_arrow.cpp | 124 +++++++++++++++++++++++++++++++++---------- 1 file changed, 97 insertions(+), 27 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index b6854eeb020..d01f2c575bf 100644 --- 
a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -116,23 +116,24 @@ class RValue { // TODO: generalise - static Result Convert(const Int32Type*, const RConversionOptions&, - RScalar* value) { - // TODO: handle conversion from other types - if (value->rtype == INT32) { - return *reinterpret_cast(value->data); + static Result Convert(const BooleanType*, const RConversionOptions&, + RScalar* value) { + if (value->rtype == BOOLEAN) { + return *reinterpret_cast(value->data); } // TODO: improve error return Status::Invalid("invalid conversion"); } - static Result Convert(const BooleanType*, const RConversionOptions&, - RScalar* value) { - if (value->rtype == BOOLEAN) { - return *reinterpret_cast(value->data); - } + static Result Convert(const HalfFloatType*, const RConversionOptions&, + RScalar* value) { + // TODO: improve error + return Status::Invalid("invalid conversion"); + } + static Result Convert(const FloatType*, const RConversionOptions&, + RScalar* value) { // TODO: improve error return Status::Invalid("invalid conversion"); } @@ -158,6 +159,89 @@ class RValue { // TODO: improve error return Status::Invalid("invalid conversion"); } + + static Result Convert(const Int8Type*, const RConversionOptions&, + RScalar* value) { + // TODO: improve error + return Status::Invalid("invalid conversion"); + } + + static Result Convert(const Int16Type*, const RConversionOptions&, + RScalar* value) { + // TODO: improve error + return Status::Invalid("invalid conversion"); + } + + static Result Convert(const UInt16Type*, const RConversionOptions&, + RScalar* value) { + // TODO: improve error + return Status::Invalid("invalid conversion"); + } + + static Result Convert(const Int32Type*, const RConversionOptions&, + RScalar* value) { + // TODO: handle conversion from other types + if (value->rtype == INT32) { + return *reinterpret_cast(value->data); + } + + // TODO: improve error + return Status::Invalid("invalid conversion"); + } + + static Result Convert(const UInt32Type*, 
const RConversionOptions&, + RScalar* value) { + // TODO: improve error + return Status::Invalid("invalid conversion"); + } + + static Result Convert(const Int64Type*, const RConversionOptions&, + RScalar* value) { + // TODO: improve error + return Status::Invalid("invalid conversion"); + } + + static Result Convert(const UInt64Type*, const RConversionOptions&, + RScalar* value) { + // TODO: improve error + return Status::Invalid("invalid conversion"); + } + + static Result Convert(const Date32Type*, const RConversionOptions&, + RScalar* value) { + // TODO: improve error + return Status::Invalid("invalid conversion"); + } + + static Result Convert(const Date64Type*, const RConversionOptions&, + RScalar* value) { + // TODO: improve error + return Status::Invalid("invalid conversion"); + } + + static Result Convert(const Time32Type*, const RConversionOptions&, + RScalar* value) { + // TODO: improve error + return Status::Invalid("invalid conversion"); + } + + static Result Convert(const Time64Type*, const RConversionOptions&, + RScalar* value) { + // TODO: improve error + return Status::Invalid("invalid conversion"); + } + + static Result Convert(const Decimal128Type*, const RConversionOptions&, + RScalar* value) { + // TODO: improve error + return Status::Invalid("invalid conversion"); + } + + static Result Convert(const Decimal256Type*, const RConversionOptions&, + RScalar* value) { + // TODO: improve error + return Status::Invalid("invalid conversion"); + } }; template @@ -242,23 +326,9 @@ class RPrimitiveConverter> template class RPrimitiveConverter< - T, enable_if_t::value && - !std::is_same::value && - !std::is_same::value && !is_boolean_type::value && - (is_number_type::value || is_decimal_type::value || - is_date_type::value || is_time_type::value)>> - : public PrimitiveConverter { - public: - Status Append(RScalar* value) { - return Status::NotImplemented("conversion to fixed size binary not yet implemented"); - } -}; - -template -class RPrimitiveConverter< 
- T, - enable_if_t::value || std::is_same::value || - std::is_same::value || is_boolean_type::value>> + T, enable_if_t::value || is_boolean_type::value || + is_date_type::value || is_time_type::value || + is_decimal_type::value>> : public PrimitiveConverter { public: Status Append(RScalar* value) { From d78999bae96f198b234a9e00ba781c62917acecc Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 23 Nov 2020 14:52:46 +0100 Subject: [PATCH 08/82] initial handling of strings --- r/src/r_to_arrow.cpp | 50 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 42 insertions(+), 8 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index d01f2c575bf..83e4bf67f92 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -242,6 +242,18 @@ class RValue { // TODO: improve error return Status::Invalid("invalid conversion"); } + + template + static enable_if_string> Convert(const T*, + const RConversionOptions&, + RScalar* value) { + if (value->rtype == STRING) { + return *reinterpret_cast(value->data); + } + + // TODO: improve error + return Status::Invalid("invalid conversion"); + } }; template @@ -267,6 +279,11 @@ bool is_NA(cpp11::r_bool value) { return false; } +template <> +bool is_NA(cpp11::r_string value) { + return value == NA_STRING; +} + template inline Status VisitRPrimitiveVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { RScalar obj{rtype, nullptr, false}; @@ -296,6 +313,9 @@ inline Status VisitVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { case FLOAT64: return VisitRPrimitiveVector( x, size, std::forward(func)); + case STRING: + return VisitRPrimitiveVector( + x, size, std::forward(func)); default: break; } @@ -308,13 +328,6 @@ using RConverter = Converter; template class RPrimitiveConverter; -// TODO: this needs various versions as what python does: - -// class PyPrimitiveConverter< -// T, enable_if_t::value || is_number_type::value || -// is_decimal_type::value || is_date_type::value || -// is_time_type::value>> : 
public PrimitiveConverter { - template class RPrimitiveConverter> : public PrimitiveConverter { @@ -365,9 +378,30 @@ template class RPrimitiveConverter> : public PrimitiveConverter { public: + using OffsetType = typename T::offset_type; + Status Append(RScalar* value) { - return Status::NotImplemented("conversion to string not yet implemented"); + if (RValue::IsNull(value)) { + return this->primitive_builder_->AppendNull(); + } else { + ARROW_ASSIGN_OR_RAISE( + auto converted, RValue::Convert(this->primitive_type_, this->options_, value)); + + // TODO: the python implementation uses a PyBytesView class in between + // maybe useful for when we convert from a list of raw vectors + if (!IS_ASCII(converted) || !IS_UTF8(converted)) { + observed_binary_ = true; + } + + ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(XLENGTH(converted))); + this->primitive_builder_->UnsafeAppend(CHAR(converted), + static_cast(XLENGTH(converted))); + } + return Status::OK(); } + + protected: + bool observed_binary_ = false; }; template From e07795be396a46551d995bb7d250762ea16d7a4e Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 23 Nov 2020 15:37:20 +0100 Subject: [PATCH 09/82] intermediate RBytesView struct to deal with both string and binary (later) --- r/src/r_to_arrow.cpp | 43 +++++++++++++++++++++++++++++++------------ 1 file changed, 31 insertions(+), 12 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 83e4bf67f92..2fe056da92a 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -110,6 +110,27 @@ struct RScalar { bool null; }; +struct RBytesView { + const char* bytes; + R_xlen_t size; + bool is_utf8; + + Status ParseString(RScalar* value) { + if (value->rtype != STRING) { + return Status::Invalid("cannot parse string"); + } + + SEXP s = *reinterpret_cast(value->data); + bytes = CHAR(s); + size = XLENGTH(s); + + // TODO: test it + is_utf8 = true; + + return Status::OK(); + } +}; + class RValue { public: static bool 
IsNull(RScalar* obj) { return obj->null; } @@ -244,11 +265,10 @@ class RValue { } template - static enable_if_string> Convert(const T*, - const RConversionOptions&, - RScalar* value) { + static enable_if_string Convert(const T*, const RConversionOptions&, + RScalar* value, RBytesView& view) { if (value->rtype == STRING) { - return *reinterpret_cast(value->data); + return view.ParseString(value); } // TODO: improve error @@ -384,24 +404,23 @@ class RPrimitiveConverter> if (RValue::IsNull(value)) { return this->primitive_builder_->AppendNull(); } else { - ARROW_ASSIGN_OR_RAISE( - auto converted, RValue::Convert(this->primitive_type_, this->options_, value)); + ARROW_RETURN_NOT_OK( + RValue::Convert(this->primitive_type_, this->options_, value, view_)); - // TODO: the python implementation uses a PyBytesView class in between - // maybe useful for when we convert from a list of raw vectors - if (!IS_ASCII(converted) || !IS_UTF8(converted)) { + if (!view_.is_utf8) { observed_binary_ = true; } - ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(XLENGTH(converted))); - this->primitive_builder_->UnsafeAppend(CHAR(converted), - static_cast(XLENGTH(converted))); + ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(view_.size)); + this->primitive_builder_->UnsafeAppend(view_.bytes, + static_cast(view_.size)); } return Status::OK(); } protected: bool observed_binary_ = false; + RBytesView view_; }; template From ad26b41f086b014a63438282eef8b445d5392d1a Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 23 Nov 2020 16:14:18 +0100 Subject: [PATCH 10/82] + binary/fixed binary handling --- r/src/r_to_arrow.cpp | 81 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 77 insertions(+), 4 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 2fe056da92a..cee7591e7df 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -64,6 +65,8 @@ enum RVectorType 
{ DATE, TIME, TIMESTAMP, + BINARY, + LIST, OTHER }; @@ -95,8 +98,12 @@ RVectorType GetVectorType(SEXP x) { if (Rf_inherits(x, "data.frame")) { return DATAFRAME; } - // TODO: binary, list, POSIXlt - break; + + if (Rf_inherits(x, "arrow_binary")) { + return BINARY; + } + + return LIST; } default: break; @@ -129,6 +136,19 @@ struct RBytesView { return Status::OK(); } + + Status ParseRaw(RScalar* value) { + if (value->rtype != BINARY) { + return Status::Invalid("cannot parse binary"); + } + + SEXP raw = *reinterpret_cast(value->data); + bytes = reinterpret_cast(RAW_RO(raw)); + size = XLENGTH(raw); + is_utf8 = false; + + return Status::OK(); + } }; class RValue { @@ -274,6 +294,26 @@ class RValue { // TODO: improve error return Status::Invalid("invalid conversion"); } + + static Status Convert(const BaseBinaryType*, const RConversionOptions&, RScalar* value, + RBytesView& view) { + if (value->rtype == BINARY) { + return view.ParseRaw(value); + } + + // TODO: improve error + return Status::Invalid("invalid conversion"); + } + + static Status Convert(const FixedSizeBinaryType* type, const RConversionOptions&, + RScalar* value, RBytesView& view) { + ARROW_RETURN_NOT_OK(view.ParseRaw(value)); + if (view.size != type->byte_width()) { + return Status::Invalid("invalid size"); + } else { + return Status::OK(); + } + } }; template @@ -380,9 +420,27 @@ template class RPrimitiveConverter> : public PrimitiveConverter { public: + using OffsetType = typename T::offset_type; + Status Append(RScalar* value) { - return Status::NotImplemented("conversion to binary not yet implemented"); + if (RValue::IsNull(value)) { + this->primitive_builder_->UnsafeAppendNull(); + } else { + ARROW_RETURN_NOT_OK( + RValue::Convert(this->primitive_type_, this->options_, value, view_)); + // Since we don't know the varying length input size in advance, we need to + // reserve space in the value builder one by one. ReserveData raises CapacityError + // if the value would not fit into the array. 
+ ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(view_.size)); + this->primitive_builder_->UnsafeAppend(view_.bytes, + static_cast(view_.size)); + } + + return Status::OK(); } + + protected: + RBytesView view_; }; template @@ -390,8 +448,23 @@ class RPrimitiveConverter::v : public PrimitiveConverter { public: Status Append(RScalar* value) { - return Status::NotImplemented("conversion to fixed size binary not yet implemented"); + if (RValue::IsNull(value)) { + this->primitive_builder_->UnsafeAppendNull(); + } else { + ARROW_RETURN_NOT_OK( + RValue::Convert(this->primitive_type_, this->options_, value, view_)); + // Since we don't know the varying length input size in advance, we need to + // reserve space in the value builder one by one. ReserveData raises CapacityError + // if the value would not fit into the array. + ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(view_.size)); + this->primitive_builder_->UnsafeAppend(view_.bytes); + } + + return Status::OK(); } + + protected: + RBytesView view_; }; template From d51f08ef36b9612971a9fc325af26221f9c069a0 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 24 Nov 2020 11:59:13 +0100 Subject: [PATCH 11/82] dictionary --- r/src/r_to_arrow.cpp | 81 +++++++++++++++++++++++++++++++++----------- 1 file changed, 62 insertions(+), 19 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index cee7591e7df..7db26f2f94f 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -67,6 +67,7 @@ enum RVectorType { TIMESTAMP, BINARY, LIST, + FACTOR, OTHER }; @@ -78,6 +79,9 @@ RVectorType GetVectorType(SEXP x) { case RAWSXP: return UINT8; case INTSXP: + if (Rf_inherits(x, "factor")) { + return FACTOR; + } return INT32; case STRSXP: return STRING; @@ -123,10 +127,6 @@ struct RBytesView { bool is_utf8; Status ParseString(RScalar* value) { - if (value->rtype != STRING) { - return Status::Invalid("cannot parse string"); - } - SEXP s = *reinterpret_cast(value->data); bytes = CHAR(s); size 
= XLENGTH(s); @@ -287,8 +287,12 @@ class RValue { template static enable_if_string Convert(const T*, const RConversionOptions&, RScalar* value, RBytesView& view) { - if (value->rtype == STRING) { - return view.ParseString(value); + switch (value->rtype) { + case STRING: + case FACTOR: + return view.ParseString(value); + default: + break; } // TODO: improve error @@ -356,6 +360,26 @@ inline Status VisitRPrimitiveVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { return Status::OK(); } +template +inline Status VisitFactor(SEXP x, R_xlen_t size, VisitorFunc&& func) { + cpp11::strings levels(Rf_getAttrib(x, R_LevelsSymbol)); + SEXP* levels_ptr = const_cast(STRING_PTR_RO(levels)); + + RScalar obj{FACTOR, nullptr, false}; + cpp11::r_vector values(x); + + for (int value : values) { + if (is_NA(value)) { + obj.null = true; + } else { + obj.null = false; + obj.data = reinterpret_cast(&levels_ptr[value - 1]); + } + RETURN_NOT_OK(func(&obj)); + } + return Status::OK(); +} + template inline Status VisitVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { RVectorType rtype = GetVectorType(x); @@ -376,11 +400,15 @@ inline Status VisitVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { case STRING: return VisitRPrimitiveVector( x, size, std::forward(func)); + + case FACTOR: + return VisitFactor(x, size, std::forward(func)); + default: break; } - return Status::OK(); + return Status::Invalid("No visitor for R type ", rtype); } using RConverter = Converter; @@ -510,20 +538,35 @@ class RPrimitiveConverter< template class RListConverter; -// TODO: replace by various versions. 
The python code has 2 versions: -// -// template -// class PyDictionaryConverter> -// : public DictionaryConverter { -// -// template -// class PyDictionaryConverter> -// : public DictionaryConverter { -// template -class RDictionaryConverter : public DictionaryConverter { +class RDictionaryConverter; + +template +class RDictionaryConverter> + : public DictionaryConverter { public: - Status Append(RScalar* value) { return Status::OK(); } + Status Append(RScalar* value) override { + return Status::NotImplemented( + "dictionaries only implemented with string value types"); + } +}; + +template +class RDictionaryConverter> + : public DictionaryConverter { + public: + Status Append(RScalar* value) override { + if (RValue::IsNull(value)) { + return this->value_builder_->AppendNull(); + } else { + ARROW_RETURN_NOT_OK( + RValue::Convert(this->value_type_, this->options_, value, view_)); + return this->value_builder_->Append(view_.bytes, static_cast(view_.size)); + } + } + + protected: + RBytesView view_; }; class RStructConverter; From 1ecab028a4ab96982671d9b4390f17feb1bf9cae Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 24 Nov 2020 14:58:36 +0100 Subject: [PATCH 12/82] + list --- r/src/r_to_arrow.cpp | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 7db26f2f94f..07ee79e76e1 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -348,6 +348,11 @@ bool is_NA(cpp11::r_string value) { return value == NA_STRING; } +template <> +bool is_NA(SEXP value) { + return Rf_isNull(value); +} + template inline Status VisitRPrimitiveVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { RScalar obj{rtype, nullptr, false}; @@ -401,6 +406,10 @@ inline Status VisitVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { return VisitRPrimitiveVector( x, size, std::forward(func)); + case LIST: + return VisitRPrimitiveVector( + x, size, std::forward(func)); + case FACTOR: return 
VisitFactor(x, size, std::forward(func)); @@ -411,6 +420,13 @@ inline Status VisitVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { return Status::Invalid("No visitor for R type ", rtype); } +template +Status Extend(T* converter, SEXP x, R_xlen_t size) { + RETURN_NOT_OK(converter->Reserve(size)); + return VisitVector(x, size, + [&converter](RScalar* obj) { return converter->Append(obj); }); +} + using RConverter = Converter; template @@ -589,7 +605,21 @@ struct RConverterTrait> { template class RListConverter : public ListConverter { public: - Status Append(RScalar* value) { return Status::OK(); } + Status Append(RScalar* value) { + if (RValue::IsNull(value)) { + return this->list_builder_->AppendNull(); + } + + // append one element to the list + RETURN_NOT_OK(this->list_builder_->Append()); + + // append the contents through the list value converter + SEXP obj = *reinterpret_cast(value->data); + R_xlen_t size = XLENGTH(obj); + RETURN_NOT_OK(this->list_builder_->ValidateOverflow(size)); + + return Extend(this->value_converter_.get(), obj, size); + } }; template <> @@ -630,9 +660,7 @@ std::shared_ptr vec_to_arrow(SEXP x, SEXP s_type) { auto converter = ValueOrStop(MakeConverter( options.type, options, gc_memory_pool())); - StopIfNotOk(converter->Reserve(options.size)); - StopIfNotOk(VisitVector(x, options.size, - [&converter](RScalar* obj) { return converter->Append(obj); })); + StopIfNotOk(Extend(converter.get(), x, options.size)); return ValueOrStop(converter->ToArray()); } From 6c0dad702ffac2b412643a72ca4daff956bb5817 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 24 Nov 2020 15:41:55 +0100 Subject: [PATCH 13/82] integer64 --- r/src/r_to_arrow.cpp | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 07ee79e76e1..3a0532d4494 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -238,8 +238,13 @@ class RValue { static Result Convert(const Int64Type*, 
const RConversionOptions&, RScalar* value) { + // TODO: handle conversion from other types + if (value->rtype == INTEGER64) { + return *reinterpret_cast(value->data); + } + // TODO: improve error - return Status::Invalid("invalid conversion"); + return Status::Invalid("invalid conversion to int64"); } static Result Convert(const UInt64Type*, const RConversionOptions&, @@ -353,6 +358,11 @@ bool is_NA(SEXP value) { return Rf_isNull(value); } +template <> +bool is_NA(int64_t value) { + return value == NA_INT64; +} + template inline Status VisitRPrimitiveVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { RScalar obj{rtype, nullptr, false}; @@ -365,6 +375,18 @@ inline Status VisitRPrimitiveVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { return Status::OK(); } +template +inline Status VisitInt64Vector(SEXP x, R_xlen_t size, VisitorFunc&& func) { + RScalar obj{INTEGER64, nullptr, false}; + cpp11::doubles values(x); + for (double value : values) { + obj.data = reinterpret_cast(&value); + obj.null = is_NA(*reinterpret_cast(&value)); + RETURN_NOT_OK(func(&obj)); + } + return Status::OK(); +} + template inline Status VisitFactor(SEXP x, R_xlen_t size, VisitorFunc&& func) { cpp11::strings levels(Rf_getAttrib(x, R_LevelsSymbol)); @@ -406,6 +428,9 @@ inline Status VisitVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { return VisitRPrimitiveVector( x, size, std::forward(func)); + case INTEGER64: + return VisitInt64Vector(x, size, std::forward(func)); + case LIST: return VisitRPrimitiveVector( x, size, std::forward(func)); From 0164da693720a33ffd142239ba8cb7c821a50dd4 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Thu, 26 Nov 2020 11:09:00 +0100 Subject: [PATCH 14/82] struct converter --- r/src/r_to_arrow.cpp | 60 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 49 insertions(+), 11 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 3a0532d4494..442bd452f3d 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -407,9 
+407,18 @@ inline Status VisitFactor(SEXP x, R_xlen_t size, VisitorFunc&& func) { return Status::OK(); } -template -inline Status VisitVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { +template +inline Status VisitDataFrame(SEXP x, R_xlen_t size, T* converter); + +template +inline Status VisitVector(SEXP x, R_xlen_t size, T* converter) { + if (converter->type()->id() == Type::STRUCT) { + return VisitDataFrame(x, size, converter); + } + RVectorType rtype = GetVectorType(x); + auto func = [&converter](RScalar* obj) { return converter->Append(obj); }; + using VisitorFunc = decltype(func); switch (rtype) { case BOOLEAN: @@ -448,8 +457,7 @@ inline Status VisitVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { template Status Extend(T* converter, SEXP x, R_xlen_t size) { RETURN_NOT_OK(converter->Reserve(size)); - return VisitVector(x, size, - [&converter](RScalar* obj) { return converter->Append(obj); }); + return VisitVector(x, size, converter); } using RConverter = Converter; @@ -610,8 +618,6 @@ class RDictionaryConverter> RBytesView view_; }; -class RStructConverter; - template struct RConverterTrait; @@ -642,11 +648,12 @@ class RListConverter : public ListConverter { SEXP obj = *reinterpret_cast(value->data); R_xlen_t size = XLENGTH(obj); RETURN_NOT_OK(this->list_builder_->ValidateOverflow(size)); - return Extend(this->value_converter_.get(), obj, size); } }; +class RStructConverter; + template <> struct RConverterTrait { using type = RStructConverter; @@ -654,15 +661,46 @@ struct RConverterTrait { class RStructConverter : public StructConverter { public: - Status Append(RScalar* value) override { return Status::OK(); } + Status Append(RScalar* value) override { + return Status::NotImplemented("RStructConverter does not use Append()"); + } + + Status Reserve(int64_t additional_capacity) override { + // in contrast with StructConverter, this does not Reserve() + // on children, because it will be done as part of Visit() > Extend() + return 
this->builder_->Reserve(additional_capacity); + } + + Status Visit(SEXP x, R_xlen_t size) { + // iterate over columns of x + R_xlen_t n_columns = XLENGTH(x); + if (!Rf_inherits(x, "data.frame")) { + return Status::Invalid("Can only convert data frames to Struct type"); + } + + auto struct_builder = checked_cast(this->builder().get()); + for (R_xlen_t i = 0; i < size; i++) { + RETURN_NOT_OK(struct_builder->Append()); + } + + for (R_xlen_t i = 0; i < n_columns; i++) { + RETURN_NOT_OK(Extend(this->children_[i].get(), VECTOR_ELT(x, i), size)); + } + + return Status::OK(); + } protected: Status Init(MemoryPool* pool) override { - RETURN_NOT_OK((StructConverter::Init(pool))); - return Status::OK(); + return StructConverter::Init(pool); } }; +template +inline Status VisitDataFrame(SEXP x, R_xlen_t size, T* converter) { + return static_cast(converter)->Visit(x, size); +} + template <> struct RConverterTrait { template @@ -684,8 +722,8 @@ std::shared_ptr vec_to_arrow(SEXP x, SEXP s_type) { auto converter = ValueOrStop(MakeConverter( options.type, options, gc_memory_pool())); - StopIfNotOk(Extend(converter.get(), x, options.size)); + return ValueOrStop(converter->ToArray()); } From 618aa0cfe87d4f074d3acaa0c372eec0ecc9b966 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Thu, 26 Nov 2020 12:17:52 +0100 Subject: [PATCH 15/82] binary --- r/src/r_to_arrow.cpp | 67 +++++++++++++++++++++++++++----------------- 1 file changed, 42 insertions(+), 25 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 442bd452f3d..6b580af86df 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -138,11 +138,17 @@ struct RBytesView { } Status ParseRaw(RScalar* value) { - if (value->rtype != BINARY) { - return Status::Invalid("cannot parse binary"); + SEXP raw; + + if (value->rtype == LIST || value->rtype == BINARY) { + raw = *reinterpret_cast(value->data); + if (TYPEOF(raw) != RAWSXP) { + return Status::Invalid("can only handle RAW vectors"); + } + } else { + 
return Status::NotImplemented("cannot parse binary with RBytesView::ParseRaw()"); } - SEXP raw = *reinterpret_cast(value->data); bytes = reinterpret_cast(RAW_RO(raw)); size = XLENGTH(raw); is_utf8 = false; @@ -164,7 +170,7 @@ class RValue { } // TODO: improve error - return Status::Invalid("invalid conversion"); + return Status::Invalid("invalid conversion to bool"); } static Result Convert(const HalfFloatType*, const RConversionOptions&, @@ -176,7 +182,7 @@ class RValue { static Result Convert(const FloatType*, const RConversionOptions&, RScalar* value) { // TODO: improve error - return Status::Invalid("invalid conversion"); + return Status::Invalid("invalid conversion to float"); } static Result Convert(const DoubleType*, const RConversionOptions&, @@ -187,7 +193,7 @@ class RValue { } // TODO: improve error - return Status::Invalid("invalid conversion"); + return Status::Invalid("invalid conversion to double"); } static Result Convert(const UInt8Type*, const RConversionOptions&, @@ -198,25 +204,25 @@ class RValue { } // TODO: improve error - return Status::Invalid("invalid conversion"); + return Status::Invalid("invalid conversion to uint8"); } static Result Convert(const Int8Type*, const RConversionOptions&, RScalar* value) { // TODO: improve error - return Status::Invalid("invalid conversion"); + return Status::Invalid("invalid conversion to int8"); } static Result Convert(const Int16Type*, const RConversionOptions&, RScalar* value) { // TODO: improve error - return Status::Invalid("invalid conversion"); + return Status::Invalid("invalid conversion to int16"); } static Result Convert(const UInt16Type*, const RConversionOptions&, RScalar* value) { // TODO: improve error - return Status::Invalid("invalid conversion"); + return Status::Invalid("invalid conversion to uint16"); } static Result Convert(const Int32Type*, const RConversionOptions&, @@ -227,13 +233,13 @@ class RValue { } // TODO: improve error - return Status::Invalid("invalid conversion"); + return 
Status::Invalid("invalid conversion to int32"); } static Result Convert(const UInt32Type*, const RConversionOptions&, RScalar* value) { // TODO: improve error - return Status::Invalid("invalid conversion"); + return Status::Invalid("invalid conversion to uint32"); } static Result Convert(const Int64Type*, const RConversionOptions&, @@ -250,43 +256,43 @@ class RValue { static Result Convert(const UInt64Type*, const RConversionOptions&, RScalar* value) { // TODO: improve error - return Status::Invalid("invalid conversion"); + return Status::Invalid("invalid conversion to uint64"); } static Result Convert(const Date32Type*, const RConversionOptions&, RScalar* value) { // TODO: improve error - return Status::Invalid("invalid conversion"); + return Status::Invalid("invalid conversion to date32"); } static Result Convert(const Date64Type*, const RConversionOptions&, RScalar* value) { // TODO: improve error - return Status::Invalid("invalid conversion"); + return Status::Invalid("invalid conversion to date64"); } static Result Convert(const Time32Type*, const RConversionOptions&, RScalar* value) { // TODO: improve error - return Status::Invalid("invalid conversion"); + return Status::Invalid("invalid conversion to time32"); } static Result Convert(const Time64Type*, const RConversionOptions&, RScalar* value) { // TODO: improve error - return Status::Invalid("invalid conversion"); + return Status::Invalid("invalid conversion to time64"); } static Result Convert(const Decimal128Type*, const RConversionOptions&, RScalar* value) { // TODO: improve error - return Status::Invalid("invalid conversion"); + return Status::Invalid("invalid conversion to decimal128"); } static Result Convert(const Decimal256Type*, const RConversionOptions&, RScalar* value) { // TODO: improve error - return Status::Invalid("invalid conversion"); + return Status::Invalid("invalid conversion to decimal256"); } template @@ -301,17 +307,25 @@ class RValue { } // TODO: improve error - return 
Status::Invalid("invalid conversion"); + return Status::Invalid("invalid conversion to string"); } static Status Convert(const BaseBinaryType*, const RConversionOptions&, RScalar* value, RBytesView& view) { - if (value->rtype == BINARY) { - return view.ParseRaw(value); + switch (value->rtype) { + case BINARY: + case LIST: + return view.ParseRaw(value); + + case STRING: + return Status::NotImplemented("conversion string -> binary"); + + default: + break; } // TODO: improve error - return Status::Invalid("invalid conversion"); + return Status::Invalid("invalid conversion to binary"); } static Status Convert(const FixedSizeBinaryType* type, const RConversionOptions&, @@ -319,9 +333,8 @@ class RValue { ARROW_RETURN_NOT_OK(view.ParseRaw(value)); if (view.size != type->byte_width()) { return Status::Invalid("invalid size"); - } else { - return Status::OK(); } + return Status::OK(); } }; @@ -440,6 +453,10 @@ inline Status VisitVector(SEXP x, R_xlen_t size, T* converter) { case INTEGER64: return VisitInt64Vector(x, size, std::forward(func)); + case BINARY: + return VisitRPrimitiveVector( + x, size, std::forward(func)); + case LIST: return VisitRPrimitiveVector( x, size, std::forward(func)); From 505f33b896c91770f18e0baecff5576c77d241ab Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Thu, 26 Nov 2020 15:05:05 +0100 Subject: [PATCH 16/82] date32 and date64 (only handle REALSXP backed for now) --- r/src/r_to_arrow.cpp | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 6b580af86df..660e1374008 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -58,7 +58,7 @@ enum RVectorType { UINT8, INT32, FLOAT64, - INTEGER64, + INT64, COMPLEX, STRING, DATAFRAME, @@ -88,8 +88,10 @@ RVectorType GetVectorType(SEXP x) { case CPLXSXP: return COMPLEX; case REALSXP: { - if (Rf_inherits(x, "integer64")) { - return INTEGER64; + if (Rf_inherits(x, "Date")) { + return DATE; + } else if 
(Rf_inherits(x, "integer64")) { + return INT64; } else if (Rf_inherits(x, "POSIXct")) { return TIMESTAMP; } else if (Rf_inherits(x, "difftime")) { @@ -245,7 +247,7 @@ class RValue { static Result Convert(const Int64Type*, const RConversionOptions&, RScalar* value) { // TODO: handle conversion from other types - if (value->rtype == INTEGER64) { + if (value->rtype == INT64) { return *reinterpret_cast(value->data); } @@ -261,13 +263,23 @@ class RValue { static Result Convert(const Date32Type*, const RConversionOptions&, RScalar* value) { + if (value->rtype == DATE) { + return static_cast(*reinterpret_cast(value->data)); + } + // TODO: improve error return Status::Invalid("invalid conversion to date32"); } static Result Convert(const Date64Type*, const RConversionOptions&, RScalar* value) { - // TODO: improve error + constexpr static int64_t kMillisecondsPerDay = 86400000; + + if (value->rtype == DATE) { + return static_cast(*reinterpret_cast(value->data) * + kMillisecondsPerDay); + } + return Status::Invalid("invalid conversion to date64"); } @@ -390,7 +402,7 @@ inline Status VisitRPrimitiveVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { template inline Status VisitInt64Vector(SEXP x, R_xlen_t size, VisitorFunc&& func) { - RScalar obj{INTEGER64, nullptr, false}; + RScalar obj{INT64, nullptr, false}; cpp11::doubles values(x); for (double value : values) { obj.data = reinterpret_cast(&value); @@ -446,11 +458,15 @@ inline Status VisitVector(SEXP x, R_xlen_t size, T* converter) { case FLOAT64: return VisitRPrimitiveVector( x, size, std::forward(func)); + case DATE: + return VisitRPrimitiveVector( + x, size, std::forward(func)); + case STRING: return VisitRPrimitiveVector( x, size, std::forward(func)); - case INTEGER64: + case INT64: return VisitInt64Vector(x, size, std::forward(func)); case BINARY: From 8a10ee222be5435fafaa614748c3e61944cafdc4 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 27 Nov 2020 10:27:43 +0100 Subject: [PATCH 17/82] handle null for 
bool --- r/src/r_to_arrow.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 660e1374008..912644cbb87 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -172,7 +172,7 @@ class RValue { } // TODO: improve error - return Status::Invalid("invalid conversion to bool"); + return Status::Invalid("invalid conversion to bool, expecting a logical vector"); } static Result Convert(const HalfFloatType*, const RConversionOptions&, @@ -370,7 +370,7 @@ bool is_NA(uint8_t value) { template <> bool is_NA(cpp11::r_bool value) { - return false; + return value == NA_LOGICAL; } template <> From 63f95aec2cd05e3e2f20e79a84fed06f34f3bb8b Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 27 Nov 2020 11:55:34 +0100 Subject: [PATCH 18/82] improved RValue::Convert --- r/src/r_to_arrow.cpp | 62 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 52 insertions(+), 10 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 912644cbb87..bf81d1f88b1 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -159,42 +159,84 @@ struct RBytesView { } }; +template +Result IntegerScalarToFloat32Safe(int64_t value) { + constexpr int64_t kFloatMax = 1LL << 24; + constexpr int64_t kFloatMin = -(1LL << 24); + + if (value < kFloatMin || value > kFloatMax) { + return Status::Invalid("Integer value ", value, " is outside of the range exactly", + " representable by a IEEE 754 single precision value"); + } + return static_cast(value); +} + +template +Result IntegerScalarToDoubleSafe(int64_t value) { + constexpr int64_t kDoubleMax = 1LL << 53; + constexpr int64_t kDoubleMin = -(1LL << 53); + + if (value < kDoubleMin || value > kDoubleMax) { + return Status::Invalid("Integer value ", value, " is outside of the range exactly", + " representable by a IEEE 754 double precision value"); + } + return static_cast(value); +} + class RValue { public: static bool IsNull(RScalar* obj) { return 
obj->null; } - // TODO: generalise - static Result Convert(const BooleanType*, const RConversionOptions&, RScalar* value) { if (value->rtype == BOOLEAN) { return *reinterpret_cast(value->data); } - // TODO: improve error return Status::Invalid("invalid conversion to bool, expecting a logical vector"); } static Result Convert(const HalfFloatType*, const RConversionOptions&, RScalar* value) { - // TODO: improve error - return Status::Invalid("invalid conversion"); + return Status::NotImplemented("conversion to half float from R not implemented"); } static Result Convert(const FloatType*, const RConversionOptions&, RScalar* value) { - // TODO: improve error + switch (value->rtype) { + case FLOAT64: + return static_cast(*reinterpret_cast(value->data)); + case INT32: + return IntegerScalarToFloat32Safe(*reinterpret_cast(value->data)); + case UINT8: + return IntegerScalarToFloat32Safe( + *reinterpret_cast(value->data)); + case INT64: + return IntegerScalarToFloat32Safe( + *reinterpret_cast(value->data)); + default: + break; + } return Status::Invalid("invalid conversion to float"); } static Result Convert(const DoubleType*, const RConversionOptions&, RScalar* value) { - // TODO: handle conversion from other types - if (value->rtype == FLOAT64) { - return *reinterpret_cast(value->data); + switch (value->rtype) { + case FLOAT64: + return static_cast(*reinterpret_cast(value->data)); + case INT32: + return IntegerScalarToDoubleSafe(*reinterpret_cast(value->data)); + case UINT8: + return IntegerScalarToDoubleSafe( + *reinterpret_cast(value->data)); + case INT64: + return IntegerScalarToDoubleSafe( + *reinterpret_cast(value->data)); + default: + break; } - // TODO: improve error return Status::Invalid("invalid conversion to double"); } From c102f65f323c653f34f02d3b1e74201817036a09 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 8 Dec 2020 12:09:23 +0100 Subject: [PATCH 19/82] enable_if_integer> RValue::Convert --- r/src/r_to_arrow.cpp | 99 
+++++++++++++++++--------------------------- 1 file changed, 38 insertions(+), 61 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index bf81d1f88b1..652f0bd8428 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -183,6 +183,40 @@ Result IntegerScalarToDoubleSafe(int64_t value) { return static_cast(value); } +template +Result CIntFromRScalarImpl(int64_t value) { + if (value < std::numeric_limits::min() || value > std::numeric_limits::max()) { + return Status::Invalid("value outside of range"); + } + return static_cast(value); +} + +template <> +Result CIntFromRScalarImpl(int64_t value) { + if (value < 0) { + return Status::Invalid("value outside of range"); + } + return static_cast(value); +} + +template +Result CIntFromRScalar(RScalar* obj) { + switch (obj->rtype) { + case FLOAT64: + return CIntFromRScalarImpl(*reinterpret_cast(obj->data)); + case INT32: + return CIntFromRScalarImpl(*reinterpret_cast(obj->data)); + case UINT8: + return CIntFromRScalarImpl(*reinterpret_cast(obj->data)); + case INT64: + return CIntFromRScalarImpl(*reinterpret_cast(obj->data)); + default: + break; + } + + return Status::Invalid("Cannot convert to Int"); +} + class RValue { public: static bool IsNull(RScalar* obj) { return obj->null; } @@ -240,67 +274,10 @@ class RValue { return Status::Invalid("invalid conversion to double"); } - static Result Convert(const UInt8Type*, const RConversionOptions&, - RScalar* value) { - // TODO: handle conversion from other types - if (value->rtype == UINT8) { - return *reinterpret_cast(value->data); - } - - // TODO: improve error - return Status::Invalid("invalid conversion to uint8"); - } - - static Result Convert(const Int8Type*, const RConversionOptions&, - RScalar* value) { - // TODO: improve error - return Status::Invalid("invalid conversion to int8"); - } - - static Result Convert(const Int16Type*, const RConversionOptions&, - RScalar* value) { - // TODO: improve error - return Status::Invalid("invalid conversion 
to int16"); - } - - static Result Convert(const UInt16Type*, const RConversionOptions&, - RScalar* value) { - // TODO: improve error - return Status::Invalid("invalid conversion to uint16"); - } - - static Result Convert(const Int32Type*, const RConversionOptions&, - RScalar* value) { - // TODO: handle conversion from other types - if (value->rtype == INT32) { - return *reinterpret_cast(value->data); - } - - // TODO: improve error - return Status::Invalid("invalid conversion to int32"); - } - - static Result Convert(const UInt32Type*, const RConversionOptions&, - RScalar* value) { - // TODO: improve error - return Status::Invalid("invalid conversion to uint32"); - } - - static Result Convert(const Int64Type*, const RConversionOptions&, - RScalar* value) { - // TODO: handle conversion from other types - if (value->rtype == INT64) { - return *reinterpret_cast(value->data); - } - - // TODO: improve error - return Status::Invalid("invalid conversion to int64"); - } - - static Result Convert(const UInt64Type*, const RConversionOptions&, - RScalar* value) { - // TODO: improve error - return Status::Invalid("invalid conversion to uint64"); + template + static enable_if_integer> Convert( + const T*, const RConversionOptions&, RScalar* value) { + return CIntFromRScalar(value); } static Result Convert(const Date32Type*, const RConversionOptions&, From 6f41b74bfa83f13bbe19349592a30f950e27a6f3 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 8 Dec 2020 12:22:24 +0100 Subject: [PATCH 20/82] DATE -> DATE_INT + DATE_DBL because dates in R can be either backed by integers or double --- r/src/r_to_arrow.cpp | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 652f0bd8428..dae4ee43127 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -62,7 +62,8 @@ enum RVectorType { COMPLEX, STRING, DATAFRAME, - DATE, + DATE_INT, + DATE_DBL, TIME, TIMESTAMP, BINARY, @@ -81,6 +82,8 @@ 
RVectorType GetVectorType(SEXP x) { case INTSXP: if (Rf_inherits(x, "factor")) { return FACTOR; + } else if (Rf_inherits(x, "Date")) { + return DATE_INT; } return INT32; case STRSXP: @@ -89,7 +92,7 @@ RVectorType GetVectorType(SEXP x) { return COMPLEX; case REALSXP: { if (Rf_inherits(x, "Date")) { - return DATE; + return DATE_DBL; } else if (Rf_inherits(x, "integer64")) { return INT64; } else if (Rf_inherits(x, "POSIXct")) { @@ -282,11 +285,15 @@ class RValue { static Result Convert(const Date32Type*, const RConversionOptions&, RScalar* value) { - if (value->rtype == DATE) { - return static_cast(*reinterpret_cast(value->data)); + switch (value->rtype) { + case DATE_DBL: + return static_cast(*reinterpret_cast(value->data)); + case DATE_INT: + return *reinterpret_cast(value->data); + default: + break; } - // TODO: improve error return Status::Invalid("invalid conversion to date32"); } @@ -294,7 +301,7 @@ class RValue { RScalar* value) { constexpr static int64_t kMillisecondsPerDay = 86400000; - if (value->rtype == DATE) { + if (value->rtype == DATE_DBL) { return static_cast(*reinterpret_cast(value->data) * kMillisecondsPerDay); } @@ -477,8 +484,11 @@ inline Status VisitVector(SEXP x, R_xlen_t size, T* converter) { case FLOAT64: return VisitRPrimitiveVector( x, size, std::forward(func)); - case DATE: - return VisitRPrimitiveVector( + case DATE_DBL: + return VisitRPrimitiveVector( + x, size, std::forward(func)); + case DATE_INT: + return VisitRPrimitiveVector( x, size, std::forward(func)); case STRING: From ea97e21cab11697a6125e2184d7529b16bd5786a Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 8 Dec 2020 14:08:56 +0100 Subject: [PATCH 21/82] + RScalar_to_days (handling date int&dbl for now) --- r/src/r_to_arrow.cpp | 40 ++++++++++++++++++++++++---------------- 1 file changed, 24 insertions(+), 16 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index dae4ee43127..15a30f4784c 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ 
-220,6 +220,22 @@ Result CIntFromRScalar(RScalar* obj) { return Status::Invalid("Cannot convert to Int"); } +Status RScalar_to_days(RScalar* value, int32_t* days) { + switch (value->rtype) { + case DATE_DBL: { + *days = static_cast(*reinterpret_cast(value->data)); + return Status::OK(); + } + case DATE_INT: { + *days = *reinterpret_cast(value->data); + return Status::OK(); + } + default: + break; + } + return Status::Invalid("invalid conversion to Date"); +} + class RValue { public: static bool IsNull(RScalar* obj) { return obj->null; } @@ -285,28 +301,20 @@ class RValue { static Result Convert(const Date32Type*, const RConversionOptions&, RScalar* value) { - switch (value->rtype) { - case DATE_DBL: - return static_cast(*reinterpret_cast(value->data)); - case DATE_INT: - return *reinterpret_cast(value->data); - default: - break; - } - - return Status::Invalid("invalid conversion to date32"); + int32_t days; + RETURN_NOT_OK(RScalar_to_days(value, &days)); + return days; } static Result Convert(const Date64Type*, const RConversionOptions&, RScalar* value) { constexpr static int64_t kMillisecondsPerDay = 86400000; - if (value->rtype == DATE_DBL) { - return static_cast(*reinterpret_cast(value->data) * - kMillisecondsPerDay); - } + // first truncate to a number of days since epoch and then convert to milliseconds + int32_t days; + RETURN_NOT_OK(RScalar_to_days(value, &days)); - return Status::Invalid("invalid conversion to date64"); + return static_cast(days) * kMillisecondsPerDay; } static Result Convert(const Time32Type*, const RConversionOptions&, @@ -488,7 +496,7 @@ inline Status VisitVector(SEXP x, R_xlen_t size, T* converter) { return VisitRPrimitiveVector( x, size, std::forward(func)); case DATE_INT: - return VisitRPrimitiveVector( + return VisitRPrimitiveVector( x, size, std::forward(func)); case STRING: From 09dd5a9b42dc2d26bbcc88bb44365a64224085d8 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 8 Dec 2020 14:09:24 +0100 Subject: [PATCH 22/82] using 
this->struct_builder_ --- r/src/r_to_arrow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 15a30f4784c..06d64e10403 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -748,7 +748,7 @@ class RStructConverter : public StructConverter { return Status::Invalid("Can only convert data frames to Struct type"); } - auto struct_builder = checked_cast(this->builder().get()); + auto struct_builder = this->struct_builder_; for (R_xlen_t i = 0; i < size; i++) { RETURN_NOT_OK(struct_builder->Append()); } From 39bdaa452ce5545750022ea1c08b746d48ab48c0 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 8 Dec 2020 14:34:54 +0100 Subject: [PATCH 23/82] POSIXct -> date32 and date64 --- r/src/r_to_arrow.cpp | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 06d64e10403..955627c8af8 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -65,7 +65,7 @@ enum RVectorType { DATE_INT, DATE_DBL, TIME, - TIMESTAMP, + POSIXCT, BINARY, LIST, FACTOR, @@ -96,7 +96,7 @@ RVectorType GetVectorType(SEXP x) { } else if (Rf_inherits(x, "integer64")) { return INT64; } else if (Rf_inherits(x, "POSIXct")) { - return TIMESTAMP; + return POSIXCT; } else if (Rf_inherits(x, "difftime")) { return TIME; } else { @@ -221,6 +221,8 @@ Result CIntFromRScalar(RScalar* obj) { } Status RScalar_to_days(RScalar* value, int32_t* days) { + constexpr int64_t kSecondsPerDay = 86400; + switch (value->rtype) { case DATE_DBL: { *days = static_cast(*reinterpret_cast(value->data)); @@ -230,6 +232,11 @@ Status RScalar_to_days(RScalar* value, int32_t* days) { *days = *reinterpret_cast(value->data); return Status::OK(); } + case POSIXCT: { + *days = *reinterpret_cast(value->data) / kSecondsPerDay; + return Status::OK(); + } + default: break; } From 488a0820e01efa88617a8666bb0bdc28e425f9d1 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 8 Dec 2020 
16:25:31 +0100 Subject: [PATCH 24/82] time32 + time64 --- r/src/r_to_arrow.cpp | 77 +++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 73 insertions(+), 4 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 955627c8af8..fab34efcfc7 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -220,6 +220,11 @@ Result CIntFromRScalar(RScalar* obj) { return Status::Invalid("Cannot convert to Int"); } +struct DiffTimeData { + double data; + int multiplier; +}; + Status RScalar_to_days(RScalar* value, int32_t* days) { constexpr int64_t kSecondsPerDay = 86400; @@ -324,15 +329,42 @@ class RValue { return static_cast(days) * kMillisecondsPerDay; } - static Result Convert(const Time32Type*, const RConversionOptions&, + static Result Convert(const Time32Type* type, const RConversionOptions&, RScalar* value) { - // TODO: improve error + if (value->rtype == TIME) { + DiffTimeData* data = reinterpret_cast(value->data); + auto seconds = data->data * data->multiplier; + switch (type->unit()) { + case TimeUnit::SECOND: + return seconds; + case TimeUnit::MILLI: + return seconds * 1000; + default: + return Status::Invalid("invalid time unit"); + } + } + return Status::Invalid("invalid conversion to time32"); } - static Result Convert(const Time64Type*, const RConversionOptions&, + static Result Convert(const Time64Type* type, const RConversionOptions&, RScalar* value) { - // TODO: improve error + constexpr int64_t kMicroSeconds = 1000000; + constexpr int64_t kNanoSeconds = 1000000000; + + if (value->rtype == TIME) { + DiffTimeData* data = reinterpret_cast(value->data); + auto seconds = data->data * data->multiplier; + switch (type->unit()) { + case TimeUnit::MICRO: + return seconds * kMicroSeconds; + case TimeUnit::NANO: + return seconds * kNanoSeconds; + default: + return Status::Invalid("invalid time unit"); + } + } + return Status::Invalid("invalid conversion to time64"); } @@ -473,6 +505,40 @@ inline Status VisitFactor(SEXP x, R_xlen_t size, 
VisitorFunc&& func) { return Status::OK(); } +Status GetDifftimeMultiplier(SEXP obj, int* res) { + std::string unit(CHAR(STRING_ELT(Rf_getAttrib(obj, symbols::units), 0))); + if (unit == "secs") { + *res = 1; + } else if (unit == "mins") { + *res = 60; + } else if (unit == "hours") { + *res = 3600; + } else if (unit == "days") { + *res = 86400; + } else if (unit == "weeks") { + *res = 604800; + } else { + return Status::Invalid("unknown difftime unit"); + } + return Status::OK(); +} + +template +inline Status VisitDifftime(SEXP x, R_xlen_t size, VisitorFunc&& func) { + DiffTimeData scalar; + RETURN_NOT_OK(GetDifftimeMultiplier(x, &scalar.multiplier)); + + RScalar obj{TIME, reinterpret_cast(&scalar), false}; + cpp11::doubles values(x); + + for (double value : values) { + scalar.data = value; + obj.null = is_NA(value); + RETURN_NOT_OK(func(&obj)); + } + return Status::OK(); +} + template inline Status VisitDataFrame(SEXP x, R_xlen_t size, T* converter); @@ -524,6 +590,9 @@ inline Status VisitVector(SEXP x, R_xlen_t size, T* converter) { case FACTOR: return VisitFactor(x, size, std::forward(func)); + case TIME: + return VisitDifftime(x, size, std::forward(func)); + default: break; } From a674b709113a4edea066c2c470c02f5ce42e230b Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 8 Dec 2020 17:11:58 +0100 Subject: [PATCH 25/82] timestamp --- r/src/r_to_arrow.cpp | 48 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index fab34efcfc7..9ff678f169b 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -347,6 +347,27 @@ class RValue { return Status::Invalid("invalid conversion to time32"); } + static Result Convert(const TimestampType* type, const RConversionOptions&, + RScalar* value) { + if (value->rtype == POSIXCT) { + auto seconds = *reinterpret_cast(value->data); + switch (type->unit()) { + case TimeUnit::SECOND: + return seconds; + case 
TimeUnit::MILLI: + return seconds * 1000; + case TimeUnit::MICRO: + return seconds * 1000000; + case TimeUnit::NANO: + return seconds * 1000000000; + default: + return Status::Invalid("invalid time unit"); + } + } + + return Status::Invalid("invalid conversion to timestamp"); + } + static Result Convert(const Time64Type* type, const RConversionOptions&, RScalar* value) { constexpr int64_t kMicroSeconds = 1000000; @@ -593,6 +614,10 @@ inline Status VisitVector(SEXP x, R_xlen_t size, T* converter) { case TIME: return VisitDifftime(x, size, std::forward(func)); + case POSIXCT: + return VisitRPrimitiveVector( + x, size, std::forward(func)); + default: break; } @@ -639,6 +664,22 @@ class RPrimitiveConverter< } }; +template +class RPrimitiveConverter::value>> + : public PrimitiveConverter { + public: + Status Append(RScalar* value) { + if (RValue::IsNull(value)) { + return this->primitive_builder_->AppendNull(); + } else { + ARROW_ASSIGN_OR_RAISE( + auto converted, RValue::Convert(this->primitive_type_, this->options_, value)); + this->primitive_builder_->UnsafeAppend(converted); + } + return Status::OK(); + } +}; + template class RPrimitiveConverter> : public PrimitiveConverter { @@ -720,13 +761,12 @@ class RPrimitiveConverter> }; template -class RPrimitiveConverter< - T, enable_if_t::value || is_duration_type::value>> +class RPrimitiveConverter::value>> : public PrimitiveConverter { public: Status Append(RScalar* value) { - return Status::NotImplemented( - "conversion to timestamp or duration not yet implemented"); + // TODO: look in lubridate + return Status::NotImplemented("conversion to duration not yet implemented"); } }; From c2a28ba2273d70405412179f3e181ad61ba6f0e1 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 9 Dec 2020 10:36:43 +0100 Subject: [PATCH 26/82] virtual RConverter::AppendRange(start, size) + custom impl for RStructConverter --- r/src/r_to_arrow.cpp | 199 ++++++++++++++++++++++++------------------- 1 file changed, 110 insertions(+), 89 
deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 9ff678f169b..45fd9d7e789 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -225,6 +225,11 @@ struct DiffTimeData { int multiplier; }; +struct DataFrameRow { + SEXP data; + R_xlen_t row; +}; + Status RScalar_to_days(RScalar* value, int32_t* days) { constexpr int64_t kSecondsPerDay = 86400; @@ -483,22 +488,31 @@ bool is_NA(int64_t value) { } template -inline Status VisitRPrimitiveVector(SEXP x, R_xlen_t size, VisitorFunc&& func) { +inline Status VisitRPrimitiveVector(SEXP x, R_xlen_t start, R_xlen_t size, + VisitorFunc&& func) { RScalar obj{rtype, nullptr, false}; cpp11::r_vector values(x); - for (T value : values) { + auto it = values.begin() + start; + + for (R_xlen_t i = 0; i < size; i++, ++it) { + auto value = *it; obj.data = reinterpret_cast(&value); obj.null = is_NA(value); RETURN_NOT_OK(func(&obj)); } + return Status::OK(); } template -inline Status VisitInt64Vector(SEXP x, R_xlen_t size, VisitorFunc&& func) { +inline Status VisitInt64Vector(SEXP x, R_xlen_t start, R_xlen_t size, + VisitorFunc&& func) { RScalar obj{INT64, nullptr, false}; cpp11::doubles values(x); - for (double value : values) { + auto it = values.begin() + start; + + for (R_xlen_t i = 0; i < size; i++, ++it) { + double value = *it; obj.data = reinterpret_cast(&value); obj.null = is_NA(*reinterpret_cast(&value)); RETURN_NOT_OK(func(&obj)); @@ -507,14 +521,16 @@ inline Status VisitInt64Vector(SEXP x, R_xlen_t size, VisitorFunc&& func) { } template -inline Status VisitFactor(SEXP x, R_xlen_t size, VisitorFunc&& func) { +inline Status VisitFactor(SEXP x, R_xlen_t start, R_xlen_t size, VisitorFunc&& func) { cpp11::strings levels(Rf_getAttrib(x, R_LevelsSymbol)); SEXP* levels_ptr = const_cast(STRING_PTR_RO(levels)); RScalar obj{FACTOR, nullptr, false}; cpp11::r_vector values(x); + auto it = values.begin() + start; - for (int value : values) { + for (R_xlen_t i = 0; i < size; i++, ++it) { + int value = *it; if 
(is_NA(value)) { obj.null = true; } else { @@ -545,14 +561,16 @@ Status GetDifftimeMultiplier(SEXP obj, int* res) { } template -inline Status VisitDifftime(SEXP x, R_xlen_t size, VisitorFunc&& func) { +inline Status VisitDifftime(SEXP x, R_xlen_t start, R_xlen_t size, VisitorFunc&& func) { DiffTimeData scalar; RETURN_NOT_OK(GetDifftimeMultiplier(x, &scalar.multiplier)); RScalar obj{TIME, reinterpret_cast(&scalar), false}; cpp11::doubles values(x); + auto it = values.begin() + start; - for (double value : values) { + for (R_xlen_t i = 0; i < size; i++, ++it) { + double value = *it; scalar.data = value; obj.null = is_NA(value); RETURN_NOT_OK(func(&obj)); @@ -560,78 +578,85 @@ inline Status VisitDifftime(SEXP x, R_xlen_t size, VisitorFunc&& func) { return Status::OK(); } -template -inline Status VisitDataFrame(SEXP x, R_xlen_t size, T* converter); +template +inline Status VisitDataFrame(SEXP x, R_xlen_t start, R_xlen_t size, VisitorFunc&& func) { + DataFrameRow row({x, start}); + RScalar obj{DATAFRAME, reinterpret_cast(&row), false}; -template -inline Status VisitVector(SEXP x, R_xlen_t size, T* converter) { - if (converter->type()->id() == Type::STRUCT) { - return VisitDataFrame(x, size, converter); + for (R_xlen_t i = 0; i < size; i++) { + ++row.row; + RETURN_NOT_OK(func(&obj)); } + return Status::OK(); +} - RVectorType rtype = GetVectorType(x); - auto func = [&converter](RScalar* obj) { return converter->Append(obj); }; - using VisitorFunc = decltype(func); +class RConverter : public Converter { + public: + virtual Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) { + RETURN_NOT_OK(this->Reserve(size)); - switch (rtype) { - case BOOLEAN: - return VisitRPrimitiveVector( - x, size, std::forward(func)); - case UINT8: - return VisitRPrimitiveVector( - x, size, std::forward(func)); - case INT32: - return VisitRPrimitiveVector( - x, size, std::forward(func)); - case FLOAT64: - return VisitRPrimitiveVector( - x, size, std::forward(func)); - case DATE_DBL: - return 
VisitRPrimitiveVector( - x, size, std::forward(func)); - case DATE_INT: - return VisitRPrimitiveVector( - x, size, std::forward(func)); - - case STRING: - return VisitRPrimitiveVector( - x, size, std::forward(func)); + RVectorType rtype = GetVectorType(x); + auto func = [this](RScalar* obj) { return Append(obj); }; + using VisitorFunc = decltype(func); - case INT64: - return VisitInt64Vector(x, size, std::forward(func)); + switch (rtype) { + case BOOLEAN: + return VisitRPrimitiveVector( + x, start, size, std::forward(func)); + case UINT8: + return VisitRPrimitiveVector( + x, start, size, std::forward(func)); + case INT32: + return VisitRPrimitiveVector( + x, start, size, std::forward(func)); + case FLOAT64: + return VisitRPrimitiveVector( + x, start, size, std::forward(func)); + case DATE_DBL: + return VisitRPrimitiveVector( + x, start, size, std::forward(func)); + case DATE_INT: + return VisitRPrimitiveVector( + x, start, size, std::forward(func)); - case BINARY: - return VisitRPrimitiveVector( - x, size, std::forward(func)); + case STRING: + return VisitRPrimitiveVector( + x, start, size, std::forward(func)); - case LIST: - return VisitRPrimitiveVector( - x, size, std::forward(func)); + case INT64: + return VisitInt64Vector(x, start, size, + std::forward(func)); - case FACTOR: - return VisitFactor(x, size, std::forward(func)); + case BINARY: + return VisitRPrimitiveVector( + x, start, size, std::forward(func)); - case TIME: - return VisitDifftime(x, size, std::forward(func)); + case LIST: + return VisitRPrimitiveVector( + x, start, size, std::forward(func)); - case POSIXCT: - return VisitRPrimitiveVector( - x, size, std::forward(func)); + case FACTOR: + return VisitFactor(x, start, size, std::forward(func)); - default: - break; - } + case TIME: + return VisitDifftime(x, start, size, + std::forward(func)); - return Status::Invalid("No visitor for R type ", rtype); -} + case POSIXCT: + return VisitRPrimitiveVector( + x, start, size, std::forward(func)); -template 
-Status Extend(T* converter, SEXP x, R_xlen_t size) { - RETURN_NOT_OK(converter->Reserve(size)); - return VisitVector(x, size, converter); -} + case DATAFRAME: + return VisitDataFrame(x, start, size, + std::forward(func)); -using RConverter = Converter; + default: + break; + } + + return Status::Invalid("No visitor for R type ", rtype); + } +}; template class RPrimitiveConverter; @@ -652,7 +677,7 @@ class RPrimitiveConverter< is_decimal_type::value>> : public PrimitiveConverter { public: - Status Append(RScalar* value) { + Status Append(RScalar* value) override { if (RValue::IsNull(value)) { return this->primitive_builder_->AppendNull(); } else { @@ -668,7 +693,7 @@ template class RPrimitiveConverter::value>> : public PrimitiveConverter { public: - Status Append(RScalar* value) { + Status Append(RScalar* value) override { if (RValue::IsNull(value)) { return this->primitive_builder_->AppendNull(); } else { @@ -686,7 +711,7 @@ class RPrimitiveConverter> public: using OffsetType = typename T::offset_type; - Status Append(RScalar* value) { + Status Append(RScalar* value) override { if (RValue::IsNull(value)) { this->primitive_builder_->UnsafeAppendNull(); } else { @@ -711,7 +736,7 @@ template class RPrimitiveConverter::value>> : public PrimitiveConverter { public: - Status Append(RScalar* value) { + Status Append(RScalar* value) override { if (RValue::IsNull(value)) { this->primitive_builder_->UnsafeAppendNull(); } else { @@ -737,7 +762,7 @@ class RPrimitiveConverter> public: using OffsetType = typename T::offset_type; - Status Append(RScalar* value) { + Status Append(RScalar* value) override { if (RValue::IsNull(value)) { return this->primitive_builder_->AppendNull(); } else { @@ -764,7 +789,7 @@ template class RPrimitiveConverter::value>> : public PrimitiveConverter { public: - Status Append(RScalar* value) { + Status Append(RScalar* value) override { // TODO: look in lubridate return Status::NotImplemented("conversion to duration not yet implemented"); } @@ -822,7 
+847,7 @@ struct RConverterTrait> { template class RListConverter : public ListConverter { public: - Status Append(RScalar* value) { + Status Append(RScalar* value) override { if (RValue::IsNull(value)) { return this->list_builder_->AppendNull(); } @@ -834,7 +859,7 @@ class RListConverter : public ListConverter { SEXP obj = *reinterpret_cast(value->data); R_xlen_t size = XLENGTH(obj); RETURN_NOT_OK(this->list_builder_->ValidateOverflow(size)); - return Extend(this->value_converter_.get(), obj, size); + return this->value_converter_.get()->AppendRange(obj, 0, size); } }; @@ -847,18 +872,19 @@ struct RConverterTrait { class RStructConverter : public StructConverter { public: + Status Append(RScalar* value) override { - return Status::NotImplemented("RStructConverter does not use Append()"); - } + if (value->rtype != DATAFRAME) { + return Status::Invalid("expecting a data frame"); + } - Status Reserve(int64_t additional_capacity) override { - // in contrast with StructConverter, this does not Reserve() - // on children, because it will be done as part of Visit() > Extend() - return this->builder_->Reserve(additional_capacity); + auto row = reinterpret_cast(value); + return AppendRange(row->data, row->row, 1); } - Status Visit(SEXP x, R_xlen_t size) { - // iterate over columns of x + Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + RETURN_NOT_OK(this->builder_->Reserve(size)); + R_xlen_t n_columns = XLENGTH(x); if (!Rf_inherits(x, "data.frame")) { return Status::Invalid("Can only convert data frames to Struct type"); @@ -870,7 +896,7 @@ class RStructConverter : public StructConverter { } for (R_xlen_t i = 0; i < n_columns; i++) { - RETURN_NOT_OK(Extend(this->children_[i].get(), VECTOR_ELT(x, i), size)); + RETURN_NOT_OK(this->children_[i]->AppendRange(VECTOR_ELT(x, i), start, size)); } return Status::OK(); @@ -882,11 +908,6 @@ class RStructConverter : public StructConverter { } }; -template -inline Status VisitDataFrame(SEXP x, R_xlen_t size, T* 
converter) { - return static_cast(converter)->Visit(x, size); -} - template <> struct RConverterTrait { template @@ -908,7 +929,7 @@ std::shared_ptr vec_to_arrow(SEXP x, SEXP s_type) { auto converter = ValueOrStop(MakeConverter( options.type, options, gc_memory_pool())); - StopIfNotOk(Extend(converter.get(), x, options.size)); + StopIfNotOk(converter->AppendRange(x, 0, options.size)); return ValueOrStop(converter->ToArray()); } From 772e3bef65615f806c60dd7bc997e329a93de2d0 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 9 Dec 2020 14:18:00 +0100 Subject: [PATCH 27/82] AppendRange() for RPrimitiveConverter --- r/src/r_to_arrow.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 45fd9d7e789..e264f1239bd 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -668,6 +668,10 @@ class RPrimitiveConverter> Status Append(RScalar* value) override { return this->primitive_builder_->AppendNull(); } + + Status AppendRange(SEXP, R_xlen_t start, R_xlen_t size) override { + return this->primitive_builder_->AppendNulls(size); + } }; template @@ -872,7 +876,6 @@ struct RConverterTrait { class RStructConverter : public StructConverter { public: - Status Append(RScalar* value) override { if (value->rtype != DATAFRAME) { return Status::Invalid("expecting a data frame"); From 2ef6d0b92962eb6ea2f40c7c775b82aa705180fb Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 5 Jan 2021 21:36:41 +0100 Subject: [PATCH 28/82] work in progress to use AppendRange(), currently fails to compile ... 
--- r/src/arrow_types.h | 2 + r/src/r_to_arrow.cpp | 578 ++++++++++++++++++++++++++++++++----------- 2 files changed, 437 insertions(+), 143 deletions(-) diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 07d490f664d..0a8fdd953a9 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -64,6 +64,8 @@ arrow::MemoryPool* gc_memory_pool(); #define DATAPTR(x) (void*)STRING_PTR(x) #endif +#define VECTOR_PTR_RO(x) ((const SEXP*)DATAPTR_RO(x)) + namespace arrow { static inline void StopIfNotOk(const Status& status) { diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index e264f1239bd..b7ff874bae4 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -73,6 +73,9 @@ enum RVectorType { OTHER }; +// this flattens out a logical type of what an R object is +// because TYPEOF() is not detailed enough +// we can't use arrow types though as there is no 1-1 mapping RVectorType GetVectorType(SEXP x) { switch (TYPEOF(x)) { case LGLSXP: @@ -220,6 +223,22 @@ Result CIntFromRScalar(RScalar* obj) { return Status::Invalid("Cannot convert to Int"); } +Result RPosixct_Convert(double seconds, const TimestampType* type) { + switch (type->unit()) { + case TimeUnit::SECOND: + return seconds; + case TimeUnit::MILLI: + return seconds * 1000; + case TimeUnit::MICRO: + return seconds * 1000000; + case TimeUnit::NANO: + return seconds * 1000000000; + default: + break; + } + return Status::Invalid("invalid time unit"); +} + struct DiffTimeData { double data; int multiplier; @@ -355,19 +374,7 @@ class RValue { static Result Convert(const TimestampType* type, const RConversionOptions&, RScalar* value) { if (value->rtype == POSIXCT) { - auto seconds = *reinterpret_cast(value->data); - switch (type->unit()) { - case TimeUnit::SECOND: - return seconds; - case TimeUnit::MILLI: - return seconds * 1000; - case TimeUnit::MICRO: - return seconds * 1000000; - case TimeUnit::NANO: - return seconds * 1000000000; - default: - return Status::Invalid("invalid time unit"); - } + return 
RPosixct_Convert(*reinterpret_cast(value->data), type); } return Status::Invalid("invalid conversion to timestamp"); @@ -487,6 +494,50 @@ bool is_NA(int64_t value) { return value == NA_INT64; } +template +struct RVectorVisitor { + using data_type = + typename std::conditional::value, double, T>::type; + + template + static Status Convert(SEXP x, R_xlen_t start, R_xlen_t size, + PrimitiveBuilder* primitive_builder, + ValueConverter&& value_converter) { + auto handler = [primitive_builder, value_converter](data_type value) { + ARROW_ASSIGN_OR_RAISE(auto converted, value_converter(value)); + primitive_builder->UnsafeAppend(converted); + return Status::OK(); + }; + return Visit(x, start, size, primitive_builder, handler); + } + + template + static Status Visit(SEXP x, R_xlen_t start, R_xlen_t size, + PrimitiveBuilder* primitive_builder, ValueHandler&& handler) { + cpp11::r_vector values(x); + auto it = values.begin() + start; + + for (R_xlen_t i = 0; i < size; i++, ++it) { + auto value = GetValue(*it); + + if (is_NA(value)) { + primitive_builder->UnsafeAppendNull(); + } else { + RETURN_NOT_OK(handler(value)); + } + } + + return Status::OK(); + } + + static T GetValue(data_type x) { return x; } +}; + +template <> +int64_t RVectorVisitor::GetValue(double x) { + return *reinterpret_cast(&x); +} + template inline Status VisitRPrimitiveVector(SEXP x, R_xlen_t start, R_xlen_t size, VisitorFunc&& func) { @@ -592,120 +643,356 @@ inline Status VisitDataFrame(SEXP x, R_xlen_t start, R_xlen_t size, VisitorFunc& class RConverter : public Converter { public: + virtual Status Append(RScalar*) { return Status::Invalid("not using Append()"); } + virtual Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) { - RETURN_NOT_OK(this->Reserve(size)); + // RETURN_NOT_OK(this->Reserve(size)); + // + // auto func = [this](RScalar* obj) { return Append(obj); }; + // using VisitorFunc = decltype(func); + // + // switch (rtype) { + // case BOOLEAN: + // return VisitRPrimitiveVector( + // x, 
start, size, std::forward(func)); + // case UINT8: + // return VisitRPrimitiveVector( + // x, start, size, std::forward(func)); + // case INT32: + // return VisitRPrimitiveVector( + // x, start, size, std::forward(func)); + // case FLOAT64: + // return VisitRPrimitiveVector( + // x, start, size, std::forward(func)); + // case DATE_DBL: + // return VisitRPrimitiveVector( + // x, start, size, std::forward(func)); + // case DATE_INT: + // return VisitRPrimitiveVector( + // x, start, size, std::forward(func)); + // + // case STRING: + // return VisitRPrimitiveVector( + // x, start, size, std::forward(func)); + // + // case INT64: + // return VisitInt64Vector(x, start, size, + // std::forward(func)); + // + // case BINARY: + // return VisitRPrimitiveVector( + // x, start, size, std::forward(func)); + // + // case LIST: + // return VisitRPrimitiveVector( + // x, start, size, std::forward(func)); + // + // case FACTOR: + // return VisitFactor(x, start, size, + // std::forward(func)); + // + // case TIME: + // return VisitDifftime(x, start, size, + // std::forward(func)); + // + // case POSIXCT: + // return VisitRPrimitiveVector( + // x, start, size, std::forward(func)); + // + // case DATAFRAME: + // return VisitDataFrame(x, start, size, + // std::forward(func)); + // + // default: + // break; + // } RVectorType rtype = GetVectorType(x); - auto func = [this](RScalar* obj) { return Append(obj); }; - using VisitorFunc = decltype(func); + return Status::Invalid("No visitor for R type ", rtype); + } +}; + +template +class RPrimitiveConverter; + +struct RConvert { + template + static enable_if_integer> Convert(Type*, + From from) { + return CIntFromRScalarImpl(from); + } + + template + static enable_if_t::value && + !std::is_same::value, + Result> + Convert(Type*, From from) { + constexpr int64_t kDoubleMax = 1LL << 53; + constexpr int64_t kDoubleMin = -(1LL << 53); + + if (from < kDoubleMin || from > kDoubleMax) { + return Status::Invalid("Integer value ", from, " is outside 
of the range exactly", + " representable by a IEEE 754 double precision value"); + } + return static_cast(from); + } + + template + static enable_if_t::value && + std::is_same::value, + Result> + Convert(Type*, From from) { + return from; + } + + template + static enable_if_t::value && + !std::is_same::value, + Result> + Convert(Type*, From from) { + constexpr int64_t kFloatMax = 1LL << 24; + constexpr int64_t kFloatMin = -(1LL << 24); + + if (from < kFloatMin || from > kFloatMax) { + return Status::Invalid("Integer value ", from, " is outside of the range exactly", + " representable by a IEEE 754 single precision value"); + } + return static_cast(from); + } + + template + static enable_if_t::value && + std::is_same::value, + Result> + Convert(Type*, From from) { + return static_cast(from); + } + + template + static enable_if_t::value, + Result> + Convert(Type*, From from) { + return Status::Invalid("Cannot convert to Half Float"); + } + + template + static enable_if_t::value, + Result> + Convert(Type*, cpp11::r_bool from) { + return from == TRUE; + } + + Result Convert(Date32Type*, int from) { return from; } + + Result Convert(Date64Type*, int from) { + constexpr int64_t kSecondsPerDay = 86400; + return from * kSecondsPerDay; + } +}; + +template +class RPrimitiveConverter> + : public PrimitiveConverter { + public: + Status AppendRange(SEXP, R_xlen_t start, R_xlen_t size) override { + return this->primitive_builder_->AppendNulls(size); + } +}; +template +class RPrimitiveConverter< + T, enable_if_t::value || is_floating_type::value>> + : public PrimitiveConverter { + public: + Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + RETURN_NOT_OK(this->Reserve(size)); + + auto rtype = GetVectorType(x); switch (rtype) { - case BOOLEAN: - return VisitRPrimitiveVector( - x, start, size, std::forward(func)); case UINT8: - return VisitRPrimitiveVector( - x, start, size, std::forward(func)); + return AppendRangeImpl(x, start, size); case INT32: - return 
VisitRPrimitiveVector( - x, start, size, std::forward(func)); + return AppendRangeImpl(x, start, size); case FLOAT64: - return VisitRPrimitiveVector( - x, start, size, std::forward(func)); - case DATE_DBL: - return VisitRPrimitiveVector( - x, start, size, std::forward(func)); - case DATE_INT: - return VisitRPrimitiveVector( - x, start, size, std::forward(func)); + return AppendRangeImpl(x, start, size); + case INT64: + return AppendRangeImpl(x, start, size); - case STRING: - return VisitRPrimitiveVector( - x, start, size, std::forward(func)); + default: + break; + } + return Status::Invalid("cannot convert to integer "); + } - case INT64: - return VisitInt64Vector(x, start, size, - std::forward(func)); + private: + template + Status AppendRangeImpl(SEXP x, R_xlen_t start, R_xlen_t size) { + auto value_converter = [this](r_value_type value) { + return RConvert::Convert(this->primitive_type_, value); + }; + return RVectorVisitor::Convert(x, start, size, this->primitive_builder_, + value_converter); + } +}; - case BINARY: - return VisitRPrimitiveVector( - x, start, size, std::forward(func)); +template +class RPrimitiveConverter::value>> + : public PrimitiveConverter { + public: + Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + RETURN_NOT_OK(this->Reserve(size)); - case LIST: - return VisitRPrimitiveVector( - x, start, size, std::forward(func)); + if (GetVectorType(x) != BOOLEAN) { + return Status::Invalid("cannot convert to boolean type "); + } - case FACTOR: - return VisitFactor(x, start, size, std::forward(func)); + auto value_converter = [this](cpp11::r_bool value) { + return RConvert::Convert(this->primitive_type_, value); + }; + return RVectorVisitor::Convert( + x, start, size, this->primitive_builder_, value_converter); + } +}; - case TIME: - return VisitDifftime(x, start, size, - std::forward(func)); +template +class RPrimitiveConverter::value>> + : public PrimitiveConverter { + public: + Status AppendRange(SEXP x, R_xlen_t start, 
R_xlen_t size) override { + RETURN_NOT_OK(this->Reserve(size)); - case POSIXCT: - return VisitRPrimitiveVector( - x, start, size, std::forward(func)); + switch (GetVectorType(x)) { + case DATE_INT: + return AppendRange_Date_int(x, start, size); - case DATAFRAME: - return VisitDataFrame(x, start, size, - std::forward(func)); + case DATE_DBL: + return AppendRange_Date_dbl(x, start, size); + + case POSIXCT: + return AppendRange_Posixct(x, start, size); default: break; } - return Status::Invalid("No visitor for R type ", rtype); + return Status::Invalid("cannot convert to date type "); + } + + private: + Status AppendRange_Date_int(SEXP x, R_xlen_t start, R_xlen_t size) { + auto value_converter = [this](int value) { + return RConvert::Convert(this->primitive_type_, value); + }; + return RVectorVisitor::Convert(x, start, size, this->primitive_builder_, + value_converter); + } + + Status AppendRange_Date_dbl(SEXP x, R_xlen_t start, R_xlen_t size) { + // TODO + return Status::OK(); + } + + Status AppendRange_Posixct(SEXP x, R_xlen_t start, R_xlen_t size) { + // TODO + return Status::OK(); } }; -template -class RPrimitiveConverter; +// Status RScalar_to_days(RScalar* value, int32_t* days) { +// constexpr int64_t kSecondsPerDay = 86400; +// +// switch (value->rtype) { +// case DATE_DBL: { +// *days = static_cast(*reinterpret_cast(value->data)); +// return Status::OK(); +// } +// case DATE_INT: { +// *days = *reinterpret_cast(value->data); +// return Status::OK(); +// } +// case POSIXCT: { +// *days = *reinterpret_cast(value->data) / kSecondsPerDay; +// return Status::OK(); +// } +// +// default: +// break; +// } +// return Status::Invalid("invalid conversion to Date"); +// } + +int64_t get_TimeUnit_multiplier(TimeUnit::type unit) { + switch (unit) { + case TimeUnit::SECOND: + return 1; + case TimeUnit::MILLI: + return 1000; + case TimeUnit::MICRO: + return 1000000; + case TimeUnit::NANO: + return 1000000000; + default: + return 0; + } +} template -class RPrimitiveConverter> 
+class RPrimitiveConverter::value>> : public PrimitiveConverter { public: - Status Append(RScalar* value) override { - return this->primitive_builder_->AppendNull(); - } + Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + RETURN_NOT_OK(this->Reserve(size)); + auto rtype = GetVectorType(x); + if (rtype != TIME) { + return Status::Invalid("conversion to time from incompatible r vector type"); + } - Status AppendRange(SEXP, R_xlen_t start, R_xlen_t size) override { - return this->primitive_builder_->AppendNulls(size); + // multiplier to get the number of seconds from the value stored in the R vector + int difftime_multiplier; + RETURN_NOT_OK(GetDifftimeMultiplier(x, &difftime_multiplier)); + + // then multiply the seconds by this to match the time unit + auto multiplier = + get_TimeUnit_multiplier(this->primitive_type_->unit()) * difftime_multiplier; + + using c_type = typename T::c_type; + auto value_converter = [multiplier](double value) { + return Result(static_cast(value * multiplier)); + }; + return RVectorVisitor::Convert(x, start, size, this->primitive_builder_, + value_converter); } }; template -class RPrimitiveConverter< - T, enable_if_t::value || is_boolean_type::value || - is_date_type::value || is_time_type::value || - is_decimal_type::value>> +class RPrimitiveConverter::value>> : public PrimitiveConverter { public: - Status Append(RScalar* value) override { - if (RValue::IsNull(value)) { - return this->primitive_builder_->AppendNull(); - } else { - ARROW_ASSIGN_OR_RAISE( - auto converted, RValue::Convert(this->primitive_type_, this->options_, value)); - return this->primitive_builder_->Append(converted); + Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + RETURN_NOT_OK(this->Reserve(size)); + + RVectorType rtype = GetVectorType(x); + if (rtype != POSIXCT) { + return Status::Invalid("Invalid conversion to timestamp"); } - return Status::OK(); + + int64_t multiplier = 
get_TimeUnit_multiplier(this->primitive_type_->unit()); + + using c_type = typename T::c_type; + auto value_converter = [multiplier](double value) { + return Result(static_cast(value * multiplier)); + }; + return RVectorVisitor::Convert(x, start, size, this->primitive_builder_, + value_converter); } }; template -class RPrimitiveConverter::value>> +class RPrimitiveConverter::value>> : public PrimitiveConverter { public: - Status Append(RScalar* value) override { - if (RValue::IsNull(value)) { - return this->primitive_builder_->AppendNull(); - } else { - ARROW_ASSIGN_OR_RAISE( - auto converted, RValue::Convert(this->primitive_type_, this->options_, value)); - this->primitive_builder_->UnsafeAppend(converted); - } - return Status::OK(); + Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + return Status::NotImplemented("conversion from R to decimal"); } }; @@ -715,49 +1002,50 @@ class RPrimitiveConverter> public: using OffsetType = typename T::offset_type; - Status Append(RScalar* value) override { - if (RValue::IsNull(value)) { - this->primitive_builder_->UnsafeAppendNull(); - } else { - ARROW_RETURN_NOT_OK( - RValue::Convert(this->primitive_type_, this->options_, value, view_)); - // Since we don't know the varying length input size in advance, we need to - // reserve space in the value builder one by one. ReserveData raises CapacityError - // if the value would not fit into the array. 
- ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(view_.size)); - this->primitive_builder_->UnsafeAppend(view_.bytes, - static_cast(view_.size)); + Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + RETURN_NOT_OK(this->Reserve(size)); + + RVectorType rtype = GetVectorType(x); + // TODO: handle STRSXP + if (rtype != BINARY) { + return Status::Invalid("invalid R type to convert to binary"); } - return Status::OK(); + auto handler = [this](SEXP raw) { + R_xlen_t n = XLENGTH(raw); + ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(n)); + this->primitive_builder_->UnsafeAppend(RAW_RO(raw), static_cast(n)); + return Status::OK(); + }; + return RVectorVisitor::Visit(x, start, size, this->primitive_builder_, handler); } - - protected: - RBytesView view_; }; template class RPrimitiveConverter::value>> : public PrimitiveConverter { public: - Status Append(RScalar* value) override { - if (RValue::IsNull(value)) { - this->primitive_builder_->UnsafeAppendNull(); - } else { - ARROW_RETURN_NOT_OK( - RValue::Convert(this->primitive_type_, this->options_, value, view_)); - // Since we don't know the varying length input size in advance, we need to - // reserve space in the value builder one by one. ReserveData raises CapacityError - // if the value would not fit into the array. 
- ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(view_.size)); - this->primitive_builder_->UnsafeAppend(view_.bytes); + Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + RETURN_NOT_OK(this->Reserve(size)); + + RVectorType rtype = GetVectorType(x); + // TODO: handle STRSXP + if (rtype != BINARY) { + return Status::Invalid("invalid R type to convert to binary"); } - return Status::OK(); - } + auto handler = [this](SEXP raw) { + R_xlen_t n = XLENGTH(raw); - protected: - RBytesView view_; + if (n != this->primitive_builder_->byte_width()) { + return Status::Invalid("invalid size"); + } + ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(n)); + this->primitive_builder_->UnsafeAppend(RAW_RO(raw), n); + return Status::OK(); + }; + return RVectorVisitor::Visit(x, start, size, this->primitive_builder_, handler); + } }; template @@ -766,34 +1054,31 @@ class RPrimitiveConverter> public: using OffsetType = typename T::offset_type; - Status Append(RScalar* value) override { - if (RValue::IsNull(value)) { - return this->primitive_builder_->AppendNull(); - } else { - ARROW_RETURN_NOT_OK( - RValue::Convert(this->primitive_type_, this->options_, value, view_)); - - if (!view_.is_utf8) { - observed_binary_ = true; - } + Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + RETURN_NOT_OK(this->Reserve(size)); - ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(view_.size)); - this->primitive_builder_->UnsafeAppend(view_.bytes, - static_cast(view_.size)); + RVectorType rtype = GetVectorType(x); + if (rtype != STRING) { + return Status::Invalid("invalid R type to convert to string"); } - return Status::OK(); - } - protected: - bool observed_binary_ = false; - RBytesView view_; + auto handler = [this](cpp11::r_string s) { + R_xlen_t n = XLENGTH(s); + ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(n)); + this->primitive_builder_->UnsafeAppend(STRING_PTR_RO(s), + static_cast(n)); + return Status::OK(); + }; + 
return RVectorVisitor::Visit(x, start, size, + this->primitive_builder_, handler); + } }; template class RPrimitiveConverter::value>> : public PrimitiveConverter { public: - Status Append(RScalar* value) override { + Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { // TODO: look in lubridate return Status::NotImplemented("conversion to duration not yet implemented"); } @@ -809,7 +1094,7 @@ template class RDictionaryConverter> : public DictionaryConverter { public: - Status Append(RScalar* value) override { + Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { return Status::NotImplemented( "dictionaries only implemented with string value types"); } @@ -819,14 +1104,21 @@ template class RDictionaryConverter> : public DictionaryConverter { public: - Status Append(RScalar* value) override { - if (RValue::IsNull(value)) { - return this->value_builder_->AppendNull(); - } else { - ARROW_RETURN_NOT_OK( - RValue::Convert(this->value_type_, this->options_, value, view_)); - return this->value_builder_->Append(view_.bytes, static_cast(view_.size)); + Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + RETURN_NOT_OK(this->Reserve(size)); + + RVectorType rtype = GetVectorType(x); + if (rtype != FACTOR) { + return Status::Invalid("invalid R type to convert to string"); } + + SEXP levels = Rf_getAttrib(x, Rf_install("levels")); + + auto handler = [this, levels](int value) { + SEXP s = STRING_ELT(levels, value - 1); + return this->value_builder_->Append(STRING_PTR_RO(s), XLENGTH(s)); + }; + return RVectorVisitor::Visit(x, start, size, this->value_builder_, handler); } protected: From 14646494cda62f42899a3d90f7a8e5d3dfc18e16 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 5 Jan 2021 21:46:26 +0100 Subject: [PATCH 29/82] minor fixes (still not compiling) --- r/src/r_to_arrow.cpp | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 
b7ff874bae4..b3eb0d19de2 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -1041,7 +1041,7 @@ class RPrimitiveConverter::v return Status::Invalid("invalid size"); } ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(n)); - this->primitive_builder_->UnsafeAppend(RAW_RO(raw), n); + this->primitive_builder_->UnsafeAppend(RAW_RO(raw)); return Status::OK(); }; return RVectorVisitor::Visit(x, start, size, this->primitive_builder_, handler); @@ -1065,8 +1065,7 @@ class RPrimitiveConverter> auto handler = [this](cpp11::r_string s) { R_xlen_t n = XLENGTH(s); ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(n)); - this->primitive_builder_->UnsafeAppend(STRING_PTR_RO(s), - static_cast(n)); + this->primitive_builder_->UnsafeAppend(CHAR(s), static_cast(n)); return Status::OK(); }; return RVectorVisitor::Visit(x, start, size, @@ -1116,7 +1115,7 @@ class RDictionaryConverter> auto handler = [this, levels](int value) { SEXP s = STRING_ELT(levels, value - 1); - return this->value_builder_->Append(STRING_PTR_RO(s), XLENGTH(s)); + return this->value_builder_->Append(CHAR(s)); }; return RVectorVisitor::Visit(x, start, size, this->value_builder_, handler); } From 88ea123ce79f063ad919ea0a1d71ce7fd71b4bf9 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 6 Jan 2021 09:15:11 +0100 Subject: [PATCH 30/82] RConvert::Convert() are static --- r/src/r_to_arrow.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index b3eb0d19de2..02e9f1edf5e 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -782,9 +782,9 @@ struct RConvert { return from == TRUE; } - Result Convert(Date32Type*, int from) { return from; } + static Result Convert(const Date32Type*, int from) { return from; } - Result Convert(Date64Type*, int from) { + static Result Convert(const Date64Type*, int from) { constexpr int64_t kSecondsPerDay = 86400; return from * kSecondsPerDay; } From 
ffc3ec175c06061afcb15742e09a090218e6eda7 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 6 Jan 2021 09:25:05 +0100 Subject: [PATCH 31/82] at least this compiles --- r/src/r_to_arrow.cpp | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 02e9f1edf5e..3a13ee1dcc7 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -1111,13 +1111,14 @@ class RDictionaryConverter> return Status::Invalid("invalid R type to convert to string"); } - SEXP levels = Rf_getAttrib(x, Rf_install("levels")); + // SEXP levels = Rf_getAttrib(x, Rf_install("levels")); - auto handler = [this, levels](int value) { - SEXP s = STRING_ELT(levels, value - 1); - return this->value_builder_->Append(CHAR(s)); - }; - return RVectorVisitor::Visit(x, start, size, this->value_builder_, handler); + // auto handler = [this, levels](int value) { + // SEXP s = STRING_ELT(levels, value - 1); + // return this->value_builder_->Append(CHAR(s)); + // }; + // return RVectorVisitor::Visit(x, start, size, this->value_builder_, handler); + return Status::OK(); } protected: From a4dff163577864324c8c29bcc16d50cb850d17ec Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 6 Jan 2021 13:55:20 +0100 Subject: [PATCH 32/82] only use RVectorVisitor::Visit() --- r/src/r_to_arrow.cpp | 194 +++++++++++++++++++++++++------------------ 1 file changed, 114 insertions(+), 80 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 3a13ee1dcc7..41fbeee5503 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -499,21 +499,9 @@ struct RVectorVisitor { using data_type = typename std::conditional::value, double, T>::type; - template - static Status Convert(SEXP x, R_xlen_t start, R_xlen_t size, - PrimitiveBuilder* primitive_builder, - ValueConverter&& value_converter) { - auto handler = [primitive_builder, value_converter](data_type value) { - ARROW_ASSIGN_OR_RAISE(auto converted, value_converter(value)); - 
primitive_builder->UnsafeAppend(converted); - return Status::OK(); - }; - return Visit(x, start, size, primitive_builder, handler); - } - - template - static Status Visit(SEXP x, R_xlen_t start, R_xlen_t size, - PrimitiveBuilder* primitive_builder, ValueHandler&& handler) { + template + static Status Visit(SEXP x, R_xlen_t start, R_xlen_t size, AppendNull&& append_null, + AppendValue&& append_value) { cpp11::r_vector values(x); auto it = values.begin() + start; @@ -521,9 +509,9 @@ struct RVectorVisitor { auto value = GetValue(*it); if (is_NA(value)) { - primitive_builder->UnsafeAppendNull(); + RETURN_NOT_OK(append_null()); } else { - RETURN_NOT_OK(handler(value)); + RETURN_NOT_OK(append_value(value)); } } @@ -781,13 +769,6 @@ struct RConvert { Convert(Type*, cpp11::r_bool from) { return from == TRUE; } - - static Result Convert(const Date32Type*, int from) { return from; } - - static Result Convert(const Date64Type*, int from) { - constexpr int64_t kSecondsPerDay = 86400; - return from * kSecondsPerDay; - } }; template @@ -805,8 +786,6 @@ class RPrimitiveConverter< : public PrimitiveConverter { public: Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { - RETURN_NOT_OK(this->Reserve(size)); - auto rtype = GetVectorType(x); switch (rtype) { case UINT8: @@ -821,17 +800,26 @@ class RPrimitiveConverter< default: break; } - return Status::Invalid("cannot convert to integer "); + // TODO: mention T in the error + return Status::Invalid("cannot convert"); } private: template Status AppendRangeImpl(SEXP x, R_xlen_t start, R_xlen_t size) { - auto value_converter = [this](r_value_type value) { - return RConvert::Convert(this->primitive_type_, value); + RETURN_NOT_OK(this->Reserve(size)); + + auto append_value = [this](r_value_type value) { + ARROW_ASSIGN_OR_RAISE(auto converted, + RConvert::Convert(this->primitive_type_, value)); + this->primitive_builder_->UnsafeAppend(converted); + return Status::OK(); + }; + auto append_null = [this]() { + 
this->primitive_builder_->UnsafeAppendNull(); + return Status::OK(); }; - return RVectorVisitor::Convert(x, start, size, this->primitive_builder_, - value_converter); + return RVectorVisitor::Visit(x, start, size, append_null, append_value); } }; @@ -840,17 +828,22 @@ class RPrimitiveConverter::value>> : public PrimitiveConverter { public: Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { - RETURN_NOT_OK(this->Reserve(size)); - - if (GetVectorType(x) != BOOLEAN) { - return Status::Invalid("cannot convert to boolean type "); + auto rtype = GetVectorType(x); + if (rtype != BOOLEAN) { + return Status::Invalid("cannot convert"); } + RETURN_NOT_OK(this->Reserve(size)); - auto value_converter = [this](cpp11::r_bool value) { - return RConvert::Convert(this->primitive_type_, value); + auto append_value = [this](cpp11::r_bool value) { + this->primitive_builder_->UnsafeAppend(value == 1); + return Status::OK(); + }; + auto append_null = [this]() { + this->primitive_builder_->UnsafeAppendNull(); + return Status::OK(); }; - return RVectorVisitor::Convert( - x, start, size, this->primitive_builder_, value_converter); + return RVectorVisitor::Visit(x, start, size, append_null, + append_value); } }; @@ -863,10 +856,10 @@ class RPrimitiveConverter::value>> switch (GetVectorType(x)) { case DATE_INT: - return AppendRange_Date_int(x, start, size); + return AppendRange_Date(x, start, size); case DATE_DBL: - return AppendRange_Date_dbl(x, start, size); + return AppendRange_Date(x, start, size); case POSIXCT: return AppendRange_Posixct(x, start, size); @@ -879,23 +872,46 @@ class RPrimitiveConverter::value>> } private: - Status AppendRange_Date_int(SEXP x, R_xlen_t start, R_xlen_t size) { - auto value_converter = [this](int value) { - return RConvert::Convert(this->primitive_type_, value); + template + Status AppendRange_Date(SEXP x, R_xlen_t start, R_xlen_t size) { + auto append_null = [this]() { + this->primitive_builder_->UnsafeAppendNull(); + return Status::OK(); 
+ }; + auto append_value = [this](r_value_type value) { + this->primitive_builder_->UnsafeAppend(FromRDate(this->primitive_type_, value)); + return Status::OK(); }; - return RVectorVisitor::Convert(x, start, size, this->primitive_builder_, - value_converter); - } - Status AppendRange_Date_dbl(SEXP x, R_xlen_t start, R_xlen_t size) { - // TODO - return Status::OK(); + return RVectorVisitor::Visit(x, start, size, append_null, append_value); } Status AppendRange_Posixct(SEXP x, R_xlen_t start, R_xlen_t size) { - // TODO - return Status::OK(); + auto append_null = [this]() { + this->primitive_builder_->UnsafeAppendNull(); + return Status::OK(); + }; + auto append_value = [this](double value) { + this->primitive_builder_->UnsafeAppend(FromPosixct(this->primitive_type_, value)); + return Status::OK(); + }; + + return RVectorVisitor::Visit(x, start, size, append_null, append_value); + } + + static int FromRDate(const Date32Type*, int from) { return from; } + + static int64_t FromRDate(const Date64Type*, int from) { + constexpr int64_t kMilliSecondsPerDay = 86400000; + return from * kMilliSecondsPerDay; } + + static int FromPosixct(const Date32Type*, double from) { + constexpr int64_t kSecondsPerDay = 86400; + return from / kSecondsPerDay; + } + + static int64_t FromPosixct(const Date64Type*, double from) { return from * 1000; } }; // Status RScalar_to_days(RScalar* value, int32_t* days) { @@ -944,7 +960,7 @@ class RPrimitiveConverter::value>> RETURN_NOT_OK(this->Reserve(size)); auto rtype = GetVectorType(x); if (rtype != TIME) { - return Status::Invalid("conversion to time from incompatible r vector type"); + return Status::Invalid("Invalid conversion to time"); } // multiplier to get the number of seconds from the value stored in the R vector @@ -955,12 +971,16 @@ class RPrimitiveConverter::value>> auto multiplier = get_TimeUnit_multiplier(this->primitive_type_->unit()) * difftime_multiplier; - using c_type = typename T::c_type; - auto value_converter = 
[multiplier](double value) { - return Result(static_cast(value * multiplier)); + auto append_value = [this, multiplier](double value) { + auto converted = static_cast(value * multiplier); + this->primitive_builder_->UnsafeAppend(converted); + return Status::OK(); + }; + auto append_null = [this]() { + this->primitive_builder_->UnsafeAppendNull(); + return Status::OK(); }; - return RVectorVisitor::Convert(x, start, size, this->primitive_builder_, - value_converter); + return RVectorVisitor::Visit(x, start, size, append_null, append_value); } }; @@ -978,12 +998,16 @@ class RPrimitiveConverter::value>> int64_t multiplier = get_TimeUnit_multiplier(this->primitive_type_->unit()); - using c_type = typename T::c_type; - auto value_converter = [multiplier](double value) { - return Result(static_cast(value * multiplier)); + auto append_value = [this, multiplier](double value) { + auto converted = static_cast(value * multiplier); + this->primitive_builder_->UnsafeAppend(converted); + return Status::OK(); + }; + auto append_null = [this]() { + this->primitive_builder_->UnsafeAppendNull(); + return Status::OK(); }; - return RVectorVisitor::Convert(x, start, size, this->primitive_builder_, - value_converter); + return RVectorVisitor::Visit(x, start, size, append_null, append_value); } }; @@ -1006,18 +1030,21 @@ class RPrimitiveConverter> RETURN_NOT_OK(this->Reserve(size)); RVectorType rtype = GetVectorType(x); - // TODO: handle STRSXP if (rtype != BINARY) { return Status::Invalid("invalid R type to convert to binary"); } - auto handler = [this](SEXP raw) { + auto append_value = [this](SEXP raw) { R_xlen_t n = XLENGTH(raw); ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(n)); this->primitive_builder_->UnsafeAppend(RAW_RO(raw), static_cast(n)); return Status::OK(); }; - return RVectorVisitor::Visit(x, start, size, this->primitive_builder_, handler); + auto append_null = [this]() { + this->primitive_builder_->UnsafeAppendNull(); + return Status::OK(); + }; + return 
RVectorVisitor::Visit(x, start, size, append_null, append_value); } }; @@ -1034,7 +1061,7 @@ class RPrimitiveConverter::v return Status::Invalid("invalid R type to convert to binary"); } - auto handler = [this](SEXP raw) { + auto append_value = [this](SEXP raw) { R_xlen_t n = XLENGTH(raw); if (n != this->primitive_builder_->byte_width()) { @@ -1044,7 +1071,11 @@ class RPrimitiveConverter::v this->primitive_builder_->UnsafeAppend(RAW_RO(raw)); return Status::OK(); }; - return RVectorVisitor::Visit(x, start, size, this->primitive_builder_, handler); + auto append_null = [this]() { + this->primitive_builder_->UnsafeAppendNull(); + return Status::OK(); + }; + return RVectorVisitor::Visit(x, start, size, append_null, append_value); } }; @@ -1062,14 +1093,18 @@ class RPrimitiveConverter> return Status::Invalid("invalid R type to convert to string"); } - auto handler = [this](cpp11::r_string s) { + auto append_value = [this](cpp11::r_string s) { R_xlen_t n = XLENGTH(s); ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(n)); this->primitive_builder_->UnsafeAppend(CHAR(s), static_cast(n)); return Status::OK(); }; - return RVectorVisitor::Visit(x, start, size, - this->primitive_builder_, handler); + auto append_null = [this]() { + this->primitive_builder_->UnsafeAppendNull(); + return Status::OK(); + }; + return RVectorVisitor::Visit(x, start, size, append_null, + append_value); } }; @@ -1108,17 +1143,16 @@ class RDictionaryConverter> RVectorType rtype = GetVectorType(x); if (rtype != FACTOR) { - return Status::Invalid("invalid R type to convert to string"); + return Status::Invalid("invalid R type to convert to dictionary"); } - // SEXP levels = Rf_getAttrib(x, Rf_install("levels")); - - // auto handler = [this, levels](int value) { - // SEXP s = STRING_ELT(levels, value - 1); - // return this->value_builder_->Append(CHAR(s)); - // }; - // return RVectorVisitor::Visit(x, start, size, this->value_builder_, handler); - return Status::OK(); + SEXP levels = 
Rf_getAttrib(x, Rf_install("levels")); + auto append_value = [this, levels](int value) { + SEXP s = STRING_ELT(levels, value - 1); + return this->value_builder_->Append(CHAR(s)); + }; + auto append_null = [this]() { return this->value_builder_->AppendNull(); }; + return RVectorVisitor::Visit(x, start, size, append_null, append_value); } protected: From bb86b02935603051e64e9cd5913287ba5a1510f8 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 6 Jan 2021 14:08:09 +0100 Subject: [PATCH 33/82] RListConverter::AppendRange() --- r/src/r_to_arrow.cpp | 56 ++++++++++---------------------------------- 1 file changed, 13 insertions(+), 43 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 41fbeee5503..c5a96e29379 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -914,29 +914,6 @@ class RPrimitiveConverter::value>> static int64_t FromPosixct(const Date64Type*, double from) { return from * 1000; } }; -// Status RScalar_to_days(RScalar* value, int32_t* days) { -// constexpr int64_t kSecondsPerDay = 86400; -// -// switch (value->rtype) { -// case DATE_DBL: { -// *days = static_cast(*reinterpret_cast(value->data)); -// return Status::OK(); -// } -// case DATE_INT: { -// *days = *reinterpret_cast(value->data); -// return Status::OK(); -// } -// case POSIXCT: { -// *days = *reinterpret_cast(value->data) / kSecondsPerDay; -// return Status::OK(); -// } -// -// default: -// break; -// } -// return Status::Invalid("invalid conversion to Date"); -// } - int64_t get_TimeUnit_multiplier(TimeUnit::type unit) { switch (unit) { case TimeUnit::SECOND: @@ -1177,19 +1154,21 @@ struct RConverterTrait> { template class RListConverter : public ListConverter { public: - Status Append(RScalar* value) override { - if (RValue::IsNull(value)) { - return this->list_builder_->AppendNull(); - } + Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + RETURN_NOT_OK(this->Reserve(size)); - // append one element to the list - 
RETURN_NOT_OK(this->list_builder_->Append()); + RVectorType rtype = GetVectorType(x); + if (rtype != LIST) { + return Status::Invalid("Cannot convert to list type"); + } - // append the contents through the list value converter - SEXP obj = *reinterpret_cast(value->data); - R_xlen_t size = XLENGTH(obj); - RETURN_NOT_OK(this->list_builder_->ValidateOverflow(size)); - return this->value_converter_.get()->AppendRange(obj, 0, size); + auto append_value = [this](SEXP value) { + R_xlen_t n = XLENGTH(value); + RETURN_NOT_OK(this->list_builder_->ValidateOverflow(n)); + return this->value_converter_.get()->AppendRange(value, 0, n); + }; + auto append_null = [this]() { return this->list_builder_->AppendNull(); }; + return RVectorVisitor::Visit(x, start, size, append_null, append_value); } }; @@ -1202,15 +1181,6 @@ struct RConverterTrait { class RStructConverter : public StructConverter { public: - Status Append(RScalar* value) override { - if (value->rtype != DATAFRAME) { - return Status::Invalid("expecting a data frame"); - } - - auto row = reinterpret_cast(value); - return AppendRange(row->data, row->row, 1); - } - Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { RETURN_NOT_OK(this->builder_->Reserve(size)); From c9725261d36bc92646c132def5f2a45c14349462 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 6 Jan 2021 14:13:00 +0100 Subject: [PATCH 34/82] class RConverter : public Converter --- r/src/r_to_arrow.cpp | 65 ++------------------------------------------ 1 file changed, 2 insertions(+), 63 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index c5a96e29379..d96629d106d 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -629,72 +629,11 @@ inline Status VisitDataFrame(SEXP x, R_xlen_t start, R_xlen_t size, VisitorFunc& return Status::OK(); } -class RConverter : public Converter { +class RConverter : public Converter { public: - virtual Status Append(RScalar*) { return Status::Invalid("not using Append()"); } + 
virtual Status Append(SEXP) { return Status::Invalid("not using Append()"); } virtual Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) { - // RETURN_NOT_OK(this->Reserve(size)); - // - // auto func = [this](RScalar* obj) { return Append(obj); }; - // using VisitorFunc = decltype(func); - // - // switch (rtype) { - // case BOOLEAN: - // return VisitRPrimitiveVector( - // x, start, size, std::forward(func)); - // case UINT8: - // return VisitRPrimitiveVector( - // x, start, size, std::forward(func)); - // case INT32: - // return VisitRPrimitiveVector( - // x, start, size, std::forward(func)); - // case FLOAT64: - // return VisitRPrimitiveVector( - // x, start, size, std::forward(func)); - // case DATE_DBL: - // return VisitRPrimitiveVector( - // x, start, size, std::forward(func)); - // case DATE_INT: - // return VisitRPrimitiveVector( - // x, start, size, std::forward(func)); - // - // case STRING: - // return VisitRPrimitiveVector( - // x, start, size, std::forward(func)); - // - // case INT64: - // return VisitInt64Vector(x, start, size, - // std::forward(func)); - // - // case BINARY: - // return VisitRPrimitiveVector( - // x, start, size, std::forward(func)); - // - // case LIST: - // return VisitRPrimitiveVector( - // x, start, size, std::forward(func)); - // - // case FACTOR: - // return VisitFactor(x, start, size, - // std::forward(func)); - // - // case TIME: - // return VisitDifftime(x, start, size, - // std::forward(func)); - // - // case POSIXCT: - // return VisitRPrimitiveVector( - // x, start, size, std::forward(func)); - // - // case DATAFRAME: - // return VisitDataFrame(x, start, size, - // std::forward(func)); - // - // default: - // break; - // } - RVectorType rtype = GetVectorType(x); return Status::Invalid("No visitor for R type ", rtype); } From 376888eb20050a2efb340fc0809827bf8b133d74 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 6 Jan 2021 14:25:58 +0100 Subject: [PATCH 35/82] remove unused Rscalar concept --- 
r/src/r_to_arrow.cpp | 494 ++++--------------------------------------- 1 file changed, 43 insertions(+), 451 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index d96629d106d..9a0fb11dea2 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -123,339 +123,6 @@ RVectorType GetVectorType(SEXP x) { return OTHER; } -struct RScalar { - RVectorType rtype; - void* data; - bool null; -}; - -struct RBytesView { - const char* bytes; - R_xlen_t size; - bool is_utf8; - - Status ParseString(RScalar* value) { - SEXP s = *reinterpret_cast(value->data); - bytes = CHAR(s); - size = XLENGTH(s); - - // TODO: test it - is_utf8 = true; - - return Status::OK(); - } - - Status ParseRaw(RScalar* value) { - SEXP raw; - - if (value->rtype == LIST || value->rtype == BINARY) { - raw = *reinterpret_cast(value->data); - if (TYPEOF(raw) != RAWSXP) { - return Status::Invalid("can only handle RAW vectors"); - } - } else { - return Status::NotImplemented("cannot parse binary with RBytesView::ParseRaw()"); - } - - bytes = reinterpret_cast(RAW_RO(raw)); - size = XLENGTH(raw); - is_utf8 = false; - - return Status::OK(); - } -}; - -template -Result IntegerScalarToFloat32Safe(int64_t value) { - constexpr int64_t kFloatMax = 1LL << 24; - constexpr int64_t kFloatMin = -(1LL << 24); - - if (value < kFloatMin || value > kFloatMax) { - return Status::Invalid("Integer value ", value, " is outside of the range exactly", - " representable by a IEEE 754 single precision value"); - } - return static_cast(value); -} - -template -Result IntegerScalarToDoubleSafe(int64_t value) { - constexpr int64_t kDoubleMax = 1LL << 53; - constexpr int64_t kDoubleMin = -(1LL << 53); - - if (value < kDoubleMin || value > kDoubleMax) { - return Status::Invalid("Integer value ", value, " is outside of the range exactly", - " representable by a IEEE 754 double precision value"); - } - return static_cast(value); -} - -template -Result CIntFromRScalarImpl(int64_t value) { - if (value < 
std::numeric_limits::min() || value > std::numeric_limits::max()) { - return Status::Invalid("value outside of range"); - } - return static_cast(value); -} - -template <> -Result CIntFromRScalarImpl(int64_t value) { - if (value < 0) { - return Status::Invalid("value outside of range"); - } - return static_cast(value); -} - -template -Result CIntFromRScalar(RScalar* obj) { - switch (obj->rtype) { - case FLOAT64: - return CIntFromRScalarImpl(*reinterpret_cast(obj->data)); - case INT32: - return CIntFromRScalarImpl(*reinterpret_cast(obj->data)); - case UINT8: - return CIntFromRScalarImpl(*reinterpret_cast(obj->data)); - case INT64: - return CIntFromRScalarImpl(*reinterpret_cast(obj->data)); - default: - break; - } - - return Status::Invalid("Cannot convert to Int"); -} - -Result RPosixct_Convert(double seconds, const TimestampType* type) { - switch (type->unit()) { - case TimeUnit::SECOND: - return seconds; - case TimeUnit::MILLI: - return seconds * 1000; - case TimeUnit::MICRO: - return seconds * 1000000; - case TimeUnit::NANO: - return seconds * 1000000000; - default: - break; - } - return Status::Invalid("invalid time unit"); -} - -struct DiffTimeData { - double data; - int multiplier; -}; - -struct DataFrameRow { - SEXP data; - R_xlen_t row; -}; - -Status RScalar_to_days(RScalar* value, int32_t* days) { - constexpr int64_t kSecondsPerDay = 86400; - - switch (value->rtype) { - case DATE_DBL: { - *days = static_cast(*reinterpret_cast(value->data)); - return Status::OK(); - } - case DATE_INT: { - *days = *reinterpret_cast(value->data); - return Status::OK(); - } - case POSIXCT: { - *days = *reinterpret_cast(value->data) / kSecondsPerDay; - return Status::OK(); - } - - default: - break; - } - return Status::Invalid("invalid conversion to Date"); -} - -class RValue { - public: - static bool IsNull(RScalar* obj) { return obj->null; } - - static Result Convert(const BooleanType*, const RConversionOptions&, - RScalar* value) { - if (value->rtype == BOOLEAN) { - return 
*reinterpret_cast(value->data); - } - - return Status::Invalid("invalid conversion to bool, expecting a logical vector"); - } - - static Result Convert(const HalfFloatType*, const RConversionOptions&, - RScalar* value) { - return Status::NotImplemented("conversion to half float from R not implemented"); - } - - static Result Convert(const FloatType*, const RConversionOptions&, - RScalar* value) { - switch (value->rtype) { - case FLOAT64: - return static_cast(*reinterpret_cast(value->data)); - case INT32: - return IntegerScalarToFloat32Safe(*reinterpret_cast(value->data)); - case UINT8: - return IntegerScalarToFloat32Safe( - *reinterpret_cast(value->data)); - case INT64: - return IntegerScalarToFloat32Safe( - *reinterpret_cast(value->data)); - default: - break; - } - return Status::Invalid("invalid conversion to float"); - } - - static Result Convert(const DoubleType*, const RConversionOptions&, - RScalar* value) { - switch (value->rtype) { - case FLOAT64: - return static_cast(*reinterpret_cast(value->data)); - case INT32: - return IntegerScalarToDoubleSafe(*reinterpret_cast(value->data)); - case UINT8: - return IntegerScalarToDoubleSafe( - *reinterpret_cast(value->data)); - case INT64: - return IntegerScalarToDoubleSafe( - *reinterpret_cast(value->data)); - default: - break; - } - - return Status::Invalid("invalid conversion to double"); - } - - template - static enable_if_integer> Convert( - const T*, const RConversionOptions&, RScalar* value) { - return CIntFromRScalar(value); - } - - static Result Convert(const Date32Type*, const RConversionOptions&, - RScalar* value) { - int32_t days; - RETURN_NOT_OK(RScalar_to_days(value, &days)); - return days; - } - - static Result Convert(const Date64Type*, const RConversionOptions&, - RScalar* value) { - constexpr static int64_t kMillisecondsPerDay = 86400000; - - // first truncate to a number of days since epoch and then convert to milliseconds - int32_t days; - RETURN_NOT_OK(RScalar_to_days(value, &days)); - - return 
static_cast(days) * kMillisecondsPerDay; - } - - static Result Convert(const Time32Type* type, const RConversionOptions&, - RScalar* value) { - if (value->rtype == TIME) { - DiffTimeData* data = reinterpret_cast(value->data); - auto seconds = data->data * data->multiplier; - switch (type->unit()) { - case TimeUnit::SECOND: - return seconds; - case TimeUnit::MILLI: - return seconds * 1000; - default: - return Status::Invalid("invalid time unit"); - } - } - - return Status::Invalid("invalid conversion to time32"); - } - - static Result Convert(const TimestampType* type, const RConversionOptions&, - RScalar* value) { - if (value->rtype == POSIXCT) { - return RPosixct_Convert(*reinterpret_cast(value->data), type); - } - - return Status::Invalid("invalid conversion to timestamp"); - } - - static Result Convert(const Time64Type* type, const RConversionOptions&, - RScalar* value) { - constexpr int64_t kMicroSeconds = 1000000; - constexpr int64_t kNanoSeconds = 1000000000; - - if (value->rtype == TIME) { - DiffTimeData* data = reinterpret_cast(value->data); - auto seconds = data->data * data->multiplier; - switch (type->unit()) { - case TimeUnit::MICRO: - return seconds * kMicroSeconds; - case TimeUnit::NANO: - return seconds * kNanoSeconds; - default: - return Status::Invalid("invalid time unit"); - } - } - - return Status::Invalid("invalid conversion to time64"); - } - - static Result Convert(const Decimal128Type*, const RConversionOptions&, - RScalar* value) { - // TODO: improve error - return Status::Invalid("invalid conversion to decimal128"); - } - - static Result Convert(const Decimal256Type*, const RConversionOptions&, - RScalar* value) { - // TODO: improve error - return Status::Invalid("invalid conversion to decimal256"); - } - - template - static enable_if_string Convert(const T*, const RConversionOptions&, - RScalar* value, RBytesView& view) { - switch (value->rtype) { - case STRING: - case FACTOR: - return view.ParseString(value); - default: - break; - } - - 
// TODO: improve error - return Status::Invalid("invalid conversion to string"); - } - - static Status Convert(const BaseBinaryType*, const RConversionOptions&, RScalar* value, - RBytesView& view) { - switch (value->rtype) { - case BINARY: - case LIST: - return view.ParseRaw(value); - - case STRING: - return Status::NotImplemented("conversion string -> binary"); - - default: - break; - } - - // TODO: improve error - return Status::Invalid("invalid conversion to binary"); - } - - static Status Convert(const FixedSizeBinaryType* type, const RConversionOptions&, - RScalar* value, RBytesView& view) { - ARROW_RETURN_NOT_OK(view.ParseRaw(value)); - if (view.size != type->byte_width()) { - return Status::Invalid("invalid size"); - } - return Status::OK(); - } -}; - template bool is_NA(T value); @@ -526,109 +193,6 @@ int64_t RVectorVisitor::GetValue(double x) { return *reinterpret_cast(&x); } -template -inline Status VisitRPrimitiveVector(SEXP x, R_xlen_t start, R_xlen_t size, - VisitorFunc&& func) { - RScalar obj{rtype, nullptr, false}; - cpp11::r_vector values(x); - auto it = values.begin() + start; - - for (R_xlen_t i = 0; i < size; i++, ++it) { - auto value = *it; - obj.data = reinterpret_cast(&value); - obj.null = is_NA(value); - RETURN_NOT_OK(func(&obj)); - } - - return Status::OK(); -} - -template -inline Status VisitInt64Vector(SEXP x, R_xlen_t start, R_xlen_t size, - VisitorFunc&& func) { - RScalar obj{INT64, nullptr, false}; - cpp11::doubles values(x); - auto it = values.begin() + start; - - for (R_xlen_t i = 0; i < size; i++, ++it) { - double value = *it; - obj.data = reinterpret_cast(&value); - obj.null = is_NA(*reinterpret_cast(&value)); - RETURN_NOT_OK(func(&obj)); - } - return Status::OK(); -} - -template -inline Status VisitFactor(SEXP x, R_xlen_t start, R_xlen_t size, VisitorFunc&& func) { - cpp11::strings levels(Rf_getAttrib(x, R_LevelsSymbol)); - SEXP* levels_ptr = const_cast(STRING_PTR_RO(levels)); - - RScalar obj{FACTOR, nullptr, false}; - 
cpp11::r_vector values(x); - auto it = values.begin() + start; - - for (R_xlen_t i = 0; i < size; i++, ++it) { - int value = *it; - if (is_NA(value)) { - obj.null = true; - } else { - obj.null = false; - obj.data = reinterpret_cast(&levels_ptr[value - 1]); - } - RETURN_NOT_OK(func(&obj)); - } - return Status::OK(); -} - -Status GetDifftimeMultiplier(SEXP obj, int* res) { - std::string unit(CHAR(STRING_ELT(Rf_getAttrib(obj, symbols::units), 0))); - if (unit == "secs") { - *res = 1; - } else if (unit == "mins") { - *res = 60; - } else if (unit == "hours") { - *res = 3600; - } else if (unit == "days") { - *res = 86400; - } else if (unit == "weeks") { - *res = 604800; - } else { - return Status::Invalid("unknown difftime unit"); - } - return Status::OK(); -} - -template -inline Status VisitDifftime(SEXP x, R_xlen_t start, R_xlen_t size, VisitorFunc&& func) { - DiffTimeData scalar; - RETURN_NOT_OK(GetDifftimeMultiplier(x, &scalar.multiplier)); - - RScalar obj{TIME, reinterpret_cast(&scalar), false}; - cpp11::doubles values(x); - auto it = values.begin() + start; - - for (R_xlen_t i = 0; i < size; i++, ++it) { - double value = *it; - scalar.data = value; - obj.null = is_NA(value); - RETURN_NOT_OK(func(&obj)); - } - return Status::OK(); -} - -template -inline Status VisitDataFrame(SEXP x, R_xlen_t start, R_xlen_t size, VisitorFunc&& func) { - DataFrameRow row({x, start}); - RScalar obj{DATAFRAME, reinterpret_cast(&row), false}; - - for (R_xlen_t i = 0; i < size; i++) { - ++row.row; - RETURN_NOT_OK(func(&obj)); - } - return Status::OK(); -} - class RConverter : public Converter { public: virtual Status Append(SEXP) { return Status::Invalid("not using Append()"); } @@ -642,13 +206,33 @@ class RConverter : public Converter { template class RPrimitiveConverter; +template +Result CIntFromRScalarImpl(int64_t value) { + if (value < std::numeric_limits::min() || value > std::numeric_limits::max()) { + return Status::Invalid("value outside of range"); + } + return 
static_cast(value); +} + +template <> +Result CIntFromRScalarImpl(int64_t value) { + if (value < 0) { + return Status::Invalid("value outside of range"); + } + return static_cast(value); +} + +// utility to convert R single values from (int, raw, double and int64) vectors +// to arrow integers and floating point struct RConvert { + // ---- convert to an arrow integer template static enable_if_integer> Convert(Type*, From from) { return CIntFromRScalarImpl(from); } + // ---- convert R integer types to double template static enable_if_t::value && !std::is_same::value, @@ -664,6 +248,7 @@ struct RConvert { return static_cast(from); } + // ---- convert double to double template static enable_if_t::value && std::is_same::value, @@ -672,6 +257,7 @@ struct RConvert { return from; } + // ---- convert R integer types to float template static enable_if_t::value && !std::is_same::value, @@ -687,6 +273,7 @@ struct RConvert { return static_cast(from); } + // ---- convert double to float template static enable_if_t::value && std::is_same::value, @@ -695,19 +282,13 @@ struct RConvert { return static_cast(from); } + // ---- convert to half float: not implemented template static enable_if_t::value, Result> Convert(Type*, From from) { return Status::Invalid("Cannot convert to Half Float"); } - - template - static enable_if_t::value, - Result> - Convert(Type*, cpp11::r_bool from) { - return from == TRUE; - } }; template @@ -881,7 +462,20 @@ class RPrimitiveConverter::value>> // multiplier to get the number of seconds from the value stored in the R vector int difftime_multiplier; - RETURN_NOT_OK(GetDifftimeMultiplier(x, &difftime_multiplier)); + std::string unit(CHAR(STRING_ELT(Rf_getAttrib(x, symbols::units), 0))); + if (unit == "secs") { + difftime_multiplier = 1; + } else if (unit == "mins") { + difftime_multiplier = 60; + } else if (unit == "hours") { + difftime_multiplier = 3600; + } else if (unit == "days") { + difftime_multiplier = 86400; + } else if (unit == "weeks") { + 
difftime_multiplier = 604800; + } else { + return Status::Invalid("unknown difftime unit"); + } // then multiply the seconds by this to match the time unit auto multiplier = @@ -1062,7 +656,7 @@ class RDictionaryConverter> return Status::Invalid("invalid R type to convert to dictionary"); } - SEXP levels = Rf_getAttrib(x, Rf_install("levels")); + cpp11::strings levels(Rf_getAttrib(x, R_LevelsSymbol)); auto append_value = [this, levels](int value) { SEXP s = STRING_ELT(levels, value - 1); return this->value_builder_->Append(CHAR(s)); @@ -1070,9 +664,6 @@ class RDictionaryConverter> auto append_null = [this]() { return this->value_builder_->AppendNull(); }; return RVectorVisitor::Visit(x, start, size, append_null, append_value); } - - protected: - RBytesView view_; }; template @@ -1121,6 +712,8 @@ struct RConverterTrait { class RStructConverter : public StructConverter { public: Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + // this specifically does not use this->Reserve() because + // children_[i]->AppendRange() below will reserve the additional capacity RETURN_NOT_OK(this->builder_->Reserve(size)); R_xlen_t n_columns = XLENGTH(x); @@ -1128,13 +721,12 @@ class RStructConverter : public StructConverter { return Status::Invalid("Can only convert data frames to Struct type"); } - auto struct_builder = this->struct_builder_; for (R_xlen_t i = 0; i < size; i++) { - RETURN_NOT_OK(struct_builder->Append()); + RETURN_NOT_OK(struct_builder_->Append()); } for (R_xlen_t i = 0; i < n_columns; i++) { - RETURN_NOT_OK(this->children_[i]->AppendRange(VECTOR_ELT(x, i), start, size)); + RETURN_NOT_OK(children_[i]->AppendRange(VECTOR_ELT(x, i), start, size)); } return Status::OK(); From d9c84e74580169ecec17537b88c96a0d53f2646b Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Thu, 7 Jan 2021 10:39:25 +0100 Subject: [PATCH 36/82] reuse the short circuit code from the previous api, i.e. 
this does not use the Converter api, and instead reuses the memory from the R object, and manually handle the bitmap if needed --- r/src/array_from_vector.cpp | 83 ------------------------------- r/src/arrow_types.h | 2 + r/src/r_to_arrow.cpp | 98 +++++++++++++++++++++++++++++++++++-- 3 files changed, 97 insertions(+), 86 deletions(-) diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp index c2d22868535..f747ef5ec12 100644 --- a/r/src/array_from_vector.cpp +++ b/r/src/array_from_vector.cpp @@ -1334,89 +1334,6 @@ std::shared_ptr InferArrowType(SEXP x) { cpp11::stop("Cannot infer type from vector"); } -// in some situations we can just use the memory of the R object in an RBuffer -// instead of going through ArrayBuilder, etc ... -bool can_reuse_memory(SEXP x, const std::shared_ptr& type) { - switch (type->id()) { - case Type::INT32: - return TYPEOF(x) == INTSXP && !OBJECT(x); - case Type::DOUBLE: - return TYPEOF(x) == REALSXP && !OBJECT(x); - case Type::INT8: - return TYPEOF(x) == RAWSXP && !OBJECT(x); - case Type::INT64: - return TYPEOF(x) == REALSXP && Rf_inherits(x, "integer64"); - default: - break; - } - return false; -} - -// this is only used on some special cases when the arrow Array can just use the memory of -// the R object, via an RBuffer, hence be zero copy -template -std::shared_ptr MakeSimpleArray(SEXP x) { - using value_type = typename arrow::TypeTraits::ArrayType::value_type; - RVector vec(x); - auto n = vec.size(); - auto p_vec_start = reinterpret_cast(DATAPTR(vec)); - auto p_vec_end = p_vec_start + n; - std::vector> buffers{nullptr, - std::make_shared>(vec)}; - - int null_count = 0; - - auto first_na = std::find_if(p_vec_start, p_vec_end, is_na); - if (first_na < p_vec_end) { - auto null_bitmap = - ValueOrStop(AllocateBuffer(BitUtil::BytesForBits(n), gc_memory_pool())); - internal::FirstTimeBitmapWriter bitmap_writer(null_bitmap->mutable_data(), 0, n); - - // first loop to clear all the bits before the first NA - auto j = 
std::distance(p_vec_start, first_na); - int i = 0; - for (; i < j; i++, bitmap_writer.Next()) { - bitmap_writer.Set(); - } - - auto p_vec = first_na; - // then finish - for (; i < n; i++, bitmap_writer.Next(), ++p_vec) { - if (is_na(*p_vec)) { - bitmap_writer.Clear(); - null_count++; - } else { - bitmap_writer.Set(); - } - } - - bitmap_writer.Finish(); - buffers[0] = std::move(null_bitmap); - } - - auto data = ArrayData::Make(std::make_shared(), LENGTH(x), std::move(buffers), - null_count, 0 /*offset*/); - - // return the right Array class - return std::make_shared::ArrayType>(data); -} - -std::shared_ptr Array__from_vector_reuse_memory(SEXP x) { - auto type = TYPEOF(x); - - if (type == INTSXP) { - return MakeSimpleArray(x); - } else if (type == REALSXP && Rf_inherits(x, "integer64")) { - return MakeSimpleArray(x); - } else if (type == REALSXP) { - return MakeSimpleArray(x); - } else if (type == RAWSXP) { - return MakeSimpleArray(x); - } - - cpp11::stop("Unreachable: you might need to fix can_reuse_memory()"); -} - bool CheckCompatibleFactor(SEXP obj, const std::shared_ptr& type) { if (!Rf_inherits(obj, "factor")) { return false; diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 0a8fdd953a9..6cd2171852e 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -83,6 +83,8 @@ auto ValueOrStop(R&& result) -> decltype(std::forward(result).ValueOrDie()) { namespace r { std::shared_ptr InferArrowType(SEXP x); +std::shared_ptr Array__from_vector_reuse_memory(SEXP x); +bool can_reuse_memory(SEXP x, const std::shared_ptr& type); Status count_fields(SEXP lst, int* out); diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 9a0fb11dea2..18f7b04925f 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -26,6 +26,7 @@ #include #include #include +#include #include #include @@ -744,19 +745,110 @@ struct RConverterTrait { using dictionary_type = RDictionaryConverter; }; +// in some situations we can just use the memory of the R object in an 
RBuffer +// instead of going through ArrayBuilder, etc ... +bool can_reuse_memory(SEXP x, const std::shared_ptr& type) { + // TODO: this probably should be disabled when x is an ALTREP object + // because MakeSimpleArray below will force materialization + switch (type->id()) { + case Type::INT32: + return TYPEOF(x) == INTSXP && !OBJECT(x); + case Type::DOUBLE: + return TYPEOF(x) == REALSXP && !OBJECT(x); + case Type::INT8: + return TYPEOF(x) == RAWSXP && !OBJECT(x); + case Type::INT64: + return TYPEOF(x) == REALSXP && Rf_inherits(x, "integer64"); + default: + break; + } + return false; +} + +// this is only used on some special cases when the arrow Array can just use the memory of +// the R object, via an RBuffer, hence be zero copy +template +std::shared_ptr MakeSimpleArray(SEXP x) { + using value_type = typename arrow::TypeTraits::ArrayType::value_type; + RVector vec(x); + auto n = vec.size(); + auto p_vec_start = reinterpret_cast(DATAPTR(vec)); + auto p_vec_end = p_vec_start + n; + std::vector> buffers{nullptr, + std::make_shared>(vec)}; + + int null_count = 0; + + auto first_na = std::find_if(p_vec_start, p_vec_end, is_NA); + if (first_na < p_vec_end) { + auto null_bitmap = + ValueOrStop(AllocateBuffer(BitUtil::BytesForBits(n), gc_memory_pool())); + internal::FirstTimeBitmapWriter bitmap_writer(null_bitmap->mutable_data(), 0, n); + + // first loop to clear all the bits before the first NA + auto j = std::distance(p_vec_start, first_na); + int i = 0; + for (; i < j; i++, bitmap_writer.Next()) { + bitmap_writer.Set(); + } + + auto p_vec = first_na; + // then finish + for (; i < n; i++, bitmap_writer.Next(), ++p_vec) { + if (is_NA(*p_vec)) { + bitmap_writer.Clear(); + null_count++; + } else { + bitmap_writer.Set(); + } + } + + bitmap_writer.Finish(); + buffers[0] = std::move(null_bitmap); + } + + auto data = ArrayData::Make(std::make_shared(), LENGTH(x), std::move(buffers), + null_count, 0 /*offset*/); + + // return the right Array class + return 
std::make_shared::ArrayType>(data); +} + +std::shared_ptr Array__from_vector_reuse_memory(SEXP x) { + auto type = TYPEOF(x); + + if (type == INTSXP) { + return MakeSimpleArray(x); + } else if (type == REALSXP && Rf_inherits(x, "integer64")) { + return MakeSimpleArray(x); + } else if (type == REALSXP) { + return MakeSimpleArray(x); + } else if (type == RAWSXP) { + return MakeSimpleArray(x); + } + + cpp11::stop("Unreachable: you might need to fix can_reuse_memory()"); +} + std::shared_ptr vec_to_arrow(SEXP x, SEXP s_type) { + // short circuit if `x` is already an Array + if (Rf_inherits(x, "Array")) { + return cpp11::as_cpp>(x); + } + RConversionOptions options; options.strict = !Rf_isNull(s_type); - - std::shared_ptr type; if (options.strict) { options.type = cpp11::as_cpp>(s_type); } else { options.type = arrow::r::InferArrowType(x); } - options.size = vctrs::short_vec_size(x); + if (can_reuse_memory(x, options.type)) { + return Array__from_vector_reuse_memory(x); + } + auto converter = ValueOrStop(MakeConverter( options.type, options, gc_memory_pool())); StopIfNotOk(converter->AppendRange(x, 0, options.size)); From f302d96780f2bc950183e0e16ac4b829de31604f Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Thu, 7 Jan 2021 15:43:50 +0100 Subject: [PATCH 37/82] paths that don't use RConvert::Convert in RPrimitiveConverter< floating point | integer > --- r/src/arrow_cpp11.h | 1 + r/src/r_to_arrow.cpp | 97 +++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 92 insertions(+), 6 deletions(-) diff --git a/r/src/arrow_cpp11.h b/r/src/arrow_cpp11.h index 2329db11e99..1d0e26e1a38 100644 --- a/r/src/arrow_cpp11.h +++ b/r/src/arrow_cpp11.h @@ -23,6 +23,7 @@ #undef Free #include +#include #include "./nameof.h" diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 18f7b04925f..6fd272041b3 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -310,13 +310,13 @@ class RPrimitiveConverter< auto rtype = GetVectorType(x); switch (rtype) { case UINT8: - 
return AppendRangeImpl(x, start, size); + return AppendRangeDispatch(x, start, size); case INT32: - return AppendRangeImpl(x, start, size); + return AppendRangeDispatch(x, start, size); case FLOAT64: - return AppendRangeImpl(x, start, size); + return AppendRangeDispatch(x, start, size); case INT64: - return AppendRangeImpl(x, start, size); + return AppendRangeDispatch(x, start, size); default: break; @@ -327,7 +327,7 @@ class RPrimitiveConverter< private: template - Status AppendRangeImpl(SEXP x, R_xlen_t start, R_xlen_t size) { + Status AppendRangeLoopDifferentType(SEXP x, R_xlen_t start, R_xlen_t size) { RETURN_NOT_OK(this->Reserve(size)); auto append_value = [this](r_value_type value) { @@ -342,6 +342,88 @@ class RPrimitiveConverter< }; return RVectorVisitor::Visit(x, start, size, append_null, append_value); } + + template + Status AppendRangeSameTypeNotALTREP(SEXP x, R_xlen_t start, R_xlen_t size) { + const r_value_type* p = reinterpret_cast(DATAPTR_RO(x)) + start; + const r_value_type* p_end = p + size; + + auto first_na = std::find_if(p, p_end, is_NA); + + if (first_na == p_end) { + // no nulls, so we can use AppendValues() directly + return this->primitive_builder_->AppendValues(p, p_end); + } + + // Append all values up until the first NULL + RETURN_NOT_OK(this->primitive_builder_->AppendValues(p, first_na)); + + // loop for the remaining + RETURN_NOT_OK(this->primitive_builder_->Reserve(p_end - first_na)); + p = first_na; + for (; p < p_end; ++p) { + r_value_type value = *p; + if (is_NA(value)) { + this->primitive_builder_->UnsafeAppendNull(); + } else { + this->primitive_builder_->UnsafeAppend(value); + } + } + return Status::OK(); + } + + template + Status AppendRangeSameTypeALTREP(SEXP x, R_xlen_t start, R_xlen_t size) { + // if it is altrep, then we use cpp11 looping + // without needing to convert + RETURN_NOT_OK(this->primitive_builder_->Reserve(size)); + cpp11::r_vector vec(x); + auto it = vec.begin() + start; + for (R_xlen_t i = 0; i < size; i++, 
++it) { + r_value_type value = *it; + if (is_NA(value)) { + this->primitive_builder_->UnsafeAppendNull(); + } else { + this->primitive_builder_->UnsafeAppend(value); + } + } + return Status::OK(); + } + + template <> + Status AppendRangeSameTypeALTREP(SEXP x, R_xlen_t start, R_xlen_t size) { + // if it is altrep, then we use cpp11 looping + // without needing to convert + RETURN_NOT_OK(this->primitive_builder_->Reserve(size)); + cpp11::r_vector vec(x); + auto it = vec.begin() + start; + for (R_xlen_t i = 0; i < size; i++, ++it) { + double d = *it; + int64_t value = *reinterpret_cast(&d); + if (is_NA(value)) { + this->primitive_builder_->UnsafeAppendNull(); + } else { + this->primitive_builder_->UnsafeAppend(value); + } + } + return Status::OK(); + } + + template + Status AppendRangeDispatch(SEXP x, R_xlen_t start, R_xlen_t size) { + if (std::is_same::value) { + + if (!ALTREP(x)) { + return AppendRangeSameTypeNotALTREP(x, start, size); + } else { + return AppendRangeSameTypeALTREP(x, start, size); + } + } + + // here if underlying types differ so going + return AppendRangeLoopDifferentType(x, start, size); + } + }; template @@ -745,6 +827,8 @@ struct RConverterTrait { using dictionary_type = RDictionaryConverter; }; +// ---- short circuit the Converter api entirely when we can do zero-copy + // in some situations we can just use the memory of the R object in an RBuffer // instead of going through ArrayBuilder, etc ... 
bool can_reuse_memory(SEXP x, const std::shared_ptr& type) { @@ -845,14 +929,15 @@ std::shared_ptr vec_to_arrow(SEXP x, SEXP s_type) { } options.size = vctrs::short_vec_size(x); + // maybe short circuit when zero-copy is possible if (can_reuse_memory(x, options.type)) { return Array__from_vector_reuse_memory(x); } + // otherwise go through the converter api auto converter = ValueOrStop(MakeConverter( options.type, options, gc_memory_pool())); StopIfNotOk(converter->AppendRange(x, 0, options.size)); - return ValueOrStop(converter->ToArray()); } From 82bffd842ae029caf4b11f1ce91aeaae9ced9313 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Thu, 7 Jan 2021 15:53:40 +0100 Subject: [PATCH 38/82] lint --- r/src/r_to_arrow.cpp | 2 -- 1 file changed, 2 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 6fd272041b3..4d1e493d11b 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -412,7 +412,6 @@ class RPrimitiveConverter< template Status AppendRangeDispatch(SEXP x, R_xlen_t start, R_xlen_t size) { if (std::is_same::value) { - if (!ALTREP(x)) { return AppendRangeSameTypeNotALTREP(x, start, size); } else { @@ -423,7 +422,6 @@ class RPrimitiveConverter< // here if underlying types differ so going return AppendRangeLoopDifferentType(x, start, size); } - }; template From c7a6c0d0c4b88e84ea40b3cbf5b2b9552cd4a184 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Thu, 7 Jan 2021 18:43:44 +0100 Subject: [PATCH 39/82] reuse code from previous approach StringVectorConverter in RPrimitiveConverter --- r/src/r_to_arrow.cpp | 39 +++++++++++++++++++++++++-------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 4d1e493d11b..aa68569cef4 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -677,25 +677,36 @@ class RPrimitiveConverter> using OffsetType = typename T::offset_type; Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { - 
RETURN_NOT_OK(this->Reserve(size)); - RVectorType rtype = GetVectorType(x); if (rtype != STRING) { return Status::Invalid("invalid R type to convert to string"); } - auto append_value = [this](cpp11::r_string s) { - R_xlen_t n = XLENGTH(s); - ARROW_RETURN_NOT_OK(this->primitive_builder_->ReserveData(n)); - this->primitive_builder_->UnsafeAppend(CHAR(s), static_cast(n)); - return Status::OK(); - }; - auto append_null = [this]() { - this->primitive_builder_->UnsafeAppendNull(); - return Status::OK(); - }; - return RVectorVisitor::Visit(x, start, size, append_null, - append_value); + cpp11::strings s(arrow::r::utf8_strings(x)); + RETURN_NOT_OK(this->primitive_builder_->Reserve(s.size())); + auto it = s.begin() + start; + + // we know all the R strings are utf8 already, so we can get + // a definite size and then use UnsafeAppend*() + int64_t total_length = 0; + for (R_xlen_t i = 0; i < size; i++, ++it) { + cpp11::r_string si = *it; + total_length += cpp11::is_na(si) ? 0 : si.size(); + } + RETURN_NOT_OK(this->primitive_builder_->ReserveData(total_length)); + + // append + it = s.begin() + start; + for (R_xlen_t i = 0; i < size; i++, ++it) { + cpp11::r_string si = *it; + if (si == NA_STRING) { + this->primitive_builder_->UnsafeAppendNull(); + } else { + this->primitive_builder_->UnsafeAppend(CHAR(si), si.size()); + } + } + + return Status::OK(); } }; From 9fef8242001a8d8b32252745536bac9c5d5cc39d Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 8 Jan 2021 10:16:58 +0100 Subject: [PATCH 40/82] define DATAPTR for versions of R < 3.5 ARROW-10803 --- r/src/r_to_arrow.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index aa68569cef4..ff379817621 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -345,8 +345,8 @@ class RPrimitiveConverter< template Status AppendRangeSameTypeNotALTREP(SEXP x, R_xlen_t start, R_xlen_t size) { - const r_value_type* p = reinterpret_cast(DATAPTR_RO(x)) + 
start; - const r_value_type* p_end = p + size; + auto p = reinterpret_cast(DATAPTR_RO(x)) + start; + auto p_end = p + size; auto first_na = std::find_if(p, p_end, is_NA); @@ -865,7 +865,7 @@ std::shared_ptr MakeSimpleArray(SEXP x) { using value_type = typename arrow::TypeTraits::ArrayType::value_type; RVector vec(x); auto n = vec.size(); - auto p_vec_start = reinterpret_cast(DATAPTR(vec)); + auto p_vec_start = reinterpret_cast(DATAPTR_RO(vec)); auto p_vec_end = p_vec_start + n; std::vector> buffers{nullptr, std::make_shared>(vec)}; From 8d14d257b19a7fcf0cfd23b207b1242ebb2df257 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 8 Jan 2021 11:14:52 +0100 Subject: [PATCH 41/82] include Rdynload, ARROW-10803 --- r/src/imports.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/r/src/imports.cpp b/r/src/imports.cpp index 5c77d6cb5b0..7ac6bf1487f 100644 --- a/r/src/imports.cpp +++ b/r/src/imports.cpp @@ -17,6 +17,7 @@ #include // for R_GetCCallable #include +#include // for R_GetCCallable namespace vctrs { struct vctrs_api_ptrs_t { From 2893679bbccc9bed9af3199280e36c6471345dc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Kriszti=C3=A1n=20Sz=C5=B1cs?= Date: Thu, 10 Dec 2020 12:43:21 +0100 Subject: [PATCH 42/82] Add Extend and ExtendMasked to the converter interface --- cpp/src/arrow/python/python_to_arrow.cc | 82 ++++++++++++------------- cpp/src/arrow/util/converter.h | 38 +++++++++++- 2 files changed, 78 insertions(+), 42 deletions(-) diff --git a/cpp/src/arrow/python/python_to_arrow.cc b/cpp/src/arrow/python/python_to_arrow.cc index b136bec9709..b2d9f1cb5a3 100644 --- a/cpp/src/arrow/python/python_to_arrow.cc +++ b/cpp/src/arrow/python/python_to_arrow.cc @@ -388,36 +388,36 @@ class PyValue { } }; -template -Status Extend(T* converter, PyObject* values, int64_t size) { - /// Ensure we've allocated enough space - RETURN_NOT_OK(converter->Reserve(size)); - // Iterate over the items adding each one - return internal::VisitSequence(values, [converter](PyObject* item, 
bool* /* unused */) { - return converter->Append(item); - }); -} - -// Convert and append a sequence of values masked with a numpy array -template -Status ExtendMasked(T* converter, PyObject* values, PyObject* mask, int64_t size) { - /// Ensure we've allocated enough space - RETURN_NOT_OK(converter->Reserve(size)); - // Iterate over the items adding each one - return internal::VisitSequenceMasked( - values, mask, [converter](PyObject* item, bool is_masked, bool* /* unused */) { - if (is_masked) { - return converter->AppendNull(); - } else { - // This will also apply the null-checking convention in the event - // that the value is not masked - return converter->Append(item); // perhaps use AppendValue instead? - } - }); -} - // The base Converter class is a mixin with predefined behavior and constructors. -using PyConverter = Converter; +class PyConverter : public Converter { + public: + // Iterate over the input values and defer the conversion to the Append method + Status Extend(PyObject* values, int64_t size) override { + /// Ensure we've allocated enough space + RETURN_NOT_OK(this->Reserve(size)); + // Iterate over the items adding each one + return internal::VisitSequence(values, [this](PyObject* item, bool* /* unused */) { + return this->Append(item); + }); + } + + // Convert and append a sequence of values masked with a numpy array + Status ExtendMasked(PyObject* values, PyObject* mask, int64_t size) override { + /// Ensure we've allocated enough space + RETURN_NOT_OK(this->Reserve(size)); + // Iterate over the items adding each one + return internal::VisitSequenceMasked( + values, mask, [this](PyObject* item, bool is_masked, bool* /* unused */) { + if (is_masked) { + return this->AppendNull(); + } else { + // This will also apply the null-checking convention in the event + // that the value is not masked + return this->Append(item); // perhaps use AppendValue instead? 
+ } + }); + } +}; template class PyPrimitiveConverter; @@ -669,7 +669,7 @@ class PyListConverter : public ListConverter { Status AppendSequence(PyObject* value) { int64_t size = static_cast(PySequence_Size(value)); RETURN_NOT_OK(this->list_builder_->ValidateOverflow(size)); - return Extend(this->value_converter_.get(), value, size); + return this->value_converter_->Extend(value, size); } Status AppendNdarray(PyObject* value) { @@ -684,12 +684,12 @@ class PyListConverter : public ListConverter { switch (value_type->id()) { // If the value type does not match the expected NumPy dtype, then fall through // to a slower PySequence-based path -#define LIST_FAST_CASE(TYPE_ID, TYPE, NUMPY_TYPE) \ - case Type::TYPE_ID: { \ - if (PyArray_DESCR(ndarray)->type_num != NUMPY_TYPE) { \ - return Extend(this->value_converter_.get(), value, size); \ - } \ - return AppendNdarrayTyped(ndarray); \ +#define LIST_FAST_CASE(TYPE_ID, TYPE, NUMPY_TYPE) \ + case Type::TYPE_ID: { \ + if (PyArray_DESCR(ndarray)->type_num != NUMPY_TYPE) { \ + return this->value_converter_->Extend(value, size); \ + } \ + return AppendNdarrayTyped(ndarray); \ } LIST_FAST_CASE(BOOL, BooleanType, NPY_BOOL) LIST_FAST_CASE(UINT8, UInt8Type, NPY_UINT8) @@ -707,7 +707,7 @@ class PyListConverter : public ListConverter { LIST_FAST_CASE(DURATION, DurationType, NPY_TIMEDELTA) #undef LIST_FAST_CASE default: { - return Extend(this->value_converter_.get(), value, size); + return this->value_converter_->Extend(value, size); } } } @@ -1041,18 +1041,18 @@ Result> ConvertPySequence(PyObject* obj, PyObject* // the overflow and automatically creates new chunks. 
ARROW_ASSIGN_OR_RAISE(auto chunked_converter, MakeChunker(std::move(converter))); if (mask != nullptr && mask != Py_None) { - RETURN_NOT_OK(ExtendMasked(chunked_converter.get(), seq, mask, size)); + RETURN_NOT_OK(chunked_converter->ExtendMasked(seq, mask, size)); } else { - RETURN_NOT_OK(Extend(chunked_converter.get(), seq, size)); + RETURN_NOT_OK(chunked_converter->Extend(seq, size)); } return chunked_converter->ToChunkedArray(); } else { // If the converter can't overflow spare the capacity error checking on the hot-path, // this improves the performance roughly by ~10% for primitive types. if (mask != nullptr && mask != Py_None) { - RETURN_NOT_OK(ExtendMasked(converter.get(), seq, mask, size)); + RETURN_NOT_OK(converter->ExtendMasked(seq, mask, size)); } else { - RETURN_NOT_OK(Extend(converter.get(), seq, size)); + RETURN_NOT_OK(converter->Extend(seq, size)); } return converter->ToChunkedArray(); } diff --git a/cpp/src/arrow/util/converter.h b/cpp/src/arrow/util/converter.h index e18f6e350d7..2c40a48726b 100644 --- a/cpp/src/arrow/util/converter.h +++ b/cpp/src/arrow/util/converter.h @@ -52,7 +52,15 @@ class Converter { return Init(pool); } - virtual Status Append(InputType value) = 0; + virtual Status Append(InputType value) { return Status::NotImplemented("Append"); } + + virtual Status Extend(InputType values, int64_t size) { + return Status::NotImplemented("Extend"); + } + + virtual Status ExtendMasked(InputType values, InputType mask, int64_t size) { + return Status::NotImplemented("ExtendMasked"); + } const std::shared_ptr& builder() const { return builder_; } @@ -294,6 +302,34 @@ class Chunker { return status; } + // we could get bit smarter here since the whole batch of appendable values + // will be rejected if a capacity error is raised + Status Extend(InputType values, int64_t size) { + auto status = converter_->Extend(values, size); + if (ARROW_PREDICT_FALSE(status.IsCapacityError())) { + if (converter_->builder()->length() == 0) { + return status; + 
} + ARROW_RETURN_NOT_OK(FinishChunk()); + return Extend(values, size); + } + length_ += size; + return status; + } + + Status ExtendMasked(InputType values, InputType mask, int64_t size) { + auto status = converter_->ExtendMasked(values, mask, size); + if (ARROW_PREDICT_FALSE(status.IsCapacityError())) { + if (converter_->builder()->length() == 0) { + return status; + } + ARROW_RETURN_NOT_OK(FinishChunk()); + return ExtendMasked(values, mask, size); + } + length_ += size; + return status; + } + Status FinishChunk() { ARROW_ASSIGN_OR_RAISE(auto chunk, converter_->ToArray(length_)); chunks_.push_back(chunk); From a98454378e4c2e1f7b3cbdf5df6d448e49e0229c Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 8 Jan 2021 14:10:50 +0100 Subject: [PATCH 43/82] vec_to_arrow() calling Extend() from https://github.com/apache/arrow/pull/8886 --- r/src/imports.cpp | 1 - r/src/r_to_arrow.cpp | 12 ++++++++++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/r/src/imports.cpp b/r/src/imports.cpp index 7ac6bf1487f..5c77d6cb5b0 100644 --- a/r/src/imports.cpp +++ b/r/src/imports.cpp @@ -17,7 +17,6 @@ #include // for R_GetCCallable #include -#include // for R_GetCCallable namespace vctrs { struct vctrs_api_ptrs_t { diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index ff379817621..76ab1f85913 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -196,7 +196,15 @@ int64_t RVectorVisitor::GetValue(double x) { class RConverter : public Converter { public: - virtual Status Append(SEXP) { return Status::Invalid("not using Append()"); } + virtual Status Append(SEXP) { return Status::Invalid("Append"); } + + virtual Status Extend(SEXP values, int64_t size) { + return AppendRange(values, 0, size); + } + + virtual Status ExtendMasked(SEXP values, SEXP mask, int64_t size) { + return AppendRange(values, 0, size); + } virtual Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) { RVectorType rtype = GetVectorType(x); @@ -946,7 +954,7 @@ std::shared_ptr 
vec_to_arrow(SEXP x, SEXP s_type) { // otherwise go through the converter api auto converter = ValueOrStop(MakeConverter( options.type, options, gc_memory_pool())); - StopIfNotOk(converter->AppendRange(x, 0, options.size)); + StopIfNotOk(converter->Extend(x, options.size)); return ValueOrStop(converter->ToArray()); } From 8c5eb462864a7423054c56083ab1128c9235f39e Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 8 Jan 2021 14:26:06 +0100 Subject: [PATCH 44/82] replace the various AppendRange() with Extend() --- r/src/r_to_arrow.cpp | 84 ++++++++++++++++++++------------------------ 1 file changed, 39 insertions(+), 45 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 76ab1f85913..f65164def5d 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -196,19 +196,14 @@ int64_t RVectorVisitor::GetValue(double x) { class RConverter : public Converter { public: - virtual Status Append(SEXP) { return Status::Invalid("Append"); } + virtual Status Append(SEXP) { return Status::NotImplemented("Append"); } virtual Status Extend(SEXP values, int64_t size) { - return AppendRange(values, 0, size); + return Status::NotImplemented("ExtendMasked"); } virtual Status ExtendMasked(SEXP values, SEXP mask, int64_t size) { - return AppendRange(values, 0, size); - } - - virtual Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) { - RVectorType rtype = GetVectorType(x); - return Status::Invalid("No visitor for R type ", rtype); + return Status::NotImplemented("ExtendMasked"); } }; @@ -304,7 +299,7 @@ template class RPrimitiveConverter> : public PrimitiveConverter { public: - Status AppendRange(SEXP, R_xlen_t start, R_xlen_t size) override { + Status Extend(SEXP, int64_t size) override { return this->primitive_builder_->AppendNulls(size); } }; @@ -314,17 +309,17 @@ class RPrimitiveConverter< T, enable_if_t::value || is_floating_type::value>> : public PrimitiveConverter { public: - Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) 
override { + Status Extend(SEXP x, int64_t size) override { auto rtype = GetVectorType(x); switch (rtype) { case UINT8: - return AppendRangeDispatch(x, start, size); + return AppendRangeDispatch(x, 0, size); case INT32: - return AppendRangeDispatch(x, start, size); + return AppendRangeDispatch(x, 0, size); case FLOAT64: - return AppendRangeDispatch(x, start, size); + return AppendRangeDispatch(x, 0, size); case INT64: - return AppendRangeDispatch(x, start, size); + return AppendRangeDispatch(x, 0, size); default: break; @@ -436,7 +431,7 @@ template class RPrimitiveConverter::value>> : public PrimitiveConverter { public: - Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + Status Extend(SEXP x, int64_t size) override { auto rtype = GetVectorType(x); if (rtype != BOOLEAN) { return Status::Invalid("cannot convert"); @@ -451,8 +446,7 @@ class RPrimitiveConverter::value>> this->primitive_builder_->UnsafeAppendNull(); return Status::OK(); }; - return RVectorVisitor::Visit(x, start, size, append_null, - append_value); + return RVectorVisitor::Visit(x, 0, size, append_null, append_value); } }; @@ -460,18 +454,18 @@ template class RPrimitiveConverter::value>> : public PrimitiveConverter { public: - Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + Status Extend(SEXP x, int64_t size) override { RETURN_NOT_OK(this->Reserve(size)); switch (GetVectorType(x)) { case DATE_INT: - return AppendRange_Date(x, start, size); + return AppendRange_Date(x, 0, size); case DATE_DBL: - return AppendRange_Date(x, start, size); + return AppendRange_Date(x, 0, size); case POSIXCT: - return AppendRange_Posixct(x, start, size); + return AppendRange_Posixct(x, 0, size); default: break; @@ -542,7 +536,7 @@ template class RPrimitiveConverter::value>> : public PrimitiveConverter { public: - Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + Status Extend(SEXP x, int64_t size) override { RETURN_NOT_OK(this->Reserve(size)); auto rtype = 
GetVectorType(x); if (rtype != TIME) { @@ -579,7 +573,7 @@ class RPrimitiveConverter::value>> this->primitive_builder_->UnsafeAppendNull(); return Status::OK(); }; - return RVectorVisitor::Visit(x, start, size, append_null, append_value); + return RVectorVisitor::Visit(x, 0, size, append_null, append_value); } }; @@ -587,7 +581,7 @@ template class RPrimitiveConverter::value>> : public PrimitiveConverter { public: - Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + Status Extend(SEXP x, int64_t size) override { RETURN_NOT_OK(this->Reserve(size)); RVectorType rtype = GetVectorType(x); @@ -606,7 +600,7 @@ class RPrimitiveConverter::value>> this->primitive_builder_->UnsafeAppendNull(); return Status::OK(); }; - return RVectorVisitor::Visit(x, start, size, append_null, append_value); + return RVectorVisitor::Visit(x, 0, size, append_null, append_value); } }; @@ -614,8 +608,8 @@ template class RPrimitiveConverter::value>> : public PrimitiveConverter { public: - Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { - return Status::NotImplemented("conversion from R to decimal"); + Status Extend(SEXP x, int64_t size) override { + return Status::NotImplemented("Extend"); } }; @@ -625,7 +619,7 @@ class RPrimitiveConverter> public: using OffsetType = typename T::offset_type; - Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + Status Extend(SEXP x, int64_t size) override { RETURN_NOT_OK(this->Reserve(size)); RVectorType rtype = GetVectorType(x); @@ -643,7 +637,7 @@ class RPrimitiveConverter> this->primitive_builder_->UnsafeAppendNull(); return Status::OK(); }; - return RVectorVisitor::Visit(x, start, size, append_null, append_value); + return RVectorVisitor::Visit(x, 0, size, append_null, append_value); } }; @@ -651,7 +645,7 @@ template class RPrimitiveConverter::value>> : public PrimitiveConverter { public: - Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + Status Extend(SEXP x, int64_t size) 
override { RETURN_NOT_OK(this->Reserve(size)); RVectorType rtype = GetVectorType(x); @@ -674,7 +668,7 @@ class RPrimitiveConverter::v this->primitive_builder_->UnsafeAppendNull(); return Status::OK(); }; - return RVectorVisitor::Visit(x, start, size, append_null, append_value); + return RVectorVisitor::Visit(x, 0, size, append_null, append_value); } }; @@ -684,7 +678,8 @@ class RPrimitiveConverter> public: using OffsetType = typename T::offset_type; - Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + Status Extend(SEXP x, int64_t size) override { + int64_t start = 0; RVectorType rtype = GetVectorType(x); if (rtype != STRING) { return Status::Invalid("invalid R type to convert to string"); @@ -722,9 +717,9 @@ template class RPrimitiveConverter::value>> : public PrimitiveConverter { public: - Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + Status Extend(SEXP x, int64_t size) override { // TODO: look in lubridate - return Status::NotImplemented("conversion to duration not yet implemented"); + return Status::NotImplemented("Extend"); } }; @@ -738,9 +733,8 @@ template class RDictionaryConverter> : public DictionaryConverter { public: - Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { - return Status::NotImplemented( - "dictionaries only implemented with string value types"); + Status Extend(SEXP x, int64_t size) override { + return Status::NotImplemented("Extend"); } }; @@ -748,7 +742,7 @@ template class RDictionaryConverter> : public DictionaryConverter { public: - Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + Status Extend(SEXP x, int64_t size) override { RETURN_NOT_OK(this->Reserve(size)); RVectorType rtype = GetVectorType(x); @@ -762,7 +756,7 @@ class RDictionaryConverter> return this->value_builder_->Append(CHAR(s)); }; auto append_null = [this]() { return this->value_builder_->AppendNull(); }; - return RVectorVisitor::Visit(x, start, size, append_null, append_value); + return 
RVectorVisitor::Visit(x, 0, size, append_null, append_value); } }; @@ -784,7 +778,7 @@ struct RConverterTrait> { template class RListConverter : public ListConverter { public: - Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + Status Extend(SEXP x, int64_t size) override { RETURN_NOT_OK(this->Reserve(size)); RVectorType rtype = GetVectorType(x); @@ -795,10 +789,10 @@ class RListConverter : public ListConverter { auto append_value = [this](SEXP value) { R_xlen_t n = XLENGTH(value); RETURN_NOT_OK(this->list_builder_->ValidateOverflow(n)); - return this->value_converter_.get()->AppendRange(value, 0, n); + return this->value_converter_.get()->Extend(value, n); }; auto append_null = [this]() { return this->list_builder_->AppendNull(); }; - return RVectorVisitor::Visit(x, start, size, append_null, append_value); + return RVectorVisitor::Visit(x, 0, size, append_null, append_value); } }; @@ -811,9 +805,9 @@ struct RConverterTrait { class RStructConverter : public StructConverter { public: - Status AppendRange(SEXP x, R_xlen_t start, R_xlen_t size) override { + Status Extend(SEXP x, int64_t size) override { // this specifically does not use this->Reserve() because - // children_[i]->AppendRange() below will reserve the additional capacity + // children_[i]->Extend() below will reserve the additional capacity RETURN_NOT_OK(this->builder_->Reserve(size)); R_xlen_t n_columns = XLENGTH(x); @@ -826,7 +820,7 @@ class RStructConverter : public StructConverter { } for (R_xlen_t i = 0; i < n_columns; i++) { - RETURN_NOT_OK(children_[i]->AppendRange(VECTOR_ELT(x, i), start, size)); + RETURN_NOT_OK(children_[i]->Extend(VECTOR_ELT(x, i), size)); } return Status::OK(); From 0951de9196063fe133ecf2dacc7778f24c6a1165 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 8 Jan 2021 16:23:21 +0100 Subject: [PATCH 45/82] RStructConverter using this->Reserve() so that potential capacity error on children are raised early. 
--- r/src/r_to_arrow.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index f65164def5d..f588700838e 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -806,9 +806,7 @@ struct RConverterTrait { class RStructConverter : public StructConverter { public: Status Extend(SEXP x, int64_t size) override { - // this specifically does not use this->Reserve() because - // children_[i]->Extend() below will reserve the additional capacity - RETURN_NOT_OK(this->builder_->Reserve(size)); + RETURN_NOT_OK(this->Reserve(size)); R_xlen_t n_columns = XLENGTH(x); if (!Rf_inherits(x, "data.frame")) { From fb83abadbc4fc60a8b3e0e0b063553911215048f Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 20 Jan 2021 10:54:06 +0100 Subject: [PATCH 46/82] adapt code so that can call it from previous api --- r/src/array_from_vector.cpp | 1 + r/src/arrowExports.cpp | 2 +- r/src/arrow_types.h | 1 + r/src/r_to_arrow.cpp | 19 +++++++++++-------- 4 files changed, 14 insertions(+), 9 deletions(-) diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp index f747ef5ec12..ae249734ef8 100644 --- a/r/src/array_from_vector.cpp +++ b/r/src/array_from_vector.cpp @@ -1381,6 +1381,7 @@ arrow::Status CheckCompatibleStruct(SEXP obj, std::shared_ptr Array__from_vector( SEXP x, const std::shared_ptr& type, bool type_inferred) { + // short circuit if `x` is already an Array if (Rf_inherits(x, "Array")) { return cpp11::as_cpp>(x); diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 1eef952f5e7..c78bdd6480c 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -2782,7 +2782,7 @@ END_CPP11 } #else extern "C" SEXP _arrow_vec_to_arrow(SEXP x_sexp, SEXP s_type_sexp){ - Rf_error("Cannot call vec_to_arrow(). Please use arrow::install_arrow() to install required runtime libraries. "); + Rf_error("Cannot call vec_to_arrow(). 
See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); } #endif // recordbatch.cpp diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 6cd2171852e..73c1a59f6de 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -92,6 +92,7 @@ std::shared_ptr Array__from_vector( SEXP x, const std::shared_ptr& type, bool type_inferred); void inspect(SEXP obj); +std::shared_ptr vec_to_arrow(SEXP x, const std::shared_ptr& type, bool type_inferred); // the integer64 sentinel constexpr int64_t NA_INT64 = std::numeric_limits::min(); diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index f588700838e..a87de4dcde4 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -923,19 +923,15 @@ std::shared_ptr Array__from_vector_reuse_memory(SEXP x) { cpp11::stop("Unreachable: you might need to fix can_reuse_memory()"); } -std::shared_ptr vec_to_arrow(SEXP x, SEXP s_type) { +std::shared_ptr vec_to_arrow(SEXP x, const std::shared_ptr& type, bool type_inferred) { // short circuit if `x` is already an Array if (Rf_inherits(x, "Array")) { return cpp11::as_cpp>(x); } RConversionOptions options; - options.strict = !Rf_isNull(s_type); - if (options.strict) { - options.type = cpp11::as_cpp>(s_type); - } else { - options.type = arrow::r::InferArrowType(x); - } + options.strict = !type_inferred; + options.type = type; options.size = vctrs::short_vec_size(x); // maybe short circuit when zero-copy is possible @@ -956,7 +952,14 @@ std::shared_ptr vec_to_arrow(SEXP x, SEXP s_type) { // [[arrow::export]] SEXP vec_to_arrow(SEXP x, SEXP s_type) { if (Rf_inherits(x, "Array")) return x; - return cpp11::to_r6(arrow::r::vec_to_arrow(x, s_type)); + bool type_inferred = Rf_isNull(s_type); + std::shared_ptr type; + if (type_inferred) { + type = cpp11::as_cpp>(s_type); + } else { + type = type = arrow::r::InferArrowType(x); + } + return cpp11::to_r6(arrow::r::vec_to_arrow(x, type, type_inferred)); } #endif From 
f75821a91c11dc7b2ac20814ad9531621d37943a Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 20 Jan 2021 11:19:32 +0100 Subject: [PATCH 47/82] less restrictive when ingesting binary --- r/src/array_to_vector.cpp | 4 ++-- r/src/arrow_types.h | 4 +++- r/src/r_to_arrow.cpp | 38 ++++++++++++++++++++++++++------------ 3 files changed, 31 insertions(+), 15 deletions(-) diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index c9a1f6cf9ee..ddcb7494697 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -18,8 +18,6 @@ #include "./arrow_types.h" #if defined(ARROW_R_WITH_ARROW) -#include - #include #include #include @@ -30,6 +28,8 @@ #include #include +#include + namespace arrow { using internal::checked_cast; diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 73c1a59f6de..16c4679ad46 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -92,7 +92,9 @@ std::shared_ptr Array__from_vector( SEXP x, const std::shared_ptr& type, bool type_inferred); void inspect(SEXP obj); -std::shared_ptr vec_to_arrow(SEXP x, const std::shared_ptr& type, bool type_inferred); +std::shared_ptr vec_to_arrow(SEXP x, + const std::shared_ptr& type, + bool type_inferred); // the integer64 sentinel constexpr int64_t NA_INT64 = std::numeric_limits::min(); diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index a87de4dcde4..5a7d115f54b 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -613,6 +613,27 @@ class RPrimitiveConverter::value>> } }; +Status check_binary(SEXP x, int64_t size) { + RVectorType rtype = GetVectorType(x); + switch (rtype) { + case BINARY: + break; + case LIST: { + // check this is a list of raw vectors + const SEXP* p_x = VECTOR_PTR_RO(x); + for (R_xlen_t i = 0; i < size; i++, ++p_x) { + if (TYPEOF(*p_x) != RAWSXP) { + return Status::Invalid("invalid R type to convert to binary"); + } + } + break; + } + default: + return Status::Invalid("invalid R type to convert to binary"); + } + return Status::OK(); +} 
+ template class RPrimitiveConverter> : public PrimitiveConverter { @@ -621,11 +642,7 @@ class RPrimitiveConverter> Status Extend(SEXP x, int64_t size) override { RETURN_NOT_OK(this->Reserve(size)); - - RVectorType rtype = GetVectorType(x); - if (rtype != BINARY) { - return Status::Invalid("invalid R type to convert to binary"); - } + RETURN_NOT_OK(check_binary(x, size)); auto append_value = [this](SEXP raw) { R_xlen_t n = XLENGTH(raw); @@ -647,12 +664,7 @@ class RPrimitiveConverter::v public: Status Extend(SEXP x, int64_t size) override { RETURN_NOT_OK(this->Reserve(size)); - - RVectorType rtype = GetVectorType(x); - // TODO: handle STRSXP - if (rtype != BINARY) { - return Status::Invalid("invalid R type to convert to binary"); - } + RETURN_NOT_OK(check_binary(x, size)); auto append_value = [this](SEXP raw) { R_xlen_t n = XLENGTH(raw); @@ -923,7 +935,9 @@ std::shared_ptr Array__from_vector_reuse_memory(SEXP x) { cpp11::stop("Unreachable: you might need to fix can_reuse_memory()"); } -std::shared_ptr vec_to_arrow(SEXP x, const std::shared_ptr& type, bool type_inferred) { +std::shared_ptr vec_to_arrow(SEXP x, + const std::shared_ptr& type, + bool type_inferred) { // short circuit if `x` is already an Array if (Rf_inherits(x, "Array")) { return cpp11::as_cpp>(x); From dac370801e8a2721928669f6afb34af5c3e7b478 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 5 Feb 2021 09:47:15 +0100 Subject: [PATCH 48/82] rebasing --- r/R/arrowExports.R | 2 +- r/src/array_from_vector.cpp | 2 + r/src/arrowExports.cpp | 812 +++++++++++++++++----------------- r/src/r_to_arrow.cpp | 37 +- r/tests/testthat/test-Array.R | 27 +- 5 files changed, 452 insertions(+), 428 deletions(-) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 421310ae4ac..bd21ee28948 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -1269,7 +1269,7 @@ ExportRecordBatch <- function(batch, array_ptr, schema_ptr){ } vec_to_arrow <- function(x, s_type){ - .Call(`_arrow_vec_to_arrow` , x, s_type) + 
.Call(`_arrow_vec_to_arrow`, x, s_type) } RecordBatch__num_columns <- function(x){ diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp index ae249734ef8..b96a8a9976a 100644 --- a/r/src/array_from_vector.cpp +++ b/r/src/array_from_vector.cpp @@ -1381,6 +1381,8 @@ arrow::Status CheckCompatibleStruct(SEXP obj, std::shared_ptr Array__from_vector( SEXP x, const std::shared_ptr& type, bool type_inferred) { + // new api + return vec_to_arrow(x, type, type_inferred); // short circuit if `x` is already an Array if (Rf_inherits(x, "Array")) { diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index c78bdd6480c..afa42256d14 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -2764,14 +2764,7 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } -#else -extern "C" SEXP _arrow_ExportRecordBatch(SEXP batch_sexp, SEXP array_ptr_sexp, SEXP schema_ptr_sexp){ - Rf_error("Cannot call ExportRecordBatch(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); -} -#endif - // r_to_arrow.cpp -#if defined(ARROW_R_WITH_ARROW) SEXP vec_to_arrow(SEXP x, SEXP s_type); extern "C" SEXP _arrow_vec_to_arrow(SEXP x_sexp, SEXP s_type_sexp){ BEGIN_CPP11 @@ -2780,11 +2773,6 @@ BEGIN_CPP11 return cpp11::as_sexp(vec_to_arrow(x, s_type)); END_CPP11 } -#else -extern "C" SEXP _arrow_vec_to_arrow(SEXP x_sexp, SEXP s_type_sexp){ - Rf_error("Cannot call vec_to_arrow(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. 
"); -} -#endif // recordbatch.cpp int RecordBatch__num_columns(const std::shared_ptr& x); extern "C" SEXP _arrow_RecordBatch__num_columns(SEXP x_sexp){ @@ -3521,406 +3509,406 @@ return Rf_ScalarLogical( static const R_CallMethodDef CallEntries[] = { { "_arrow_available", (DL_FUNC)& _arrow_available, 0 }, { "_s3_available", (DL_FUNC)& _s3_available, 0 }, - { "_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2}, - { "_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3}, - { "_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2}, - { "_arrow_Array__IsValid", (DL_FUNC) &_arrow_Array__IsValid, 2}, - { "_arrow_Array__length", (DL_FUNC) &_arrow_Array__length, 1}, - { "_arrow_Array__offset", (DL_FUNC) &_arrow_Array__offset, 1}, - { "_arrow_Array__null_count", (DL_FUNC) &_arrow_Array__null_count, 1}, - { "_arrow_Array__type", (DL_FUNC) &_arrow_Array__type, 1}, - { "_arrow_Array__ToString", (DL_FUNC) &_arrow_Array__ToString, 1}, - { "_arrow_Array__type_id", (DL_FUNC) &_arrow_Array__type_id, 1}, - { "_arrow_Array__Equals", (DL_FUNC) &_arrow_Array__Equals, 2}, - { "_arrow_Array__ApproxEquals", (DL_FUNC) &_arrow_Array__ApproxEquals, 2}, - { "_arrow_Array__data", (DL_FUNC) &_arrow_Array__data, 1}, - { "_arrow_Array__RangeEquals", (DL_FUNC) &_arrow_Array__RangeEquals, 5}, - { "_arrow_Array__View", (DL_FUNC) &_arrow_Array__View, 2}, - { "_arrow_Array__Validate", (DL_FUNC) &_arrow_Array__Validate, 1}, - { "_arrow_DictionaryArray__indices", (DL_FUNC) &_arrow_DictionaryArray__indices, 1}, - { "_arrow_DictionaryArray__dictionary", (DL_FUNC) &_arrow_DictionaryArray__dictionary, 1}, - { "_arrow_StructArray__field", (DL_FUNC) &_arrow_StructArray__field, 2}, - { "_arrow_StructArray__GetFieldByName", (DL_FUNC) &_arrow_StructArray__GetFieldByName, 2}, - { "_arrow_StructArray__Flatten", (DL_FUNC) &_arrow_StructArray__Flatten, 1}, - { "_arrow_ListArray__value_type", (DL_FUNC) &_arrow_ListArray__value_type, 1}, - { "_arrow_LargeListArray__value_type", (DL_FUNC) 
&_arrow_LargeListArray__value_type, 1}, - { "_arrow_ListArray__values", (DL_FUNC) &_arrow_ListArray__values, 1}, - { "_arrow_LargeListArray__values", (DL_FUNC) &_arrow_LargeListArray__values, 1}, - { "_arrow_ListArray__value_length", (DL_FUNC) &_arrow_ListArray__value_length, 2}, - { "_arrow_LargeListArray__value_length", (DL_FUNC) &_arrow_LargeListArray__value_length, 2}, - { "_arrow_FixedSizeListArray__value_length", (DL_FUNC) &_arrow_FixedSizeListArray__value_length, 2}, - { "_arrow_ListArray__value_offset", (DL_FUNC) &_arrow_ListArray__value_offset, 2}, - { "_arrow_LargeListArray__value_offset", (DL_FUNC) &_arrow_LargeListArray__value_offset, 2}, - { "_arrow_FixedSizeListArray__value_offset", (DL_FUNC) &_arrow_FixedSizeListArray__value_offset, 2}, - { "_arrow_ListArray__raw_value_offsets", (DL_FUNC) &_arrow_ListArray__raw_value_offsets, 1}, - { "_arrow_LargeListArray__raw_value_offsets", (DL_FUNC) &_arrow_LargeListArray__raw_value_offsets, 1}, - { "_arrow_Array__infer_type", (DL_FUNC) &_arrow_Array__infer_type, 1}, - { "_arrow_Array__from_vector", (DL_FUNC) &_arrow_Array__from_vector, 2}, - { "_arrow_ChunkedArray__from_list", (DL_FUNC) &_arrow_ChunkedArray__from_list, 2}, - { "_arrow_DictionaryArray__FromArrays", (DL_FUNC) &_arrow_DictionaryArray__FromArrays, 3}, - { "_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1}, - { "_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 1}, - { "_arrow_RecordBatch__to_dataframe", (DL_FUNC) &_arrow_RecordBatch__to_dataframe, 2}, - { "_arrow_Table__to_dataframe", (DL_FUNC) &_arrow_Table__to_dataframe, 2}, - { "_arrow_ArrayData__get_type", (DL_FUNC) &_arrow_ArrayData__get_type, 1}, - { "_arrow_ArrayData__get_length", (DL_FUNC) &_arrow_ArrayData__get_length, 1}, - { "_arrow_ArrayData__get_null_count", (DL_FUNC) &_arrow_ArrayData__get_null_count, 1}, - { "_arrow_ArrayData__get_offset", (DL_FUNC) &_arrow_ArrayData__get_offset, 1}, - { "_arrow_ArrayData__buffers", (DL_FUNC) 
&_arrow_ArrayData__buffers, 1}, - { "_arrow_Buffer__is_mutable", (DL_FUNC) &_arrow_Buffer__is_mutable, 1}, - { "_arrow_Buffer__ZeroPadding", (DL_FUNC) &_arrow_Buffer__ZeroPadding, 1}, - { "_arrow_Buffer__capacity", (DL_FUNC) &_arrow_Buffer__capacity, 1}, - { "_arrow_Buffer__size", (DL_FUNC) &_arrow_Buffer__size, 1}, - { "_arrow_r___RBuffer__initialize", (DL_FUNC) &_arrow_r___RBuffer__initialize, 1}, - { "_arrow_Buffer__data", (DL_FUNC) &_arrow_Buffer__data, 1}, - { "_arrow_Buffer__Equals", (DL_FUNC) &_arrow_Buffer__Equals, 2}, - { "_arrow_ChunkedArray__length", (DL_FUNC) &_arrow_ChunkedArray__length, 1}, - { "_arrow_ChunkedArray__null_count", (DL_FUNC) &_arrow_ChunkedArray__null_count, 1}, - { "_arrow_ChunkedArray__num_chunks", (DL_FUNC) &_arrow_ChunkedArray__num_chunks, 1}, - { "_arrow_ChunkedArray__chunk", (DL_FUNC) &_arrow_ChunkedArray__chunk, 2}, - { "_arrow_ChunkedArray__chunks", (DL_FUNC) &_arrow_ChunkedArray__chunks, 1}, - { "_arrow_ChunkedArray__type", (DL_FUNC) &_arrow_ChunkedArray__type, 1}, - { "_arrow_ChunkedArray__Slice1", (DL_FUNC) &_arrow_ChunkedArray__Slice1, 2}, - { "_arrow_ChunkedArray__Slice2", (DL_FUNC) &_arrow_ChunkedArray__Slice2, 3}, - { "_arrow_ChunkedArray__View", (DL_FUNC) &_arrow_ChunkedArray__View, 2}, - { "_arrow_ChunkedArray__Validate", (DL_FUNC) &_arrow_ChunkedArray__Validate, 1}, - { "_arrow_ChunkedArray__Equals", (DL_FUNC) &_arrow_ChunkedArray__Equals, 2}, - { "_arrow_ChunkedArray__ToString", (DL_FUNC) &_arrow_ChunkedArray__ToString, 1}, - { "_arrow_util___Codec__Create", (DL_FUNC) &_arrow_util___Codec__Create, 2}, - { "_arrow_util___Codec__name", (DL_FUNC) &_arrow_util___Codec__name, 1}, - { "_arrow_util___Codec__IsAvailable", (DL_FUNC) &_arrow_util___Codec__IsAvailable, 1}, - { "_arrow_io___CompressedOutputStream__Make", (DL_FUNC) &_arrow_io___CompressedOutputStream__Make, 2}, - { "_arrow_io___CompressedInputStream__Make", (DL_FUNC) &_arrow_io___CompressedInputStream__Make, 2}, - { "_arrow_RecordBatch__cast", (DL_FUNC) 
&_arrow_RecordBatch__cast, 3}, - { "_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3}, - { "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3}, - { "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, - { "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, - { "_arrow_csv___ReadOptions__column_names", (DL_FUNC) &_arrow_csv___ReadOptions__column_names, 1}, - { "_arrow_csv___ConvertOptions__initialize", (DL_FUNC) &_arrow_csv___ConvertOptions__initialize, 1}, - { "_arrow_csv___TableReader__Make", (DL_FUNC) &_arrow_csv___TableReader__Make, 4}, - { "_arrow_csv___TableReader__Read", (DL_FUNC) &_arrow_csv___TableReader__Read, 1}, - { "_arrow_TimestampParser__kind", (DL_FUNC) &_arrow_TimestampParser__kind, 1}, - { "_arrow_TimestampParser__format", (DL_FUNC) &_arrow_TimestampParser__format, 1}, - { "_arrow_TimestampParser__MakeStrptime", (DL_FUNC) &_arrow_TimestampParser__MakeStrptime, 1}, - { "_arrow_TimestampParser__MakeISO8601", (DL_FUNC) &_arrow_TimestampParser__MakeISO8601, 0}, - { "_arrow_dataset___Dataset__NewScan", (DL_FUNC) &_arrow_dataset___Dataset__NewScan, 1}, - { "_arrow_dataset___Dataset__schema", (DL_FUNC) &_arrow_dataset___Dataset__schema, 1}, - { "_arrow_dataset___Dataset__type_name", (DL_FUNC) &_arrow_dataset___Dataset__type_name, 1}, - { "_arrow_dataset___Dataset__ReplaceSchema", (DL_FUNC) &_arrow_dataset___Dataset__ReplaceSchema, 2}, - { "_arrow_dataset___UnionDataset__create", (DL_FUNC) &_arrow_dataset___UnionDataset__create, 2}, - { "_arrow_dataset___InMemoryDataset__create", (DL_FUNC) &_arrow_dataset___InMemoryDataset__create, 1}, - { "_arrow_dataset___UnionDataset__children", (DL_FUNC) &_arrow_dataset___UnionDataset__children, 1}, - { "_arrow_dataset___FileSystemDataset__format", (DL_FUNC) &_arrow_dataset___FileSystemDataset__format, 1}, - { "_arrow_dataset___FileSystemDataset__filesystem", (DL_FUNC) 
&_arrow_dataset___FileSystemDataset__filesystem, 1}, - { "_arrow_dataset___FileSystemDataset__files", (DL_FUNC) &_arrow_dataset___FileSystemDataset__files, 1}, - { "_arrow_dataset___DatasetFactory__Finish1", (DL_FUNC) &_arrow_dataset___DatasetFactory__Finish1, 2}, - { "_arrow_dataset___DatasetFactory__Finish2", (DL_FUNC) &_arrow_dataset___DatasetFactory__Finish2, 2}, - { "_arrow_dataset___DatasetFactory__Inspect", (DL_FUNC) &_arrow_dataset___DatasetFactory__Inspect, 2}, - { "_arrow_dataset___UnionDatasetFactory__Make", (DL_FUNC) &_arrow_dataset___UnionDatasetFactory__Make, 1}, - { "_arrow_dataset___FileSystemDatasetFactory__Make2", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make2, 4}, - { "_arrow_dataset___FileSystemDatasetFactory__Make1", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make1, 3}, - { "_arrow_dataset___FileSystemDatasetFactory__Make3", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make3, 4}, - { "_arrow_dataset___FileFormat__type_name", (DL_FUNC) &_arrow_dataset___FileFormat__type_name, 1}, - { "_arrow_dataset___FileFormat__DefaultWriteOptions", (DL_FUNC) &_arrow_dataset___FileFormat__DefaultWriteOptions, 1}, - { "_arrow_dataset___ParquetFileFormat__Make", (DL_FUNC) &_arrow_dataset___ParquetFileFormat__Make, 3}, - { "_arrow_dataset___FileWriteOptions__type_name", (DL_FUNC) &_arrow_dataset___FileWriteOptions__type_name, 1}, - { "_arrow_dataset___ParquetFileWriteOptions__update", (DL_FUNC) &_arrow_dataset___ParquetFileWriteOptions__update, 3}, - { "_arrow_dataset___IpcFileWriteOptions__update2", (DL_FUNC) &_arrow_dataset___IpcFileWriteOptions__update2, 4}, - { "_arrow_dataset___IpcFileWriteOptions__update1", (DL_FUNC) &_arrow_dataset___IpcFileWriteOptions__update1, 3}, - { "_arrow_dataset___IpcFileFormat__Make", (DL_FUNC) &_arrow_dataset___IpcFileFormat__Make, 0}, - { "_arrow_dataset___CsvFileFormat__Make", (DL_FUNC) &_arrow_dataset___CsvFileFormat__Make, 1}, - { "_arrow_dataset___DirectoryPartitioning", (DL_FUNC) 
&_arrow_dataset___DirectoryPartitioning, 1}, - { "_arrow_dataset___DirectoryPartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___DirectoryPartitioning__MakeFactory, 1}, - { "_arrow_dataset___HivePartitioning", (DL_FUNC) &_arrow_dataset___HivePartitioning, 1}, - { "_arrow_dataset___HivePartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___HivePartitioning__MakeFactory, 0}, - { "_arrow_dataset___ScannerBuilder__Project", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Project, 2}, - { "_arrow_dataset___ScannerBuilder__Filter", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Filter, 2}, - { "_arrow_dataset___ScannerBuilder__UseThreads", (DL_FUNC) &_arrow_dataset___ScannerBuilder__UseThreads, 2}, - { "_arrow_dataset___ScannerBuilder__BatchSize", (DL_FUNC) &_arrow_dataset___ScannerBuilder__BatchSize, 2}, - { "_arrow_dataset___ScannerBuilder__schema", (DL_FUNC) &_arrow_dataset___ScannerBuilder__schema, 1}, - { "_arrow_dataset___ScannerBuilder__Finish", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Finish, 1}, - { "_arrow_dataset___Scanner__ToTable", (DL_FUNC) &_arrow_dataset___Scanner__ToTable, 1}, - { "_arrow_dataset___Scanner__head", (DL_FUNC) &_arrow_dataset___Scanner__head, 2}, - { "_arrow_dataset___Scanner__Scan", (DL_FUNC) &_arrow_dataset___Scanner__Scan, 1}, - { "_arrow_dataset___Scanner__schema", (DL_FUNC) &_arrow_dataset___Scanner__schema, 1}, - { "_arrow_dataset___ScanTask__get_batches", (DL_FUNC) &_arrow_dataset___ScanTask__get_batches, 1}, - { "_arrow_dataset___Dataset__Write", (DL_FUNC) &_arrow_dataset___Dataset__Write, 6}, - { "_arrow_Int8__initialize", (DL_FUNC) &_arrow_Int8__initialize, 0}, - { "_arrow_Int16__initialize", (DL_FUNC) &_arrow_Int16__initialize, 0}, - { "_arrow_Int32__initialize", (DL_FUNC) &_arrow_Int32__initialize, 0}, - { "_arrow_Int64__initialize", (DL_FUNC) &_arrow_Int64__initialize, 0}, - { "_arrow_UInt8__initialize", (DL_FUNC) &_arrow_UInt8__initialize, 0}, - { "_arrow_UInt16__initialize", (DL_FUNC) &_arrow_UInt16__initialize, 0}, - { 
"_arrow_UInt32__initialize", (DL_FUNC) &_arrow_UInt32__initialize, 0}, - { "_arrow_UInt64__initialize", (DL_FUNC) &_arrow_UInt64__initialize, 0}, - { "_arrow_Float16__initialize", (DL_FUNC) &_arrow_Float16__initialize, 0}, - { "_arrow_Float32__initialize", (DL_FUNC) &_arrow_Float32__initialize, 0}, - { "_arrow_Float64__initialize", (DL_FUNC) &_arrow_Float64__initialize, 0}, - { "_arrow_Boolean__initialize", (DL_FUNC) &_arrow_Boolean__initialize, 0}, - { "_arrow_Utf8__initialize", (DL_FUNC) &_arrow_Utf8__initialize, 0}, - { "_arrow_LargeUtf8__initialize", (DL_FUNC) &_arrow_LargeUtf8__initialize, 0}, - { "_arrow_Binary__initialize", (DL_FUNC) &_arrow_Binary__initialize, 0}, - { "_arrow_LargeBinary__initialize", (DL_FUNC) &_arrow_LargeBinary__initialize, 0}, - { "_arrow_Date32__initialize", (DL_FUNC) &_arrow_Date32__initialize, 0}, - { "_arrow_Date64__initialize", (DL_FUNC) &_arrow_Date64__initialize, 0}, - { "_arrow_Null__initialize", (DL_FUNC) &_arrow_Null__initialize, 0}, - { "_arrow_Decimal128Type__initialize", (DL_FUNC) &_arrow_Decimal128Type__initialize, 2}, - { "_arrow_FixedSizeBinary__initialize", (DL_FUNC) &_arrow_FixedSizeBinary__initialize, 1}, - { "_arrow_Timestamp__initialize", (DL_FUNC) &_arrow_Timestamp__initialize, 2}, - { "_arrow_Time32__initialize", (DL_FUNC) &_arrow_Time32__initialize, 1}, - { "_arrow_Time64__initialize", (DL_FUNC) &_arrow_Time64__initialize, 1}, - { "_arrow_list__", (DL_FUNC) &_arrow_list__, 1}, - { "_arrow_large_list__", (DL_FUNC) &_arrow_large_list__, 1}, - { "_arrow_fixed_size_list__", (DL_FUNC) &_arrow_fixed_size_list__, 2}, - { "_arrow_struct__", (DL_FUNC) &_arrow_struct__, 1}, - { "_arrow_DataType__ToString", (DL_FUNC) &_arrow_DataType__ToString, 1}, - { "_arrow_DataType__name", (DL_FUNC) &_arrow_DataType__name, 1}, - { "_arrow_DataType__Equals", (DL_FUNC) &_arrow_DataType__Equals, 2}, - { "_arrow_DataType__num_fields", (DL_FUNC) &_arrow_DataType__num_fields, 1}, - { "_arrow_DataType__fields", (DL_FUNC) 
&_arrow_DataType__fields, 1}, - { "_arrow_DataType__id", (DL_FUNC) &_arrow_DataType__id, 1}, - { "_arrow_ListType__ToString", (DL_FUNC) &_arrow_ListType__ToString, 1}, - { "_arrow_FixedWidthType__bit_width", (DL_FUNC) &_arrow_FixedWidthType__bit_width, 1}, - { "_arrow_DateType__unit", (DL_FUNC) &_arrow_DateType__unit, 1}, - { "_arrow_TimeType__unit", (DL_FUNC) &_arrow_TimeType__unit, 1}, - { "_arrow_DecimalType__precision", (DL_FUNC) &_arrow_DecimalType__precision, 1}, - { "_arrow_DecimalType__scale", (DL_FUNC) &_arrow_DecimalType__scale, 1}, - { "_arrow_TimestampType__timezone", (DL_FUNC) &_arrow_TimestampType__timezone, 1}, - { "_arrow_TimestampType__unit", (DL_FUNC) &_arrow_TimestampType__unit, 1}, - { "_arrow_DictionaryType__initialize", (DL_FUNC) &_arrow_DictionaryType__initialize, 3}, - { "_arrow_DictionaryType__index_type", (DL_FUNC) &_arrow_DictionaryType__index_type, 1}, - { "_arrow_DictionaryType__value_type", (DL_FUNC) &_arrow_DictionaryType__value_type, 1}, - { "_arrow_DictionaryType__name", (DL_FUNC) &_arrow_DictionaryType__name, 1}, - { "_arrow_DictionaryType__ordered", (DL_FUNC) &_arrow_DictionaryType__ordered, 1}, - { "_arrow_StructType__GetFieldByName", (DL_FUNC) &_arrow_StructType__GetFieldByName, 2}, - { "_arrow_StructType__GetFieldIndex", (DL_FUNC) &_arrow_StructType__GetFieldIndex, 2}, - { "_arrow_StructType__field_names", (DL_FUNC) &_arrow_StructType__field_names, 1}, - { "_arrow_ListType__value_field", (DL_FUNC) &_arrow_ListType__value_field, 1}, - { "_arrow_ListType__value_type", (DL_FUNC) &_arrow_ListType__value_type, 1}, - { "_arrow_LargeListType__value_field", (DL_FUNC) &_arrow_LargeListType__value_field, 1}, - { "_arrow_LargeListType__value_type", (DL_FUNC) &_arrow_LargeListType__value_type, 1}, - { "_arrow_FixedSizeListType__value_field", (DL_FUNC) &_arrow_FixedSizeListType__value_field, 1}, - { "_arrow_FixedSizeListType__value_type", (DL_FUNC) &_arrow_FixedSizeListType__value_type, 1}, - { "_arrow_FixedSizeListType__list_size", 
(DL_FUNC) &_arrow_FixedSizeListType__list_size, 1}, - { "_arrow_dataset___expr__call", (DL_FUNC) &_arrow_dataset___expr__call, 3}, - { "_arrow_dataset___expr__field_ref", (DL_FUNC) &_arrow_dataset___expr__field_ref, 1}, - { "_arrow_dataset___expr__scalar", (DL_FUNC) &_arrow_dataset___expr__scalar, 1}, - { "_arrow_dataset___expr__ToString", (DL_FUNC) &_arrow_dataset___expr__ToString, 1}, - { "_arrow_ipc___WriteFeather__Table", (DL_FUNC) &_arrow_ipc___WriteFeather__Table, 6}, - { "_arrow_ipc___feather___Reader__version", (DL_FUNC) &_arrow_ipc___feather___Reader__version, 1}, - { "_arrow_ipc___feather___Reader__Read", (DL_FUNC) &_arrow_ipc___feather___Reader__Read, 2}, - { "_arrow_ipc___feather___Reader__Open", (DL_FUNC) &_arrow_ipc___feather___Reader__Open, 1}, - { "_arrow_ipc___feather___Reader__column_names", (DL_FUNC) &_arrow_ipc___feather___Reader__column_names, 1}, - { "_arrow_Field__initialize", (DL_FUNC) &_arrow_Field__initialize, 3}, - { "_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 1}, - { "_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1}, - { "_arrow_Field__Equals", (DL_FUNC) &_arrow_Field__Equals, 2}, - { "_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1}, - { "_arrow_Field__type", (DL_FUNC) &_arrow_Field__type, 1}, - { "_arrow_fs___FileInfo__type", (DL_FUNC) &_arrow_fs___FileInfo__type, 1}, - { "_arrow_fs___FileInfo__set_type", (DL_FUNC) &_arrow_fs___FileInfo__set_type, 2}, - { "_arrow_fs___FileInfo__path", (DL_FUNC) &_arrow_fs___FileInfo__path, 1}, - { "_arrow_fs___FileInfo__set_path", (DL_FUNC) &_arrow_fs___FileInfo__set_path, 2}, - { "_arrow_fs___FileInfo__size", (DL_FUNC) &_arrow_fs___FileInfo__size, 1}, - { "_arrow_fs___FileInfo__set_size", (DL_FUNC) &_arrow_fs___FileInfo__set_size, 2}, - { "_arrow_fs___FileInfo__base_name", (DL_FUNC) &_arrow_fs___FileInfo__base_name, 1}, - { "_arrow_fs___FileInfo__extension", (DL_FUNC) &_arrow_fs___FileInfo__extension, 1}, - { "_arrow_fs___FileInfo__mtime", (DL_FUNC) 
&_arrow_fs___FileInfo__mtime, 1}, - { "_arrow_fs___FileInfo__set_mtime", (DL_FUNC) &_arrow_fs___FileInfo__set_mtime, 2}, - { "_arrow_fs___FileSelector__base_dir", (DL_FUNC) &_arrow_fs___FileSelector__base_dir, 1}, - { "_arrow_fs___FileSelector__allow_not_found", (DL_FUNC) &_arrow_fs___FileSelector__allow_not_found, 1}, - { "_arrow_fs___FileSelector__recursive", (DL_FUNC) &_arrow_fs___FileSelector__recursive, 1}, - { "_arrow_fs___FileSelector__create", (DL_FUNC) &_arrow_fs___FileSelector__create, 3}, - { "_arrow_fs___FileSystem__GetTargetInfos_Paths", (DL_FUNC) &_arrow_fs___FileSystem__GetTargetInfos_Paths, 2}, - { "_arrow_fs___FileSystem__GetTargetInfos_FileSelector", (DL_FUNC) &_arrow_fs___FileSystem__GetTargetInfos_FileSelector, 2}, - { "_arrow_fs___FileSystem__CreateDir", (DL_FUNC) &_arrow_fs___FileSystem__CreateDir, 3}, - { "_arrow_fs___FileSystem__DeleteDir", (DL_FUNC) &_arrow_fs___FileSystem__DeleteDir, 2}, - { "_arrow_fs___FileSystem__DeleteDirContents", (DL_FUNC) &_arrow_fs___FileSystem__DeleteDirContents, 2}, - { "_arrow_fs___FileSystem__DeleteFile", (DL_FUNC) &_arrow_fs___FileSystem__DeleteFile, 2}, - { "_arrow_fs___FileSystem__DeleteFiles", (DL_FUNC) &_arrow_fs___FileSystem__DeleteFiles, 2}, - { "_arrow_fs___FileSystem__Move", (DL_FUNC) &_arrow_fs___FileSystem__Move, 3}, - { "_arrow_fs___FileSystem__CopyFile", (DL_FUNC) &_arrow_fs___FileSystem__CopyFile, 3}, - { "_arrow_fs___FileSystem__OpenInputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenInputStream, 2}, - { "_arrow_fs___FileSystem__OpenInputFile", (DL_FUNC) &_arrow_fs___FileSystem__OpenInputFile, 2}, - { "_arrow_fs___FileSystem__OpenOutputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenOutputStream, 2}, - { "_arrow_fs___FileSystem__OpenAppendStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenAppendStream, 2}, - { "_arrow_fs___FileSystem__type_name", (DL_FUNC) &_arrow_fs___FileSystem__type_name, 1}, - { "_arrow_fs___LocalFileSystem__create", (DL_FUNC) &_arrow_fs___LocalFileSystem__create, 0}, - { 
"_arrow_fs___SubTreeFileSystem__create", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__create, 2}, - { "_arrow_fs___SubTreeFileSystem__base_fs", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_fs, 1}, - { "_arrow_fs___SubTreeFileSystem__base_path", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_path, 1}, - { "_arrow_fs___FileSystemFromUri", (DL_FUNC) &_arrow_fs___FileSystemFromUri, 1}, - { "_arrow_fs___CopyFiles", (DL_FUNC) &_arrow_fs___CopyFiles, 6}, - { "_arrow_fs___S3FileSystem__create", (DL_FUNC) &_arrow_fs___S3FileSystem__create, 12}, - { "_arrow_fs___S3FileSystem__region", (DL_FUNC) &_arrow_fs___S3FileSystem__region, 1}, - { "_arrow_io___Readable__Read", (DL_FUNC) &_arrow_io___Readable__Read, 2}, - { "_arrow_io___InputStream__Close", (DL_FUNC) &_arrow_io___InputStream__Close, 1}, - { "_arrow_io___OutputStream__Close", (DL_FUNC) &_arrow_io___OutputStream__Close, 1}, - { "_arrow_io___RandomAccessFile__GetSize", (DL_FUNC) &_arrow_io___RandomAccessFile__GetSize, 1}, - { "_arrow_io___RandomAccessFile__supports_zero_copy", (DL_FUNC) &_arrow_io___RandomAccessFile__supports_zero_copy, 1}, - { "_arrow_io___RandomAccessFile__Seek", (DL_FUNC) &_arrow_io___RandomAccessFile__Seek, 2}, - { "_arrow_io___RandomAccessFile__Tell", (DL_FUNC) &_arrow_io___RandomAccessFile__Tell, 1}, - { "_arrow_io___RandomAccessFile__Read0", (DL_FUNC) &_arrow_io___RandomAccessFile__Read0, 1}, - { "_arrow_io___RandomAccessFile__ReadAt", (DL_FUNC) &_arrow_io___RandomAccessFile__ReadAt, 3}, - { "_arrow_io___MemoryMappedFile__Create", (DL_FUNC) &_arrow_io___MemoryMappedFile__Create, 2}, - { "_arrow_io___MemoryMappedFile__Open", (DL_FUNC) &_arrow_io___MemoryMappedFile__Open, 2}, - { "_arrow_io___MemoryMappedFile__Resize", (DL_FUNC) &_arrow_io___MemoryMappedFile__Resize, 2}, - { "_arrow_io___ReadableFile__Open", (DL_FUNC) &_arrow_io___ReadableFile__Open, 1}, - { "_arrow_io___BufferReader__initialize", (DL_FUNC) &_arrow_io___BufferReader__initialize, 1}, - { "_arrow_io___Writable__write", (DL_FUNC) 
&_arrow_io___Writable__write, 2}, - { "_arrow_io___OutputStream__Tell", (DL_FUNC) &_arrow_io___OutputStream__Tell, 1}, - { "_arrow_io___FileOutputStream__Open", (DL_FUNC) &_arrow_io___FileOutputStream__Open, 1}, - { "_arrow_io___BufferOutputStream__Create", (DL_FUNC) &_arrow_io___BufferOutputStream__Create, 1}, - { "_arrow_io___BufferOutputStream__capacity", (DL_FUNC) &_arrow_io___BufferOutputStream__capacity, 1}, - { "_arrow_io___BufferOutputStream__Finish", (DL_FUNC) &_arrow_io___BufferOutputStream__Finish, 1}, - { "_arrow_io___BufferOutputStream__Tell", (DL_FUNC) &_arrow_io___BufferOutputStream__Tell, 1}, - { "_arrow_io___BufferOutputStream__Write", (DL_FUNC) &_arrow_io___BufferOutputStream__Write, 2}, - { "_arrow_json___ReadOptions__initialize", (DL_FUNC) &_arrow_json___ReadOptions__initialize, 2}, - { "_arrow_json___ParseOptions__initialize", (DL_FUNC) &_arrow_json___ParseOptions__initialize, 1}, - { "_arrow_json___TableReader__Make", (DL_FUNC) &_arrow_json___TableReader__Make, 3}, - { "_arrow_json___TableReader__Read", (DL_FUNC) &_arrow_json___TableReader__Read, 1}, - { "_arrow_MemoryPool__default", (DL_FUNC) &_arrow_MemoryPool__default, 0}, - { "_arrow_MemoryPool__bytes_allocated", (DL_FUNC) &_arrow_MemoryPool__bytes_allocated, 1}, - { "_arrow_MemoryPool__max_memory", (DL_FUNC) &_arrow_MemoryPool__max_memory, 1}, - { "_arrow_MemoryPool__backend_name", (DL_FUNC) &_arrow_MemoryPool__backend_name, 1}, - { "_arrow_supported_memory_backends", (DL_FUNC) &_arrow_supported_memory_backends, 0}, - { "_arrow_ipc___Message__body_length", (DL_FUNC) &_arrow_ipc___Message__body_length, 1}, - { "_arrow_ipc___Message__metadata", (DL_FUNC) &_arrow_ipc___Message__metadata, 1}, - { "_arrow_ipc___Message__body", (DL_FUNC) &_arrow_ipc___Message__body, 1}, - { "_arrow_ipc___Message__Verify", (DL_FUNC) &_arrow_ipc___Message__Verify, 1}, - { "_arrow_ipc___Message__type", (DL_FUNC) &_arrow_ipc___Message__type, 1}, - { "_arrow_ipc___Message__Equals", (DL_FUNC) 
&_arrow_ipc___Message__Equals, 2}, - { "_arrow_ipc___ReadRecordBatch__Message__Schema", (DL_FUNC) &_arrow_ipc___ReadRecordBatch__Message__Schema, 2}, - { "_arrow_ipc___ReadSchema_InputStream", (DL_FUNC) &_arrow_ipc___ReadSchema_InputStream, 1}, - { "_arrow_ipc___ReadSchema_Message", (DL_FUNC) &_arrow_ipc___ReadSchema_Message, 1}, - { "_arrow_ipc___MessageReader__Open", (DL_FUNC) &_arrow_ipc___MessageReader__Open, 1}, - { "_arrow_ipc___MessageReader__ReadNextMessage", (DL_FUNC) &_arrow_ipc___MessageReader__ReadNextMessage, 1}, - { "_arrow_ipc___ReadMessage", (DL_FUNC) &_arrow_ipc___ReadMessage, 1}, - { "_arrow_parquet___arrow___ArrowReaderProperties__Make", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__Make, 1}, - { "_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads, 2}, - { "_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads, 2}, - { "_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary, 2}, - { "_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary, 3}, - { "_arrow_parquet___arrow___FileReader__OpenFile", (DL_FUNC) &_arrow_parquet___arrow___FileReader__OpenFile, 2}, - { "_arrow_parquet___arrow___FileReader__ReadTable1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable1, 1}, - { "_arrow_parquet___arrow___FileReader__ReadTable2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable2, 2}, - { "_arrow_parquet___arrow___FileReader__ReadRowGroup1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroup1, 2}, - { "_arrow_parquet___arrow___FileReader__ReadRowGroup2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroup2, 3}, - { 
"_arrow_parquet___arrow___FileReader__ReadRowGroups1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroups1, 2}, - { "_arrow_parquet___arrow___FileReader__ReadRowGroups2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroups2, 3}, - { "_arrow_parquet___arrow___FileReader__num_rows", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_rows, 1}, - { "_arrow_parquet___arrow___FileReader__num_columns", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_columns, 1}, - { "_arrow_parquet___arrow___FileReader__num_row_groups", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_row_groups, 1}, - { "_arrow_parquet___arrow___FileReader__ReadColumn", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadColumn, 2}, - { "_arrow_parquet___ArrowWriterProperties___create", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___create, 3}, - { "_arrow_parquet___WriterProperties___Builder__create", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__create, 0}, - { "_arrow_parquet___WriterProperties___Builder__version", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__version, 2}, - { "_arrow_parquet___ArrowWriterProperties___Builder__set_compressions", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_compressions, 3}, - { "_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels, 3}, - { "_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary, 3}, - { "_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics, 3}, - { "_arrow_parquet___ArrowWriterProperties___Builder__data_page_size", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__data_page_size, 2}, - { "_arrow_parquet___WriterProperties___Builder__build", (DL_FUNC) 
&_arrow_parquet___WriterProperties___Builder__build, 1}, - { "_arrow_parquet___arrow___ParquetFileWriter__Open", (DL_FUNC) &_arrow_parquet___arrow___ParquetFileWriter__Open, 4}, - { "_arrow_parquet___arrow___FileWriter__WriteTable", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__WriteTable, 3}, - { "_arrow_parquet___arrow___FileWriter__Close", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__Close, 1}, - { "_arrow_parquet___arrow___WriteTable", (DL_FUNC) &_arrow_parquet___arrow___WriteTable, 4}, - { "_arrow_parquet___arrow___FileReader__GetSchema", (DL_FUNC) &_arrow_parquet___arrow___FileReader__GetSchema, 1}, - { "_arrow_ImportArray", (DL_FUNC) &_arrow_ImportArray, 2}, - { "_arrow_ImportRecordBatch", (DL_FUNC) &_arrow_ImportRecordBatch, 2}, - { "_arrow_ImportSchema", (DL_FUNC) &_arrow_ImportSchema, 1}, - { "_arrow_allocate_arrow_schema", (DL_FUNC) &_arrow_allocate_arrow_schema, 0}, - { "_arrow_delete_arrow_schema", (DL_FUNC) &_arrow_delete_arrow_schema, 1}, - { "_arrow_allocate_arrow_array", (DL_FUNC) &_arrow_allocate_arrow_array, 0}, - { "_arrow_delete_arrow_array", (DL_FUNC) &_arrow_delete_arrow_array, 1}, - { "_arrow_ExportType", (DL_FUNC) &_arrow_ExportType, 2}, - { "_arrow_ExportSchema", (DL_FUNC) &_arrow_ExportSchema, 2}, - { "_arrow_ExportArray", (DL_FUNC) &_arrow_ExportArray, 3}, - { "_arrow_ExportRecordBatch", (DL_FUNC) &_arrow_ExportRecordBatch, 3}, - { "_arrow_vec_to_arrow", (DL_FUNC) &_arrow_vec_to_arrow, 2}, - { "_arrow_RecordBatch__num_columns", (DL_FUNC) &_arrow_RecordBatch__num_columns, 1}, - { "_arrow_RecordBatch__num_rows", (DL_FUNC) &_arrow_RecordBatch__num_rows, 1}, - { "_arrow_RecordBatch__schema", (DL_FUNC) &_arrow_RecordBatch__schema, 1}, - { "_arrow_RecordBatch__RenameColumns", (DL_FUNC) &_arrow_RecordBatch__RenameColumns, 2}, - { "_arrow_RecordBatch__ReplaceSchemaMetadata", (DL_FUNC) &_arrow_RecordBatch__ReplaceSchemaMetadata, 2}, - { "_arrow_RecordBatch__columns", (DL_FUNC) &_arrow_RecordBatch__columns, 1}, - { 
"_arrow_RecordBatch__column", (DL_FUNC) &_arrow_RecordBatch__column, 2}, - { "_arrow_RecordBatch__GetColumnByName", (DL_FUNC) &_arrow_RecordBatch__GetColumnByName, 2}, - { "_arrow_RecordBatch__SelectColumns", (DL_FUNC) &_arrow_RecordBatch__SelectColumns, 2}, - { "_arrow_RecordBatch__Equals", (DL_FUNC) &_arrow_RecordBatch__Equals, 3}, - { "_arrow_RecordBatch__AddColumn", (DL_FUNC) &_arrow_RecordBatch__AddColumn, 4}, - { "_arrow_RecordBatch__SetColumn", (DL_FUNC) &_arrow_RecordBatch__SetColumn, 4}, - { "_arrow_RecordBatch__RemoveColumn", (DL_FUNC) &_arrow_RecordBatch__RemoveColumn, 2}, - { "_arrow_RecordBatch__column_name", (DL_FUNC) &_arrow_RecordBatch__column_name, 2}, - { "_arrow_RecordBatch__names", (DL_FUNC) &_arrow_RecordBatch__names, 1}, - { "_arrow_RecordBatch__Slice1", (DL_FUNC) &_arrow_RecordBatch__Slice1, 2}, - { "_arrow_RecordBatch__Slice2", (DL_FUNC) &_arrow_RecordBatch__Slice2, 3}, - { "_arrow_ipc___SerializeRecordBatch__Raw", (DL_FUNC) &_arrow_ipc___SerializeRecordBatch__Raw, 1}, - { "_arrow_ipc___ReadRecordBatch__InputStream__Schema", (DL_FUNC) &_arrow_ipc___ReadRecordBatch__InputStream__Schema, 2}, - { "_arrow_RecordBatch__from_arrays", (DL_FUNC) &_arrow_RecordBatch__from_arrays, 2}, - { "_arrow_RecordBatchReader__schema", (DL_FUNC) &_arrow_RecordBatchReader__schema, 1}, - { "_arrow_RecordBatchReader__ReadNext", (DL_FUNC) &_arrow_RecordBatchReader__ReadNext, 1}, - { "_arrow_ipc___RecordBatchStreamReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamReader__Open, 1}, - { "_arrow_ipc___RecordBatchStreamReader__batches", (DL_FUNC) &_arrow_ipc___RecordBatchStreamReader__batches, 1}, - { "_arrow_ipc___RecordBatchFileReader__schema", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__schema, 1}, - { "_arrow_ipc___RecordBatchFileReader__num_record_batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__num_record_batches, 1}, - { "_arrow_ipc___RecordBatchFileReader__ReadRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__ReadRecordBatch, 2}, - { 
"_arrow_ipc___RecordBatchFileReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__Open, 1}, - { "_arrow_Table__from_RecordBatchReader", (DL_FUNC) &_arrow_Table__from_RecordBatchReader, 1}, - { "_arrow_Table__from_RecordBatchFileReader", (DL_FUNC) &_arrow_Table__from_RecordBatchFileReader, 1}, - { "_arrow_ipc___RecordBatchFileReader__batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__batches, 1}, - { "_arrow_ipc___RecordBatchWriter__WriteRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteRecordBatch, 2}, - { "_arrow_ipc___RecordBatchWriter__WriteTable", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteTable, 2}, - { "_arrow_ipc___RecordBatchWriter__Close", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__Close, 1}, - { "_arrow_ipc___RecordBatchFileWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileWriter__Open, 4}, - { "_arrow_ipc___RecordBatchStreamWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamWriter__Open, 4}, - { "_arrow_Array__GetScalar", (DL_FUNC) &_arrow_Array__GetScalar, 2}, - { "_arrow_Scalar__ToString", (DL_FUNC) &_arrow_Scalar__ToString, 1}, - { "_arrow_StructScalar__field", (DL_FUNC) &_arrow_StructScalar__field, 2}, - { "_arrow_StructScalar__GetFieldByName", (DL_FUNC) &_arrow_StructScalar__GetFieldByName, 2}, - { "_arrow_Scalar__as_vector", (DL_FUNC) &_arrow_Scalar__as_vector, 1}, - { "_arrow_Scalar__is_valid", (DL_FUNC) &_arrow_Scalar__is_valid, 1}, - { "_arrow_Scalar__type", (DL_FUNC) &_arrow_Scalar__type, 1}, - { "_arrow_schema_", (DL_FUNC) &_arrow_schema_, 1}, - { "_arrow_Schema__ToString", (DL_FUNC) &_arrow_Schema__ToString, 1}, - { "_arrow_Schema__num_fields", (DL_FUNC) &_arrow_Schema__num_fields, 1}, - { "_arrow_Schema__field", (DL_FUNC) &_arrow_Schema__field, 2}, - { "_arrow_Schema__GetFieldByName", (DL_FUNC) &_arrow_Schema__GetFieldByName, 2}, - { "_arrow_Schema__fields", (DL_FUNC) &_arrow_Schema__fields, 1}, - { "_arrow_Schema__field_names", (DL_FUNC) &_arrow_Schema__field_names, 1}, - { 
"_arrow_Schema__HasMetadata", (DL_FUNC) &_arrow_Schema__HasMetadata, 1}, - { "_arrow_Schema__metadata", (DL_FUNC) &_arrow_Schema__metadata, 1}, - { "_arrow_Schema__WithMetadata", (DL_FUNC) &_arrow_Schema__WithMetadata, 2}, - { "_arrow_Schema__serialize", (DL_FUNC) &_arrow_Schema__serialize, 1}, - { "_arrow_Schema__Equals", (DL_FUNC) &_arrow_Schema__Equals, 3}, - { "_arrow_arrow__UnifySchemas", (DL_FUNC) &_arrow_arrow__UnifySchemas, 1}, - { "_arrow_Table__num_columns", (DL_FUNC) &_arrow_Table__num_columns, 1}, - { "_arrow_Table__num_rows", (DL_FUNC) &_arrow_Table__num_rows, 1}, - { "_arrow_Table__schema", (DL_FUNC) &_arrow_Table__schema, 1}, - { "_arrow_Table__ReplaceSchemaMetadata", (DL_FUNC) &_arrow_Table__ReplaceSchemaMetadata, 2}, - { "_arrow_Table__column", (DL_FUNC) &_arrow_Table__column, 2}, - { "_arrow_Table__field", (DL_FUNC) &_arrow_Table__field, 2}, - { "_arrow_Table__columns", (DL_FUNC) &_arrow_Table__columns, 1}, - { "_arrow_Table__ColumnNames", (DL_FUNC) &_arrow_Table__ColumnNames, 1}, - { "_arrow_Table__RenameColumns", (DL_FUNC) &_arrow_Table__RenameColumns, 2}, - { "_arrow_Table__Slice1", (DL_FUNC) &_arrow_Table__Slice1, 2}, - { "_arrow_Table__Slice2", (DL_FUNC) &_arrow_Table__Slice2, 3}, - { "_arrow_Table__Equals", (DL_FUNC) &_arrow_Table__Equals, 3}, - { "_arrow_Table__Validate", (DL_FUNC) &_arrow_Table__Validate, 1}, - { "_arrow_Table__ValidateFull", (DL_FUNC) &_arrow_Table__ValidateFull, 1}, - { "_arrow_Table__GetColumnByName", (DL_FUNC) &_arrow_Table__GetColumnByName, 2}, - { "_arrow_Table__RemoveColumn", (DL_FUNC) &_arrow_Table__RemoveColumn, 2}, - { "_arrow_Table__AddColumn", (DL_FUNC) &_arrow_Table__AddColumn, 4}, - { "_arrow_Table__SetColumn", (DL_FUNC) &_arrow_Table__SetColumn, 4}, - { "_arrow_Table__SelectColumns", (DL_FUNC) &_arrow_Table__SelectColumns, 2}, - { "_arrow_all_record_batches", (DL_FUNC) &_arrow_all_record_batches, 1}, - { "_arrow_Table__from_record_batches", (DL_FUNC) &_arrow_Table__from_record_batches, 2}, - { 
"_arrow_Table__from_dots", (DL_FUNC) &_arrow_Table__from_dots, 2}, - { "_arrow_GetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_GetCpuThreadPoolCapacity, 0}, - { "_arrow_SetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_SetCpuThreadPoolCapacity, 1}, - { "_arrow_Table__Reset", (DL_FUNC) &_arrow_Table__Reset, 1}, - { "_arrow_RecordBatch__Reset", (DL_FUNC) &_arrow_RecordBatch__Reset, 1}, + { "_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2}, + { "_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3}, + { "_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2}, + { "_arrow_Array__IsValid", (DL_FUNC) &_arrow_Array__IsValid, 2}, + { "_arrow_Array__length", (DL_FUNC) &_arrow_Array__length, 1}, + { "_arrow_Array__offset", (DL_FUNC) &_arrow_Array__offset, 1}, + { "_arrow_Array__null_count", (DL_FUNC) &_arrow_Array__null_count, 1}, + { "_arrow_Array__type", (DL_FUNC) &_arrow_Array__type, 1}, + { "_arrow_Array__ToString", (DL_FUNC) &_arrow_Array__ToString, 1}, + { "_arrow_Array__type_id", (DL_FUNC) &_arrow_Array__type_id, 1}, + { "_arrow_Array__Equals", (DL_FUNC) &_arrow_Array__Equals, 2}, + { "_arrow_Array__ApproxEquals", (DL_FUNC) &_arrow_Array__ApproxEquals, 2}, + { "_arrow_Array__data", (DL_FUNC) &_arrow_Array__data, 1}, + { "_arrow_Array__RangeEquals", (DL_FUNC) &_arrow_Array__RangeEquals, 5}, + { "_arrow_Array__View", (DL_FUNC) &_arrow_Array__View, 2}, + { "_arrow_Array__Validate", (DL_FUNC) &_arrow_Array__Validate, 1}, + { "_arrow_DictionaryArray__indices", (DL_FUNC) &_arrow_DictionaryArray__indices, 1}, + { "_arrow_DictionaryArray__dictionary", (DL_FUNC) &_arrow_DictionaryArray__dictionary, 1}, + { "_arrow_StructArray__field", (DL_FUNC) &_arrow_StructArray__field, 2}, + { "_arrow_StructArray__GetFieldByName", (DL_FUNC) &_arrow_StructArray__GetFieldByName, 2}, + { "_arrow_StructArray__Flatten", (DL_FUNC) &_arrow_StructArray__Flatten, 1}, + { "_arrow_ListArray__value_type", (DL_FUNC) &_arrow_ListArray__value_type, 1}, + { "_arrow_LargeListArray__value_type", 
(DL_FUNC) &_arrow_LargeListArray__value_type, 1}, + { "_arrow_ListArray__values", (DL_FUNC) &_arrow_ListArray__values, 1}, + { "_arrow_LargeListArray__values", (DL_FUNC) &_arrow_LargeListArray__values, 1}, + { "_arrow_ListArray__value_length", (DL_FUNC) &_arrow_ListArray__value_length, 2}, + { "_arrow_LargeListArray__value_length", (DL_FUNC) &_arrow_LargeListArray__value_length, 2}, + { "_arrow_FixedSizeListArray__value_length", (DL_FUNC) &_arrow_FixedSizeListArray__value_length, 2}, + { "_arrow_ListArray__value_offset", (DL_FUNC) &_arrow_ListArray__value_offset, 2}, + { "_arrow_LargeListArray__value_offset", (DL_FUNC) &_arrow_LargeListArray__value_offset, 2}, + { "_arrow_FixedSizeListArray__value_offset", (DL_FUNC) &_arrow_FixedSizeListArray__value_offset, 2}, + { "_arrow_ListArray__raw_value_offsets", (DL_FUNC) &_arrow_ListArray__raw_value_offsets, 1}, + { "_arrow_LargeListArray__raw_value_offsets", (DL_FUNC) &_arrow_LargeListArray__raw_value_offsets, 1}, + { "_arrow_Array__infer_type", (DL_FUNC) &_arrow_Array__infer_type, 1}, + { "_arrow_Array__from_vector", (DL_FUNC) &_arrow_Array__from_vector, 2}, + { "_arrow_ChunkedArray__from_list", (DL_FUNC) &_arrow_ChunkedArray__from_list, 2}, + { "_arrow_DictionaryArray__FromArrays", (DL_FUNC) &_arrow_DictionaryArray__FromArrays, 3}, + { "_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1}, + { "_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 1}, + { "_arrow_RecordBatch__to_dataframe", (DL_FUNC) &_arrow_RecordBatch__to_dataframe, 2}, + { "_arrow_Table__to_dataframe", (DL_FUNC) &_arrow_Table__to_dataframe, 2}, + { "_arrow_ArrayData__get_type", (DL_FUNC) &_arrow_ArrayData__get_type, 1}, + { "_arrow_ArrayData__get_length", (DL_FUNC) &_arrow_ArrayData__get_length, 1}, + { "_arrow_ArrayData__get_null_count", (DL_FUNC) &_arrow_ArrayData__get_null_count, 1}, + { "_arrow_ArrayData__get_offset", (DL_FUNC) &_arrow_ArrayData__get_offset, 1}, + { "_arrow_ArrayData__buffers", (DL_FUNC) 
&_arrow_ArrayData__buffers, 1}, + { "_arrow_Buffer__is_mutable", (DL_FUNC) &_arrow_Buffer__is_mutable, 1}, + { "_arrow_Buffer__ZeroPadding", (DL_FUNC) &_arrow_Buffer__ZeroPadding, 1}, + { "_arrow_Buffer__capacity", (DL_FUNC) &_arrow_Buffer__capacity, 1}, + { "_arrow_Buffer__size", (DL_FUNC) &_arrow_Buffer__size, 1}, + { "_arrow_r___RBuffer__initialize", (DL_FUNC) &_arrow_r___RBuffer__initialize, 1}, + { "_arrow_Buffer__data", (DL_FUNC) &_arrow_Buffer__data, 1}, + { "_arrow_Buffer__Equals", (DL_FUNC) &_arrow_Buffer__Equals, 2}, + { "_arrow_ChunkedArray__length", (DL_FUNC) &_arrow_ChunkedArray__length, 1}, + { "_arrow_ChunkedArray__null_count", (DL_FUNC) &_arrow_ChunkedArray__null_count, 1}, + { "_arrow_ChunkedArray__num_chunks", (DL_FUNC) &_arrow_ChunkedArray__num_chunks, 1}, + { "_arrow_ChunkedArray__chunk", (DL_FUNC) &_arrow_ChunkedArray__chunk, 2}, + { "_arrow_ChunkedArray__chunks", (DL_FUNC) &_arrow_ChunkedArray__chunks, 1}, + { "_arrow_ChunkedArray__type", (DL_FUNC) &_arrow_ChunkedArray__type, 1}, + { "_arrow_ChunkedArray__Slice1", (DL_FUNC) &_arrow_ChunkedArray__Slice1, 2}, + { "_arrow_ChunkedArray__Slice2", (DL_FUNC) &_arrow_ChunkedArray__Slice2, 3}, + { "_arrow_ChunkedArray__View", (DL_FUNC) &_arrow_ChunkedArray__View, 2}, + { "_arrow_ChunkedArray__Validate", (DL_FUNC) &_arrow_ChunkedArray__Validate, 1}, + { "_arrow_ChunkedArray__Equals", (DL_FUNC) &_arrow_ChunkedArray__Equals, 2}, + { "_arrow_ChunkedArray__ToString", (DL_FUNC) &_arrow_ChunkedArray__ToString, 1}, + { "_arrow_util___Codec__Create", (DL_FUNC) &_arrow_util___Codec__Create, 2}, + { "_arrow_util___Codec__name", (DL_FUNC) &_arrow_util___Codec__name, 1}, + { "_arrow_util___Codec__IsAvailable", (DL_FUNC) &_arrow_util___Codec__IsAvailable, 1}, + { "_arrow_io___CompressedOutputStream__Make", (DL_FUNC) &_arrow_io___CompressedOutputStream__Make, 2}, + { "_arrow_io___CompressedInputStream__Make", (DL_FUNC) &_arrow_io___CompressedInputStream__Make, 2}, + { "_arrow_RecordBatch__cast", (DL_FUNC) 
&_arrow_RecordBatch__cast, 3}, + { "_arrow_Table__cast", (DL_FUNC) &_arrow_Table__cast, 3}, + { "_arrow_compute__CallFunction", (DL_FUNC) &_arrow_compute__CallFunction, 3}, + { "_arrow_csv___ReadOptions__initialize", (DL_FUNC) &_arrow_csv___ReadOptions__initialize, 1}, + { "_arrow_csv___ParseOptions__initialize", (DL_FUNC) &_arrow_csv___ParseOptions__initialize, 1}, + { "_arrow_csv___ReadOptions__column_names", (DL_FUNC) &_arrow_csv___ReadOptions__column_names, 1}, + { "_arrow_csv___ConvertOptions__initialize", (DL_FUNC) &_arrow_csv___ConvertOptions__initialize, 1}, + { "_arrow_csv___TableReader__Make", (DL_FUNC) &_arrow_csv___TableReader__Make, 4}, + { "_arrow_csv___TableReader__Read", (DL_FUNC) &_arrow_csv___TableReader__Read, 1}, + { "_arrow_TimestampParser__kind", (DL_FUNC) &_arrow_TimestampParser__kind, 1}, + { "_arrow_TimestampParser__format", (DL_FUNC) &_arrow_TimestampParser__format, 1}, + { "_arrow_TimestampParser__MakeStrptime", (DL_FUNC) &_arrow_TimestampParser__MakeStrptime, 1}, + { "_arrow_TimestampParser__MakeISO8601", (DL_FUNC) &_arrow_TimestampParser__MakeISO8601, 0}, + { "_arrow_dataset___Dataset__NewScan", (DL_FUNC) &_arrow_dataset___Dataset__NewScan, 1}, + { "_arrow_dataset___Dataset__schema", (DL_FUNC) &_arrow_dataset___Dataset__schema, 1}, + { "_arrow_dataset___Dataset__type_name", (DL_FUNC) &_arrow_dataset___Dataset__type_name, 1}, + { "_arrow_dataset___Dataset__ReplaceSchema", (DL_FUNC) &_arrow_dataset___Dataset__ReplaceSchema, 2}, + { "_arrow_dataset___UnionDataset__create", (DL_FUNC) &_arrow_dataset___UnionDataset__create, 2}, + { "_arrow_dataset___InMemoryDataset__create", (DL_FUNC) &_arrow_dataset___InMemoryDataset__create, 1}, + { "_arrow_dataset___UnionDataset__children", (DL_FUNC) &_arrow_dataset___UnionDataset__children, 1}, + { "_arrow_dataset___FileSystemDataset__format", (DL_FUNC) &_arrow_dataset___FileSystemDataset__format, 1}, + { "_arrow_dataset___FileSystemDataset__filesystem", (DL_FUNC) 
&_arrow_dataset___FileSystemDataset__filesystem, 1}, + { "_arrow_dataset___FileSystemDataset__files", (DL_FUNC) &_arrow_dataset___FileSystemDataset__files, 1}, + { "_arrow_dataset___DatasetFactory__Finish1", (DL_FUNC) &_arrow_dataset___DatasetFactory__Finish1, 2}, + { "_arrow_dataset___DatasetFactory__Finish2", (DL_FUNC) &_arrow_dataset___DatasetFactory__Finish2, 2}, + { "_arrow_dataset___DatasetFactory__Inspect", (DL_FUNC) &_arrow_dataset___DatasetFactory__Inspect, 2}, + { "_arrow_dataset___UnionDatasetFactory__Make", (DL_FUNC) &_arrow_dataset___UnionDatasetFactory__Make, 1}, + { "_arrow_dataset___FileSystemDatasetFactory__Make2", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make2, 4}, + { "_arrow_dataset___FileSystemDatasetFactory__Make1", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make1, 3}, + { "_arrow_dataset___FileSystemDatasetFactory__Make3", (DL_FUNC) &_arrow_dataset___FileSystemDatasetFactory__Make3, 4}, + { "_arrow_dataset___FileFormat__type_name", (DL_FUNC) &_arrow_dataset___FileFormat__type_name, 1}, + { "_arrow_dataset___FileFormat__DefaultWriteOptions", (DL_FUNC) &_arrow_dataset___FileFormat__DefaultWriteOptions, 1}, + { "_arrow_dataset___ParquetFileFormat__Make", (DL_FUNC) &_arrow_dataset___ParquetFileFormat__Make, 3}, + { "_arrow_dataset___FileWriteOptions__type_name", (DL_FUNC) &_arrow_dataset___FileWriteOptions__type_name, 1}, + { "_arrow_dataset___ParquetFileWriteOptions__update", (DL_FUNC) &_arrow_dataset___ParquetFileWriteOptions__update, 3}, + { "_arrow_dataset___IpcFileWriteOptions__update2", (DL_FUNC) &_arrow_dataset___IpcFileWriteOptions__update2, 4}, + { "_arrow_dataset___IpcFileWriteOptions__update1", (DL_FUNC) &_arrow_dataset___IpcFileWriteOptions__update1, 3}, + { "_arrow_dataset___IpcFileFormat__Make", (DL_FUNC) &_arrow_dataset___IpcFileFormat__Make, 0}, + { "_arrow_dataset___CsvFileFormat__Make", (DL_FUNC) &_arrow_dataset___CsvFileFormat__Make, 1}, + { "_arrow_dataset___DirectoryPartitioning", (DL_FUNC) 
&_arrow_dataset___DirectoryPartitioning, 1}, + { "_arrow_dataset___DirectoryPartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___DirectoryPartitioning__MakeFactory, 1}, + { "_arrow_dataset___HivePartitioning", (DL_FUNC) &_arrow_dataset___HivePartitioning, 1}, + { "_arrow_dataset___HivePartitioning__MakeFactory", (DL_FUNC) &_arrow_dataset___HivePartitioning__MakeFactory, 0}, + { "_arrow_dataset___ScannerBuilder__Project", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Project, 2}, + { "_arrow_dataset___ScannerBuilder__Filter", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Filter, 2}, + { "_arrow_dataset___ScannerBuilder__UseThreads", (DL_FUNC) &_arrow_dataset___ScannerBuilder__UseThreads, 2}, + { "_arrow_dataset___ScannerBuilder__BatchSize", (DL_FUNC) &_arrow_dataset___ScannerBuilder__BatchSize, 2}, + { "_arrow_dataset___ScannerBuilder__schema", (DL_FUNC) &_arrow_dataset___ScannerBuilder__schema, 1}, + { "_arrow_dataset___ScannerBuilder__Finish", (DL_FUNC) &_arrow_dataset___ScannerBuilder__Finish, 1}, + { "_arrow_dataset___Scanner__ToTable", (DL_FUNC) &_arrow_dataset___Scanner__ToTable, 1}, + { "_arrow_dataset___Scanner__head", (DL_FUNC) &_arrow_dataset___Scanner__head, 2}, + { "_arrow_dataset___Scanner__Scan", (DL_FUNC) &_arrow_dataset___Scanner__Scan, 1}, + { "_arrow_dataset___Scanner__schema", (DL_FUNC) &_arrow_dataset___Scanner__schema, 1}, + { "_arrow_dataset___ScanTask__get_batches", (DL_FUNC) &_arrow_dataset___ScanTask__get_batches, 1}, + { "_arrow_dataset___Dataset__Write", (DL_FUNC) &_arrow_dataset___Dataset__Write, 6}, + { "_arrow_Int8__initialize", (DL_FUNC) &_arrow_Int8__initialize, 0}, + { "_arrow_Int16__initialize", (DL_FUNC) &_arrow_Int16__initialize, 0}, + { "_arrow_Int32__initialize", (DL_FUNC) &_arrow_Int32__initialize, 0}, + { "_arrow_Int64__initialize", (DL_FUNC) &_arrow_Int64__initialize, 0}, + { "_arrow_UInt8__initialize", (DL_FUNC) &_arrow_UInt8__initialize, 0}, + { "_arrow_UInt16__initialize", (DL_FUNC) &_arrow_UInt16__initialize, 0}, + { 
"_arrow_UInt32__initialize", (DL_FUNC) &_arrow_UInt32__initialize, 0}, + { "_arrow_UInt64__initialize", (DL_FUNC) &_arrow_UInt64__initialize, 0}, + { "_arrow_Float16__initialize", (DL_FUNC) &_arrow_Float16__initialize, 0}, + { "_arrow_Float32__initialize", (DL_FUNC) &_arrow_Float32__initialize, 0}, + { "_arrow_Float64__initialize", (DL_FUNC) &_arrow_Float64__initialize, 0}, + { "_arrow_Boolean__initialize", (DL_FUNC) &_arrow_Boolean__initialize, 0}, + { "_arrow_Utf8__initialize", (DL_FUNC) &_arrow_Utf8__initialize, 0}, + { "_arrow_LargeUtf8__initialize", (DL_FUNC) &_arrow_LargeUtf8__initialize, 0}, + { "_arrow_Binary__initialize", (DL_FUNC) &_arrow_Binary__initialize, 0}, + { "_arrow_LargeBinary__initialize", (DL_FUNC) &_arrow_LargeBinary__initialize, 0}, + { "_arrow_Date32__initialize", (DL_FUNC) &_arrow_Date32__initialize, 0}, + { "_arrow_Date64__initialize", (DL_FUNC) &_arrow_Date64__initialize, 0}, + { "_arrow_Null__initialize", (DL_FUNC) &_arrow_Null__initialize, 0}, + { "_arrow_Decimal128Type__initialize", (DL_FUNC) &_arrow_Decimal128Type__initialize, 2}, + { "_arrow_FixedSizeBinary__initialize", (DL_FUNC) &_arrow_FixedSizeBinary__initialize, 1}, + { "_arrow_Timestamp__initialize", (DL_FUNC) &_arrow_Timestamp__initialize, 2}, + { "_arrow_Time32__initialize", (DL_FUNC) &_arrow_Time32__initialize, 1}, + { "_arrow_Time64__initialize", (DL_FUNC) &_arrow_Time64__initialize, 1}, + { "_arrow_list__", (DL_FUNC) &_arrow_list__, 1}, + { "_arrow_large_list__", (DL_FUNC) &_arrow_large_list__, 1}, + { "_arrow_fixed_size_list__", (DL_FUNC) &_arrow_fixed_size_list__, 2}, + { "_arrow_struct__", (DL_FUNC) &_arrow_struct__, 1}, + { "_arrow_DataType__ToString", (DL_FUNC) &_arrow_DataType__ToString, 1}, + { "_arrow_DataType__name", (DL_FUNC) &_arrow_DataType__name, 1}, + { "_arrow_DataType__Equals", (DL_FUNC) &_arrow_DataType__Equals, 2}, + { "_arrow_DataType__num_fields", (DL_FUNC) &_arrow_DataType__num_fields, 1}, + { "_arrow_DataType__fields", (DL_FUNC) 
&_arrow_DataType__fields, 1}, + { "_arrow_DataType__id", (DL_FUNC) &_arrow_DataType__id, 1}, + { "_arrow_ListType__ToString", (DL_FUNC) &_arrow_ListType__ToString, 1}, + { "_arrow_FixedWidthType__bit_width", (DL_FUNC) &_arrow_FixedWidthType__bit_width, 1}, + { "_arrow_DateType__unit", (DL_FUNC) &_arrow_DateType__unit, 1}, + { "_arrow_TimeType__unit", (DL_FUNC) &_arrow_TimeType__unit, 1}, + { "_arrow_DecimalType__precision", (DL_FUNC) &_arrow_DecimalType__precision, 1}, + { "_arrow_DecimalType__scale", (DL_FUNC) &_arrow_DecimalType__scale, 1}, + { "_arrow_TimestampType__timezone", (DL_FUNC) &_arrow_TimestampType__timezone, 1}, + { "_arrow_TimestampType__unit", (DL_FUNC) &_arrow_TimestampType__unit, 1}, + { "_arrow_DictionaryType__initialize", (DL_FUNC) &_arrow_DictionaryType__initialize, 3}, + { "_arrow_DictionaryType__index_type", (DL_FUNC) &_arrow_DictionaryType__index_type, 1}, + { "_arrow_DictionaryType__value_type", (DL_FUNC) &_arrow_DictionaryType__value_type, 1}, + { "_arrow_DictionaryType__name", (DL_FUNC) &_arrow_DictionaryType__name, 1}, + { "_arrow_DictionaryType__ordered", (DL_FUNC) &_arrow_DictionaryType__ordered, 1}, + { "_arrow_StructType__GetFieldByName", (DL_FUNC) &_arrow_StructType__GetFieldByName, 2}, + { "_arrow_StructType__GetFieldIndex", (DL_FUNC) &_arrow_StructType__GetFieldIndex, 2}, + { "_arrow_StructType__field_names", (DL_FUNC) &_arrow_StructType__field_names, 1}, + { "_arrow_ListType__value_field", (DL_FUNC) &_arrow_ListType__value_field, 1}, + { "_arrow_ListType__value_type", (DL_FUNC) &_arrow_ListType__value_type, 1}, + { "_arrow_LargeListType__value_field", (DL_FUNC) &_arrow_LargeListType__value_field, 1}, + { "_arrow_LargeListType__value_type", (DL_FUNC) &_arrow_LargeListType__value_type, 1}, + { "_arrow_FixedSizeListType__value_field", (DL_FUNC) &_arrow_FixedSizeListType__value_field, 1}, + { "_arrow_FixedSizeListType__value_type", (DL_FUNC) &_arrow_FixedSizeListType__value_type, 1}, + { "_arrow_FixedSizeListType__list_size", 
(DL_FUNC) &_arrow_FixedSizeListType__list_size, 1}, + { "_arrow_dataset___expr__call", (DL_FUNC) &_arrow_dataset___expr__call, 3}, + { "_arrow_dataset___expr__field_ref", (DL_FUNC) &_arrow_dataset___expr__field_ref, 1}, + { "_arrow_dataset___expr__scalar", (DL_FUNC) &_arrow_dataset___expr__scalar, 1}, + { "_arrow_dataset___expr__ToString", (DL_FUNC) &_arrow_dataset___expr__ToString, 1}, + { "_arrow_ipc___WriteFeather__Table", (DL_FUNC) &_arrow_ipc___WriteFeather__Table, 6}, + { "_arrow_ipc___feather___Reader__version", (DL_FUNC) &_arrow_ipc___feather___Reader__version, 1}, + { "_arrow_ipc___feather___Reader__Read", (DL_FUNC) &_arrow_ipc___feather___Reader__Read, 2}, + { "_arrow_ipc___feather___Reader__Open", (DL_FUNC) &_arrow_ipc___feather___Reader__Open, 1}, + { "_arrow_ipc___feather___Reader__column_names", (DL_FUNC) &_arrow_ipc___feather___Reader__column_names, 1}, + { "_arrow_Field__initialize", (DL_FUNC) &_arrow_Field__initialize, 3}, + { "_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 1}, + { "_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1}, + { "_arrow_Field__Equals", (DL_FUNC) &_arrow_Field__Equals, 2}, + { "_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1}, + { "_arrow_Field__type", (DL_FUNC) &_arrow_Field__type, 1}, + { "_arrow_fs___FileInfo__type", (DL_FUNC) &_arrow_fs___FileInfo__type, 1}, + { "_arrow_fs___FileInfo__set_type", (DL_FUNC) &_arrow_fs___FileInfo__set_type, 2}, + { "_arrow_fs___FileInfo__path", (DL_FUNC) &_arrow_fs___FileInfo__path, 1}, + { "_arrow_fs___FileInfo__set_path", (DL_FUNC) &_arrow_fs___FileInfo__set_path, 2}, + { "_arrow_fs___FileInfo__size", (DL_FUNC) &_arrow_fs___FileInfo__size, 1}, + { "_arrow_fs___FileInfo__set_size", (DL_FUNC) &_arrow_fs___FileInfo__set_size, 2}, + { "_arrow_fs___FileInfo__base_name", (DL_FUNC) &_arrow_fs___FileInfo__base_name, 1}, + { "_arrow_fs___FileInfo__extension", (DL_FUNC) &_arrow_fs___FileInfo__extension, 1}, + { "_arrow_fs___FileInfo__mtime", (DL_FUNC) 
&_arrow_fs___FileInfo__mtime, 1}, + { "_arrow_fs___FileInfo__set_mtime", (DL_FUNC) &_arrow_fs___FileInfo__set_mtime, 2}, + { "_arrow_fs___FileSelector__base_dir", (DL_FUNC) &_arrow_fs___FileSelector__base_dir, 1}, + { "_arrow_fs___FileSelector__allow_not_found", (DL_FUNC) &_arrow_fs___FileSelector__allow_not_found, 1}, + { "_arrow_fs___FileSelector__recursive", (DL_FUNC) &_arrow_fs___FileSelector__recursive, 1}, + { "_arrow_fs___FileSelector__create", (DL_FUNC) &_arrow_fs___FileSelector__create, 3}, + { "_arrow_fs___FileSystem__GetTargetInfos_Paths", (DL_FUNC) &_arrow_fs___FileSystem__GetTargetInfos_Paths, 2}, + { "_arrow_fs___FileSystem__GetTargetInfos_FileSelector", (DL_FUNC) &_arrow_fs___FileSystem__GetTargetInfos_FileSelector, 2}, + { "_arrow_fs___FileSystem__CreateDir", (DL_FUNC) &_arrow_fs___FileSystem__CreateDir, 3}, + { "_arrow_fs___FileSystem__DeleteDir", (DL_FUNC) &_arrow_fs___FileSystem__DeleteDir, 2}, + { "_arrow_fs___FileSystem__DeleteDirContents", (DL_FUNC) &_arrow_fs___FileSystem__DeleteDirContents, 2}, + { "_arrow_fs___FileSystem__DeleteFile", (DL_FUNC) &_arrow_fs___FileSystem__DeleteFile, 2}, + { "_arrow_fs___FileSystem__DeleteFiles", (DL_FUNC) &_arrow_fs___FileSystem__DeleteFiles, 2}, + { "_arrow_fs___FileSystem__Move", (DL_FUNC) &_arrow_fs___FileSystem__Move, 3}, + { "_arrow_fs___FileSystem__CopyFile", (DL_FUNC) &_arrow_fs___FileSystem__CopyFile, 3}, + { "_arrow_fs___FileSystem__OpenInputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenInputStream, 2}, + { "_arrow_fs___FileSystem__OpenInputFile", (DL_FUNC) &_arrow_fs___FileSystem__OpenInputFile, 2}, + { "_arrow_fs___FileSystem__OpenOutputStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenOutputStream, 2}, + { "_arrow_fs___FileSystem__OpenAppendStream", (DL_FUNC) &_arrow_fs___FileSystem__OpenAppendStream, 2}, + { "_arrow_fs___FileSystem__type_name", (DL_FUNC) &_arrow_fs___FileSystem__type_name, 1}, + { "_arrow_fs___LocalFileSystem__create", (DL_FUNC) &_arrow_fs___LocalFileSystem__create, 0}, + { 
"_arrow_fs___SubTreeFileSystem__create", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__create, 2}, + { "_arrow_fs___SubTreeFileSystem__base_fs", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_fs, 1}, + { "_arrow_fs___SubTreeFileSystem__base_path", (DL_FUNC) &_arrow_fs___SubTreeFileSystem__base_path, 1}, + { "_arrow_fs___FileSystemFromUri", (DL_FUNC) &_arrow_fs___FileSystemFromUri, 1}, + { "_arrow_fs___CopyFiles", (DL_FUNC) &_arrow_fs___CopyFiles, 6}, + { "_arrow_fs___S3FileSystem__create", (DL_FUNC) &_arrow_fs___S3FileSystem__create, 12}, + { "_arrow_fs___S3FileSystem__region", (DL_FUNC) &_arrow_fs___S3FileSystem__region, 1}, + { "_arrow_io___Readable__Read", (DL_FUNC) &_arrow_io___Readable__Read, 2}, + { "_arrow_io___InputStream__Close", (DL_FUNC) &_arrow_io___InputStream__Close, 1}, + { "_arrow_io___OutputStream__Close", (DL_FUNC) &_arrow_io___OutputStream__Close, 1}, + { "_arrow_io___RandomAccessFile__GetSize", (DL_FUNC) &_arrow_io___RandomAccessFile__GetSize, 1}, + { "_arrow_io___RandomAccessFile__supports_zero_copy", (DL_FUNC) &_arrow_io___RandomAccessFile__supports_zero_copy, 1}, + { "_arrow_io___RandomAccessFile__Seek", (DL_FUNC) &_arrow_io___RandomAccessFile__Seek, 2}, + { "_arrow_io___RandomAccessFile__Tell", (DL_FUNC) &_arrow_io___RandomAccessFile__Tell, 1}, + { "_arrow_io___RandomAccessFile__Read0", (DL_FUNC) &_arrow_io___RandomAccessFile__Read0, 1}, + { "_arrow_io___RandomAccessFile__ReadAt", (DL_FUNC) &_arrow_io___RandomAccessFile__ReadAt, 3}, + { "_arrow_io___MemoryMappedFile__Create", (DL_FUNC) &_arrow_io___MemoryMappedFile__Create, 2}, + { "_arrow_io___MemoryMappedFile__Open", (DL_FUNC) &_arrow_io___MemoryMappedFile__Open, 2}, + { "_arrow_io___MemoryMappedFile__Resize", (DL_FUNC) &_arrow_io___MemoryMappedFile__Resize, 2}, + { "_arrow_io___ReadableFile__Open", (DL_FUNC) &_arrow_io___ReadableFile__Open, 1}, + { "_arrow_io___BufferReader__initialize", (DL_FUNC) &_arrow_io___BufferReader__initialize, 1}, + { "_arrow_io___Writable__write", (DL_FUNC) 
&_arrow_io___Writable__write, 2}, + { "_arrow_io___OutputStream__Tell", (DL_FUNC) &_arrow_io___OutputStream__Tell, 1}, + { "_arrow_io___FileOutputStream__Open", (DL_FUNC) &_arrow_io___FileOutputStream__Open, 1}, + { "_arrow_io___BufferOutputStream__Create", (DL_FUNC) &_arrow_io___BufferOutputStream__Create, 1}, + { "_arrow_io___BufferOutputStream__capacity", (DL_FUNC) &_arrow_io___BufferOutputStream__capacity, 1}, + { "_arrow_io___BufferOutputStream__Finish", (DL_FUNC) &_arrow_io___BufferOutputStream__Finish, 1}, + { "_arrow_io___BufferOutputStream__Tell", (DL_FUNC) &_arrow_io___BufferOutputStream__Tell, 1}, + { "_arrow_io___BufferOutputStream__Write", (DL_FUNC) &_arrow_io___BufferOutputStream__Write, 2}, + { "_arrow_json___ReadOptions__initialize", (DL_FUNC) &_arrow_json___ReadOptions__initialize, 2}, + { "_arrow_json___ParseOptions__initialize", (DL_FUNC) &_arrow_json___ParseOptions__initialize, 1}, + { "_arrow_json___TableReader__Make", (DL_FUNC) &_arrow_json___TableReader__Make, 3}, + { "_arrow_json___TableReader__Read", (DL_FUNC) &_arrow_json___TableReader__Read, 1}, + { "_arrow_MemoryPool__default", (DL_FUNC) &_arrow_MemoryPool__default, 0}, + { "_arrow_MemoryPool__bytes_allocated", (DL_FUNC) &_arrow_MemoryPool__bytes_allocated, 1}, + { "_arrow_MemoryPool__max_memory", (DL_FUNC) &_arrow_MemoryPool__max_memory, 1}, + { "_arrow_MemoryPool__backend_name", (DL_FUNC) &_arrow_MemoryPool__backend_name, 1}, + { "_arrow_supported_memory_backends", (DL_FUNC) &_arrow_supported_memory_backends, 0}, + { "_arrow_ipc___Message__body_length", (DL_FUNC) &_arrow_ipc___Message__body_length, 1}, + { "_arrow_ipc___Message__metadata", (DL_FUNC) &_arrow_ipc___Message__metadata, 1}, + { "_arrow_ipc___Message__body", (DL_FUNC) &_arrow_ipc___Message__body, 1}, + { "_arrow_ipc___Message__Verify", (DL_FUNC) &_arrow_ipc___Message__Verify, 1}, + { "_arrow_ipc___Message__type", (DL_FUNC) &_arrow_ipc___Message__type, 1}, + { "_arrow_ipc___Message__Equals", (DL_FUNC) 
&_arrow_ipc___Message__Equals, 2}, + { "_arrow_ipc___ReadRecordBatch__Message__Schema", (DL_FUNC) &_arrow_ipc___ReadRecordBatch__Message__Schema, 2}, + { "_arrow_ipc___ReadSchema_InputStream", (DL_FUNC) &_arrow_ipc___ReadSchema_InputStream, 1}, + { "_arrow_ipc___ReadSchema_Message", (DL_FUNC) &_arrow_ipc___ReadSchema_Message, 1}, + { "_arrow_ipc___MessageReader__Open", (DL_FUNC) &_arrow_ipc___MessageReader__Open, 1}, + { "_arrow_ipc___MessageReader__ReadNextMessage", (DL_FUNC) &_arrow_ipc___MessageReader__ReadNextMessage, 1}, + { "_arrow_ipc___ReadMessage", (DL_FUNC) &_arrow_ipc___ReadMessage, 1}, + { "_arrow_parquet___arrow___ArrowReaderProperties__Make", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__Make, 1}, + { "_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_use_threads, 2}, + { "_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_use_threads, 2}, + { "_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__get_read_dictionary, 2}, + { "_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary", (DL_FUNC) &_arrow_parquet___arrow___ArrowReaderProperties__set_read_dictionary, 3}, + { "_arrow_parquet___arrow___FileReader__OpenFile", (DL_FUNC) &_arrow_parquet___arrow___FileReader__OpenFile, 2}, + { "_arrow_parquet___arrow___FileReader__ReadTable1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable1, 1}, + { "_arrow_parquet___arrow___FileReader__ReadTable2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadTable2, 2}, + { "_arrow_parquet___arrow___FileReader__ReadRowGroup1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroup1, 2}, + { "_arrow_parquet___arrow___FileReader__ReadRowGroup2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroup2, 3}, + { 
"_arrow_parquet___arrow___FileReader__ReadRowGroups1", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroups1, 2}, + { "_arrow_parquet___arrow___FileReader__ReadRowGroups2", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadRowGroups2, 3}, + { "_arrow_parquet___arrow___FileReader__num_rows", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_rows, 1}, + { "_arrow_parquet___arrow___FileReader__num_columns", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_columns, 1}, + { "_arrow_parquet___arrow___FileReader__num_row_groups", (DL_FUNC) &_arrow_parquet___arrow___FileReader__num_row_groups, 1}, + { "_arrow_parquet___arrow___FileReader__ReadColumn", (DL_FUNC) &_arrow_parquet___arrow___FileReader__ReadColumn, 2}, + { "_arrow_parquet___ArrowWriterProperties___create", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___create, 3}, + { "_arrow_parquet___WriterProperties___Builder__create", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__create, 0}, + { "_arrow_parquet___WriterProperties___Builder__version", (DL_FUNC) &_arrow_parquet___WriterProperties___Builder__version, 2}, + { "_arrow_parquet___ArrowWriterProperties___Builder__set_compressions", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_compressions, 3}, + { "_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_compression_levels, 3}, + { "_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_use_dictionary, 3}, + { "_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__set_write_statistics, 3}, + { "_arrow_parquet___ArrowWriterProperties___Builder__data_page_size", (DL_FUNC) &_arrow_parquet___ArrowWriterProperties___Builder__data_page_size, 2}, + { "_arrow_parquet___WriterProperties___Builder__build", (DL_FUNC) 
&_arrow_parquet___WriterProperties___Builder__build, 1}, + { "_arrow_parquet___arrow___ParquetFileWriter__Open", (DL_FUNC) &_arrow_parquet___arrow___ParquetFileWriter__Open, 4}, + { "_arrow_parquet___arrow___FileWriter__WriteTable", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__WriteTable, 3}, + { "_arrow_parquet___arrow___FileWriter__Close", (DL_FUNC) &_arrow_parquet___arrow___FileWriter__Close, 1}, + { "_arrow_parquet___arrow___WriteTable", (DL_FUNC) &_arrow_parquet___arrow___WriteTable, 4}, + { "_arrow_parquet___arrow___FileReader__GetSchema", (DL_FUNC) &_arrow_parquet___arrow___FileReader__GetSchema, 1}, + { "_arrow_ImportArray", (DL_FUNC) &_arrow_ImportArray, 2}, + { "_arrow_ImportRecordBatch", (DL_FUNC) &_arrow_ImportRecordBatch, 2}, + { "_arrow_ImportSchema", (DL_FUNC) &_arrow_ImportSchema, 1}, + { "_arrow_allocate_arrow_schema", (DL_FUNC) &_arrow_allocate_arrow_schema, 0}, + { "_arrow_delete_arrow_schema", (DL_FUNC) &_arrow_delete_arrow_schema, 1}, + { "_arrow_allocate_arrow_array", (DL_FUNC) &_arrow_allocate_arrow_array, 0}, + { "_arrow_delete_arrow_array", (DL_FUNC) &_arrow_delete_arrow_array, 1}, + { "_arrow_ExportType", (DL_FUNC) &_arrow_ExportType, 2}, + { "_arrow_ExportSchema", (DL_FUNC) &_arrow_ExportSchema, 2}, + { "_arrow_ExportArray", (DL_FUNC) &_arrow_ExportArray, 3}, + { "_arrow_ExportRecordBatch", (DL_FUNC) &_arrow_ExportRecordBatch, 3}, + { "_arrow_vec_to_arrow", (DL_FUNC) &_arrow_vec_to_arrow, 2}, + { "_arrow_RecordBatch__num_columns", (DL_FUNC) &_arrow_RecordBatch__num_columns, 1}, + { "_arrow_RecordBatch__num_rows", (DL_FUNC) &_arrow_RecordBatch__num_rows, 1}, + { "_arrow_RecordBatch__schema", (DL_FUNC) &_arrow_RecordBatch__schema, 1}, + { "_arrow_RecordBatch__RenameColumns", (DL_FUNC) &_arrow_RecordBatch__RenameColumns, 2}, + { "_arrow_RecordBatch__ReplaceSchemaMetadata", (DL_FUNC) &_arrow_RecordBatch__ReplaceSchemaMetadata, 2}, + { "_arrow_RecordBatch__columns", (DL_FUNC) &_arrow_RecordBatch__columns, 1}, + { 
"_arrow_RecordBatch__column", (DL_FUNC) &_arrow_RecordBatch__column, 2}, + { "_arrow_RecordBatch__GetColumnByName", (DL_FUNC) &_arrow_RecordBatch__GetColumnByName, 2}, + { "_arrow_RecordBatch__SelectColumns", (DL_FUNC) &_arrow_RecordBatch__SelectColumns, 2}, + { "_arrow_RecordBatch__Equals", (DL_FUNC) &_arrow_RecordBatch__Equals, 3}, + { "_arrow_RecordBatch__AddColumn", (DL_FUNC) &_arrow_RecordBatch__AddColumn, 4}, + { "_arrow_RecordBatch__SetColumn", (DL_FUNC) &_arrow_RecordBatch__SetColumn, 4}, + { "_arrow_RecordBatch__RemoveColumn", (DL_FUNC) &_arrow_RecordBatch__RemoveColumn, 2}, + { "_arrow_RecordBatch__column_name", (DL_FUNC) &_arrow_RecordBatch__column_name, 2}, + { "_arrow_RecordBatch__names", (DL_FUNC) &_arrow_RecordBatch__names, 1}, + { "_arrow_RecordBatch__Slice1", (DL_FUNC) &_arrow_RecordBatch__Slice1, 2}, + { "_arrow_RecordBatch__Slice2", (DL_FUNC) &_arrow_RecordBatch__Slice2, 3}, + { "_arrow_ipc___SerializeRecordBatch__Raw", (DL_FUNC) &_arrow_ipc___SerializeRecordBatch__Raw, 1}, + { "_arrow_ipc___ReadRecordBatch__InputStream__Schema", (DL_FUNC) &_arrow_ipc___ReadRecordBatch__InputStream__Schema, 2}, + { "_arrow_RecordBatch__from_arrays", (DL_FUNC) &_arrow_RecordBatch__from_arrays, 2}, + { "_arrow_RecordBatchReader__schema", (DL_FUNC) &_arrow_RecordBatchReader__schema, 1}, + { "_arrow_RecordBatchReader__ReadNext", (DL_FUNC) &_arrow_RecordBatchReader__ReadNext, 1}, + { "_arrow_ipc___RecordBatchStreamReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamReader__Open, 1}, + { "_arrow_ipc___RecordBatchStreamReader__batches", (DL_FUNC) &_arrow_ipc___RecordBatchStreamReader__batches, 1}, + { "_arrow_ipc___RecordBatchFileReader__schema", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__schema, 1}, + { "_arrow_ipc___RecordBatchFileReader__num_record_batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__num_record_batches, 1}, + { "_arrow_ipc___RecordBatchFileReader__ReadRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__ReadRecordBatch, 2}, + { 
"_arrow_ipc___RecordBatchFileReader__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__Open, 1}, + { "_arrow_Table__from_RecordBatchReader", (DL_FUNC) &_arrow_Table__from_RecordBatchReader, 1}, + { "_arrow_Table__from_RecordBatchFileReader", (DL_FUNC) &_arrow_Table__from_RecordBatchFileReader, 1}, + { "_arrow_ipc___RecordBatchFileReader__batches", (DL_FUNC) &_arrow_ipc___RecordBatchFileReader__batches, 1}, + { "_arrow_ipc___RecordBatchWriter__WriteRecordBatch", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteRecordBatch, 2}, + { "_arrow_ipc___RecordBatchWriter__WriteTable", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__WriteTable, 2}, + { "_arrow_ipc___RecordBatchWriter__Close", (DL_FUNC) &_arrow_ipc___RecordBatchWriter__Close, 1}, + { "_arrow_ipc___RecordBatchFileWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchFileWriter__Open, 4}, + { "_arrow_ipc___RecordBatchStreamWriter__Open", (DL_FUNC) &_arrow_ipc___RecordBatchStreamWriter__Open, 4}, + { "_arrow_Array__GetScalar", (DL_FUNC) &_arrow_Array__GetScalar, 2}, + { "_arrow_Scalar__ToString", (DL_FUNC) &_arrow_Scalar__ToString, 1}, + { "_arrow_StructScalar__field", (DL_FUNC) &_arrow_StructScalar__field, 2}, + { "_arrow_StructScalar__GetFieldByName", (DL_FUNC) &_arrow_StructScalar__GetFieldByName, 2}, + { "_arrow_Scalar__as_vector", (DL_FUNC) &_arrow_Scalar__as_vector, 1}, + { "_arrow_Scalar__is_valid", (DL_FUNC) &_arrow_Scalar__is_valid, 1}, + { "_arrow_Scalar__type", (DL_FUNC) &_arrow_Scalar__type, 1}, + { "_arrow_schema_", (DL_FUNC) &_arrow_schema_, 1}, + { "_arrow_Schema__ToString", (DL_FUNC) &_arrow_Schema__ToString, 1}, + { "_arrow_Schema__num_fields", (DL_FUNC) &_arrow_Schema__num_fields, 1}, + { "_arrow_Schema__field", (DL_FUNC) &_arrow_Schema__field, 2}, + { "_arrow_Schema__GetFieldByName", (DL_FUNC) &_arrow_Schema__GetFieldByName, 2}, + { "_arrow_Schema__fields", (DL_FUNC) &_arrow_Schema__fields, 1}, + { "_arrow_Schema__field_names", (DL_FUNC) &_arrow_Schema__field_names, 1}, + { 
"_arrow_Schema__HasMetadata", (DL_FUNC) &_arrow_Schema__HasMetadata, 1}, + { "_arrow_Schema__metadata", (DL_FUNC) &_arrow_Schema__metadata, 1}, + { "_arrow_Schema__WithMetadata", (DL_FUNC) &_arrow_Schema__WithMetadata, 2}, + { "_arrow_Schema__serialize", (DL_FUNC) &_arrow_Schema__serialize, 1}, + { "_arrow_Schema__Equals", (DL_FUNC) &_arrow_Schema__Equals, 3}, + { "_arrow_arrow__UnifySchemas", (DL_FUNC) &_arrow_arrow__UnifySchemas, 1}, + { "_arrow_Table__num_columns", (DL_FUNC) &_arrow_Table__num_columns, 1}, + { "_arrow_Table__num_rows", (DL_FUNC) &_arrow_Table__num_rows, 1}, + { "_arrow_Table__schema", (DL_FUNC) &_arrow_Table__schema, 1}, + { "_arrow_Table__ReplaceSchemaMetadata", (DL_FUNC) &_arrow_Table__ReplaceSchemaMetadata, 2}, + { "_arrow_Table__column", (DL_FUNC) &_arrow_Table__column, 2}, + { "_arrow_Table__field", (DL_FUNC) &_arrow_Table__field, 2}, + { "_arrow_Table__columns", (DL_FUNC) &_arrow_Table__columns, 1}, + { "_arrow_Table__ColumnNames", (DL_FUNC) &_arrow_Table__ColumnNames, 1}, + { "_arrow_Table__RenameColumns", (DL_FUNC) &_arrow_Table__RenameColumns, 2}, + { "_arrow_Table__Slice1", (DL_FUNC) &_arrow_Table__Slice1, 2}, + { "_arrow_Table__Slice2", (DL_FUNC) &_arrow_Table__Slice2, 3}, + { "_arrow_Table__Equals", (DL_FUNC) &_arrow_Table__Equals, 3}, + { "_arrow_Table__Validate", (DL_FUNC) &_arrow_Table__Validate, 1}, + { "_arrow_Table__ValidateFull", (DL_FUNC) &_arrow_Table__ValidateFull, 1}, + { "_arrow_Table__GetColumnByName", (DL_FUNC) &_arrow_Table__GetColumnByName, 2}, + { "_arrow_Table__RemoveColumn", (DL_FUNC) &_arrow_Table__RemoveColumn, 2}, + { "_arrow_Table__AddColumn", (DL_FUNC) &_arrow_Table__AddColumn, 4}, + { "_arrow_Table__SetColumn", (DL_FUNC) &_arrow_Table__SetColumn, 4}, + { "_arrow_Table__SelectColumns", (DL_FUNC) &_arrow_Table__SelectColumns, 2}, + { "_arrow_all_record_batches", (DL_FUNC) &_arrow_all_record_batches, 1}, + { "_arrow_Table__from_record_batches", (DL_FUNC) &_arrow_Table__from_record_batches, 2}, + { 
"_arrow_Table__from_dots", (DL_FUNC) &_arrow_Table__from_dots, 2}, + { "_arrow_GetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_GetCpuThreadPoolCapacity, 0}, + { "_arrow_SetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_SetCpuThreadPoolCapacity, 1}, + { "_arrow_Table__Reset", (DL_FUNC) &_arrow_Table__Reset, 1}, + { "_arrow_RecordBatch__Reset", (DL_FUNC) &_arrow_RecordBatch__Reset, 1}, {NULL, NULL, 0} }; diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 5a7d115f54b..1ed7fbfc6ee 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -754,6 +754,7 @@ template class RDictionaryConverter> : public DictionaryConverter { public: + Status Extend(SEXP x, int64_t size) override { RETURN_NOT_OK(this->Reserve(size)); @@ -770,6 +771,19 @@ class RDictionaryConverter> auto append_null = [this]() { return this->value_builder_->AppendNull(); }; return RVectorVisitor::Visit(x, 0, size, append_null, append_value); } + + virtual Result> ToArray() override { + ARROW_ASSIGN_OR_RAISE(auto result, + this->builder_->Finish()); + + auto result_type = checked_cast(result->type().get()); + if (this->dict_type_->ordered() && !result_type->ordered()) { + return Status::Invalid("converter api seems to lose dictionary orderness"); + } + + return result; + } + }; template @@ -818,13 +832,32 @@ struct RConverterTrait { class RStructConverter : public StructConverter { public: Status Extend(SEXP x, int64_t size) override { - RETURN_NOT_OK(this->Reserve(size)); - + // check that x is compatible R_xlen_t n_columns = XLENGTH(x); if (!Rf_inherits(x, "data.frame")) { return Status::Invalid("Can only convert data frames to Struct type"); } + auto fields = this->struct_type_->fields(); + if (n_columns != fields.size()) { + return Status::RError("Number of fields in struct (", fields.size(), + ") incompatible with number of columns in the data frame (", + n_columns, ")"); + } + + cpp11::strings x_names = Rf_getAttrib(x, R_NamesSymbol); + for (R_xlen_t i = 0; i < n_columns; i++) { + std::string 
name(x_names[i]); + if (name != fields[i]->name()) { + return Status::RError( + "Field name in position ", i, " (", fields[i]->name(), + ") does not match the name of the column of the data frame (", name, ")"); + } + } + + RETURN_NOT_OK(this->Reserve(size)); + + for (R_xlen_t i = 0; i < size; i++) { RETURN_NOT_OK(struct_builder_->Append()); } diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index 03c4f379fd1..f82ff4ec6f3 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -231,6 +231,8 @@ test_that("Array supports unordered factors (ARROW-3355)", { }) test_that("Array supports ordered factors (ARROW-3355)", { + skip("until converter api handles them") + # without NA f <- ordered(c("itsy", "bitsy", "spider", "spider")) arr_fac <- expect_array_roundtrip(f, dictionary(int8(), utf8(), ordered = TRUE)) @@ -385,24 +387,23 @@ test_that("Array$create() supports the type= argument. conversion from INTSXP an }) test_that("Array$create() aborts on overflow", { - msg <- "Invalid.*Value is too large" - expect_error(Array$create(128L, type = int8()), msg) - expect_error(Array$create(-129L, type = int8()), msg) + expect_error(Array$create(128L, type = int8())) + expect_error(Array$create(-129L, type = int8())) - expect_error(Array$create(256L, type = uint8()), msg) - expect_error(Array$create(-1L, type = uint8()), msg) + expect_error(Array$create(256L, type = uint8())) + expect_error(Array$create(-1L, type = uint8())) - expect_error(Array$create(32768L, type = int16()), msg) - expect_error(Array$create(-32769L, type = int16()), msg) + expect_error(Array$create(32768L, type = int16())) + expect_error(Array$create(-32769L, type = int16())) - expect_error(Array$create(65536L, type = uint16()), msg) - expect_error(Array$create(-1L, type = uint16()), msg) + expect_error(Array$create(65536L, type = uint16())) + expect_error(Array$create(-1L, type = uint16())) - expect_error(Array$create(65536L, type = uint16()), msg) - 
expect_error(Array$create(-1L, type = uint16()), msg) + expect_error(Array$create(65536L, type = uint16())) + expect_error(Array$create(-1L, type = uint16())) - expect_error(Array$create(bit64::as.integer64(2^31), type = int32()), msg) - expect_error(Array$create(bit64::as.integer64(2^32), type = uint32()), msg) + expect_error(Array$create(bit64::as.integer64(2^31), type = int32())) + expect_error(Array$create(bit64::as.integer64(2^32), type = uint32())) }) test_that("Array$create() does not convert doubles to integer", { From f336eab35dc6b1491fe6e65a23fcad6bd685683c Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 5 Feb 2021 10:00:12 +0100 Subject: [PATCH 49/82] lint --- r/src/array_from_vector.cpp | 2 +- r/src/r_to_arrow.cpp | 22 +++++++++------------- 2 files changed, 10 insertions(+), 14 deletions(-) diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp index b96a8a9976a..eea4aab3d6d 100644 --- a/r/src/array_from_vector.cpp +++ b/r/src/array_from_vector.cpp @@ -1382,7 +1382,7 @@ arrow::Status CheckCompatibleStruct(SEXP obj, std::shared_ptr Array__from_vector( SEXP x, const std::shared_ptr& type, bool type_inferred) { // new api - return vec_to_arrow(x, type, type_inferred); + // return vec_to_arrow(x, type, type_inferred); // short circuit if `x` is already an Array if (Rf_inherits(x, "Array")) { diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 1ed7fbfc6ee..a21ad8663a5 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -754,7 +754,6 @@ template class RDictionaryConverter> : public DictionaryConverter { public: - Status Extend(SEXP x, int64_t size) override { RETURN_NOT_OK(this->Reserve(size)); @@ -772,18 +771,16 @@ class RDictionaryConverter> return RVectorVisitor::Visit(x, 0, size, append_null, append_value); } - virtual Result> ToArray() override { - ARROW_ASSIGN_OR_RAISE(auto result, - this->builder_->Finish()); - - auto result_type = checked_cast(result->type().get()); - if (this->dict_type_->ordered() && 
!result_type->ordered()) { - return Status::Invalid("converter api seems to lose dictionary orderness"); - } + Result> ToArray() override { + ARROW_ASSIGN_OR_RAISE(auto result, this->builder_->Finish()); - return result; + auto result_type = checked_cast(result->type().get()); + if (this->dict_type_->ordered() && !result_type->ordered()) { + return Status::Invalid("converter api seems to lose dictionary orderness"); } + return result; + } }; template @@ -850,14 +847,13 @@ class RStructConverter : public StructConverter { std::string name(x_names[i]); if (name != fields[i]->name()) { return Status::RError( - "Field name in position ", i, " (", fields[i]->name(), - ") does not match the name of the column of the data frame (", name, ")"); + "Field name in position ", i, " (", fields[i]->name(), + ") does not match the name of the column of the data frame (", name, ")"); } } RETURN_NOT_OK(this->Reserve(size)); - for (R_xlen_t i = 0; i < size; i++) { RETURN_NOT_OK(struct_builder_->Append()); } From b15d96da8cd2731ec5fbfda373023f13d8b29aaf Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 5 Feb 2021 10:46:03 +0100 Subject: [PATCH 50/82] change of message --- r/tests/testthat/test-Array.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index f82ff4ec6f3..464b81cd689 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -484,7 +484,7 @@ test_that("Array$create() can handle data frame with custom struct type (not inf expect_error(Array$create(df, type = type), regexp = "Field name in position.*does not match the name of the column of the data frame") type <- struct(x = float64(), y = utf8()) - expect_error(Array$create(df, type = type), regexp = "Expecting a character vector") + expect_error(Array$create(df, type = type), regexp = "Invalid") }) test_that("Array$create() supports tibble with no columns (ARROW-8354)", { From 
089c932f79ecbf13ae6301cebc26310bad63575e Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 5 Feb 2021 10:46:28 +0100 Subject: [PATCH 51/82] needs to Append() on each list element --- r/src/r_to_arrow.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index a21ad8663a5..35c11c9b63b 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -812,6 +812,7 @@ class RListConverter : public ListConverter { auto append_value = [this](SEXP value) { R_xlen_t n = XLENGTH(value); RETURN_NOT_OK(this->list_builder_->ValidateOverflow(n)); + RETURN_NOT_OK(this->list_builder_->Append()); return this->value_converter_.get()->Extend(value, n); }; auto append_null = [this]() { return this->list_builder_->AppendNull(); }; From 2b4c1ae988774ab8c11758a8f364d729ddbb3939 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 5 Feb 2021 11:26:48 +0100 Subject: [PATCH 52/82] use vctrs::short_vec_size() because value may be a data frame and XLENGTH() does not give the right result here --- r/src/r_to_arrow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 35c11c9b63b..830c86f62ee 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -810,7 +810,7 @@ class RListConverter : public ListConverter { } auto append_value = [this](SEXP value) { - R_xlen_t n = XLENGTH(value); + auto n = vctrs::short_vec_size(value); RETURN_NOT_OK(this->list_builder_->ValidateOverflow(n)); RETURN_NOT_OK(this->list_builder_->Append()); return this->value_converter_.get()->Extend(value, n); From fede379f9d8151aa1c689be12293dbdc61086d31 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 5 Feb 2021 11:51:03 +0100 Subject: [PATCH 53/82] look out for degenerated data frames --- r/src/r_to_arrow.cpp | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 830c86f62ee..c3752619d18 100644 --- 
a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -434,7 +434,7 @@ class RPrimitiveConverter::value>> Status Extend(SEXP x, int64_t size) override { auto rtype = GetVectorType(x); if (rtype != BOOLEAN) { - return Status::Invalid("cannot convert"); + return Status::Invalid("Expecting a logical vector"); } RETURN_NOT_OK(this->Reserve(size)); @@ -853,6 +853,13 @@ class RStructConverter : public StructConverter { } } + for (R_xlen_t i = 0; i < n_columns; i++) { + SEXP x_i = VECTOR_ELT(x, i); + if (vctrs::short_vec_size(x_i) < size) { + return Status::RError("Degenerated data frame"); + } + } + RETURN_NOT_OK(this->Reserve(size)); for (R_xlen_t i = 0; i < size; i++) { From aac90798c45183ab7e6c29baae488978a295ef99 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 5 Feb 2021 12:00:28 +0100 Subject: [PATCH 54/82] update error message to match old api --- r/src/r_to_arrow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index c3752619d18..e1838f4e8a0 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -694,7 +694,7 @@ class RPrimitiveConverter> int64_t start = 0; RVectorType rtype = GetVectorType(x); if (rtype != STRING) { - return Status::Invalid("invalid R type to convert to string"); + return Status::Invalid("Expecting a character vector"); } cpp11::strings s(arrow::r::utf8_strings(x)); From 91afc374e1a199c82c47b0cc6693b6596c9151dd Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 5 Feb 2021 14:25:15 +0100 Subject: [PATCH 55/82] remove tests that no longer fail --- r/tests/testthat/test-Array.R | 3 --- 1 file changed, 3 deletions(-) diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index 464b81cd689..14621fb3ee1 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -652,8 +652,6 @@ test_that("Handling string data with embedded nuls", { test_that("Array$create() should have helpful error", { 
expect_error(Array$create(list(numeric(0)), list_of(bool())), "Expecting a logical vector") - expect_error(Array$create(list(numeric(0)), list_of(int32())), "Expecting an integer vector") - expect_error(Array$create(list(integer(0)), list_of(float64())), "Expecting a numeric vector") lgl <- logical(0) int <- integer(0) @@ -662,7 +660,6 @@ test_that("Array$create() should have helpful error", { expect_error(Array$create(list()), "Requires at least one element to infer") expect_error(Array$create(list(lgl, lgl, int)), "Expecting a logical vector") expect_error(Array$create(list(char, num, char)), "Expecting a character vector") - expect_error(Array$create(list(int, int, num)), "Expecting an integer vector") }) test_that("Array$View() (ARROW-6542)", { From a4ab89fcc50575e246ca5108e8c7dbda50854745 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 5 Feb 2021 16:34:27 +0100 Subject: [PATCH 56/82] insert levels first into memo when ingesting factors --- r/src/r_to_arrow.cpp | 15 +++++++++++---- r/tests/testthat/test-chunked-array.R | 24 ++++++++++++------------ 2 files changed, 23 insertions(+), 16 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index e1838f4e8a0..c56014c42f5 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -750,11 +750,19 @@ class RDictionaryConverter> } }; -template -class RDictionaryConverter> - : public DictionaryConverter { +template +class RDictionaryConverter> + : public DictionaryConverter { public: + using BuilderType = DictionaryBuilder; + Status Extend(SEXP x, int64_t size) override { + // first we need to handle the levels + cpp11::strings levels(Rf_getAttrib(x, R_LevelsSymbol)); + auto memo_array = arrow::r::vec_to_arrow(levels, utf8(), false); + RETURN_NOT_OK(this->value_builder_->InsertMemoValues(*memo_array)); + + // then we can proceed RETURN_NOT_OK(this->Reserve(size)); RVectorType rtype = GetVectorType(x); @@ -762,7 +770,6 @@ class RDictionaryConverter> return Status::Invalid("invalid R type 
to convert to dictionary"); } - cpp11::strings levels(Rf_getAttrib(x, R_LevelsSymbol)); auto append_value = [this, levels](int value) { SEXP s = STRING_ELT(levels, value - 1); return this->value_builder_->Append(CHAR(s)); diff --git a/r/tests/testthat/test-chunked-array.R b/r/tests/testthat/test-chunked-array.R index 792e140c137..a5ff6ef4812 100644 --- a/r/tests/testthat/test-chunked-array.R +++ b/r/tests/testthat/test-chunked-array.R @@ -220,23 +220,23 @@ test_that("chunked_array() supports the type= argument. conversion from INTSXP a }) test_that("ChunkedArray$create() aborts on overflow", { - expect_error(chunked_array(128L, type = int8())$type, "Invalid.*Value is too large") - expect_error(chunked_array(-129L, type = int8())$type, "Invalid.*Value is too large") + expect_error(chunked_array(128L, type = int8())$type) + expect_error(chunked_array(-129L, type = int8())$type) - expect_error(chunked_array(256L, type = uint8())$type, "Invalid.*Value is too large") - expect_error(chunked_array(-1L, type = uint8())$type, "Invalid.*Value is too large") + expect_error(chunked_array(256L, type = uint8())$type) + expect_error(chunked_array(-1L, type = uint8())$type) - expect_error(chunked_array(32768L, type = int16())$type, "Invalid.*Value is too large") - expect_error(chunked_array(-32769L, type = int16())$type, "Invalid.*Value is too large") + expect_error(chunked_array(32768L, type = int16())$type) + expect_error(chunked_array(-32769L, type = int16())$type) - expect_error(chunked_array(65536L, type = uint16())$type, "Invalid.*Value is too large") - expect_error(chunked_array(-1L, type = uint16())$type, "Invalid.*Value is too large") + expect_error(chunked_array(65536L, type = uint16())$type) + expect_error(chunked_array(-1L, type = uint16())$type) - expect_error(chunked_array(65536L, type = uint16())$type, "Invalid.*Value is too large") - expect_error(chunked_array(-1L, type = uint16())$type, "Invalid.*Value is too large") + expect_error(chunked_array(65536L, type = 
uint16())$type) + expect_error(chunked_array(-1L, type = uint16())$type) - expect_error(chunked_array(bit64::as.integer64(2^31), type = int32()), "Invalid.*Value is too large") - expect_error(chunked_array(bit64::as.integer64(2^32), type = uint32()), "Invalid.*Value is too large") + expect_error(chunked_array(bit64::as.integer64(2^31), type = int32())) + expect_error(chunked_array(bit64::as.integer64(2^32), type = uint32())) }) test_that("chunked_array() convert doubles to integers", { From 33f8cd7aab93ab9bfe5c3db7ddde24a131227092 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 5 Feb 2021 17:54:10 +0100 Subject: [PATCH 57/82] re-enable the POSIXlt to struct type thing --- r/src/r_to_arrow.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index c56014c42f5..6809dad58bf 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -67,6 +67,7 @@ enum RVectorType { DATE_DBL, TIME, POSIXCT, + POSIXLT, BINARY, LIST, FACTOR, @@ -112,6 +113,10 @@ RVectorType GetVectorType(SEXP x) { return DATAFRAME; } + if (Rf_inherits(x, "POSIXlt")) { + return POSIXLT; + } + if (Rf_inherits(x, "arrow_binary")) { return BINARY; } @@ -839,7 +844,8 @@ class RStructConverter : public StructConverter { Status Extend(SEXP x, int64_t size) override { // check that x is compatible R_xlen_t n_columns = XLENGTH(x); - if (!Rf_inherits(x, "data.frame")) { + + if (!Rf_inherits(x, "data.frame") && !Rf_inherits(x, "POSIXlt")) { return Status::Invalid("Can only convert data frames to Struct type"); } From f2d4f8a5747c6806859550f6c34984be00a066e1 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 8 Feb 2021 12:01:19 +0100 Subject: [PATCH 58/82] tweak error message when Extend() fails on a column of a struct converter --- r/src/r_to_arrow.cpp | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 6809dad58bf..357277ff21d 100644 --- a/r/src/r_to_arrow.cpp
+++ b/r/src/r_to_arrow.cpp @@ -880,7 +880,10 @@ class RStructConverter : public StructConverter { } for (R_xlen_t i = 0; i < n_columns; i++) { - RETURN_NOT_OK(children_[i]->Extend(VECTOR_ELT(x, i), size)); + auto status = children_[i]->Extend(VECTOR_ELT(x, i), size); + if (!status.ok()) { + return Status::Invalid("Problem with column ", (i + 1), " (", fields[i]->name(), "): ", status.ToString()); + } } return Status::OK(); From 24d33347cdfa4465b7088a1a7ed4ad941356e8c8 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 8 Feb 2021 12:35:58 +0100 Subject: [PATCH 59/82] handle ordered dictionaries --- r/src/array_from_vector.cpp | 2 +- r/src/r_to_arrow.cpp | 4 +++- r/tests/testthat/test-Array.R | 2 -- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp index eea4aab3d6d..b96a8a9976a 100644 --- a/r/src/array_from_vector.cpp +++ b/r/src/array_from_vector.cpp @@ -1382,7 +1382,7 @@ arrow::Status CheckCompatibleStruct(SEXP obj, std::shared_ptr Array__from_vector( SEXP x, const std::shared_ptr& type, bool type_inferred) { // new api - // return vec_to_arrow(x, type, type_inferred); + return vec_to_arrow(x, type, type_inferred); // short circuit if `x` is already an Array if (Rf_inherits(x, "Array")) { diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 357277ff21d..b1bfca78664 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -788,7 +788,9 @@ class RDictionaryConverter> auto result_type = checked_cast(result->type().get()); if (this->dict_type_->ordered() && !result_type->ordered()) { - return Status::Invalid("converter api seems to lose dictionary orderness"); + // TODO: we should not have to do that, there is probably something wrong + // in the DictionaryBuilder code + result->data()->type = arrow::dictionary(result_type->index_type(), result_type->value_type(), true); } return result; diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index 
14621fb3ee1..5cf0b0dad0f 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -231,8 +231,6 @@ test_that("Array supports unordered factors (ARROW-3355)", { }) test_that("Array supports ordered factors (ARROW-3355)", { - skip("until converter api handles them") - # without NA f <- ordered(c("itsy", "bitsy", "spider", "spider")) arr_fac <- expect_array_roundtrip(f, dictionary(int8(), utf8(), ordered = TRUE)) From 86a4d3a9ec1889b76e803218a6f0366e9199feb8 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 8 Feb 2021 13:50:02 +0100 Subject: [PATCH 60/82] lint --- r/src/r_to_arrow.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index b1bfca78664..f964c14b82b 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -790,7 +790,8 @@ class RDictionaryConverter> if (this->dict_type_->ordered() && !result_type->ordered()) { // TODO: we should not have to do that, there is probably something wrong // in the DictionaryBuilder code - result->data()->type = arrow::dictionary(result_type->index_type(), result_type->value_type(), true); + result->data()->type = + arrow::dictionary(result_type->index_type(), result_type->value_type(), true); } return result; @@ -884,7 +885,8 @@ class RStructConverter : public StructConverter { for (R_xlen_t i = 0; i < n_columns; i++) { auto status = children_[i]->Extend(VECTOR_ELT(x, i), size); if (!status.ok()) { - return Status::Invalid("Problem with column ", (i + 1), " (", fields[i]->name(), "): ", status.ToString()); + return Status::Invalid("Problem with column ", (i + 1), " (", fields[i]->name(), + "): ", status.ToString()); } } From a25c29907e5c58901af72c9391b41fdb76751fa2 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 8 Feb 2021 14:43:29 +0100 Subject: [PATCH 61/82] Visit always starts at 0, so remove start parameter --- r/src/r_to_arrow.cpp | 64 ++++++++++++++++++++++---------------------- 1 file changed, 32 
insertions(+), 32 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index f964c14b82b..ef7f04fa606 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -173,10 +173,10 @@ struct RVectorVisitor { typename std::conditional::value, double, T>::type; template - static Status Visit(SEXP x, R_xlen_t start, R_xlen_t size, AppendNull&& append_null, + static Status Visit(SEXP x, int64_t size, AppendNull&& append_null, AppendValue&& append_value) { cpp11::r_vector values(x); - auto it = values.begin() + start; + auto it = values.begin(); for (R_xlen_t i = 0; i < size; i++, ++it) { auto value = GetValue(*it); @@ -318,13 +318,13 @@ class RPrimitiveConverter< auto rtype = GetVectorType(x); switch (rtype) { case UINT8: - return AppendRangeDispatch(x, 0, size); + return AppendRangeDispatch(x, size); case INT32: - return AppendRangeDispatch(x, 0, size); + return AppendRangeDispatch(x, size); case FLOAT64: - return AppendRangeDispatch(x, 0, size); + return AppendRangeDispatch(x, size); case INT64: - return AppendRangeDispatch(x, 0, size); + return AppendRangeDispatch(x, size); default: break; @@ -335,7 +335,7 @@ class RPrimitiveConverter< private: template - Status AppendRangeLoopDifferentType(SEXP x, R_xlen_t start, R_xlen_t size) { + Status AppendRangeLoopDifferentType(SEXP x, int64_t size) { RETURN_NOT_OK(this->Reserve(size)); auto append_value = [this](r_value_type value) { @@ -348,12 +348,12 @@ class RPrimitiveConverter< this->primitive_builder_->UnsafeAppendNull(); return Status::OK(); }; - return RVectorVisitor::Visit(x, start, size, append_null, append_value); + return RVectorVisitor::Visit(x, size, append_null, append_value); } template - Status AppendRangeSameTypeNotALTREP(SEXP x, R_xlen_t start, R_xlen_t size) { - auto p = reinterpret_cast(DATAPTR_RO(x)) + start; + Status AppendRangeSameTypeNotALTREP(SEXP x, int64_t size) { + auto p = reinterpret_cast(DATAPTR_RO(x)); auto p_end = p + size; auto first_na = std::find_if(p, p_end, is_NA); @@ 
-381,12 +381,12 @@ class RPrimitiveConverter< } template - Status AppendRangeSameTypeALTREP(SEXP x, R_xlen_t start, R_xlen_t size) { + Status AppendRangeSameTypeALTREP(SEXP x, int64_t size) { // if it is altrep, then we use cpp11 looping // without needing to convert RETURN_NOT_OK(this->primitive_builder_->Reserve(size)); cpp11::r_vector vec(x); - auto it = vec.begin() + start; + auto it = vec.begin(); for (R_xlen_t i = 0; i < size; i++, ++it) { r_value_type value = *it; if (is_NA(value)) { @@ -399,12 +399,12 @@ class RPrimitiveConverter< } template <> - Status AppendRangeSameTypeALTREP(SEXP x, R_xlen_t start, R_xlen_t size) { + Status AppendRangeSameTypeALTREP(SEXP x, int64_t size) { // if it is altrep, then we use cpp11 looping // without needing to convert RETURN_NOT_OK(this->primitive_builder_->Reserve(size)); cpp11::r_vector vec(x); - auto it = vec.begin() + start; + auto it = vec.begin(); for (R_xlen_t i = 0; i < size; i++, ++it) { double d = *it; int64_t value = *reinterpret_cast(&d); @@ -418,17 +418,17 @@ class RPrimitiveConverter< } template - Status AppendRangeDispatch(SEXP x, R_xlen_t start, R_xlen_t size) { + Status AppendRangeDispatch(SEXP x, int64_t size) { if (std::is_same::value) { if (!ALTREP(x)) { - return AppendRangeSameTypeNotALTREP(x, start, size); + return AppendRangeSameTypeNotALTREP(x, size); } else { - return AppendRangeSameTypeALTREP(x, start, size); + return AppendRangeSameTypeALTREP(x, size); } } // here if underlying types differ so going - return AppendRangeLoopDifferentType(x, start, size); + return AppendRangeLoopDifferentType(x, size); } }; @@ -451,7 +451,7 @@ class RPrimitiveConverter::value>> this->primitive_builder_->UnsafeAppendNull(); return Status::OK(); }; - return RVectorVisitor::Visit(x, 0, size, append_null, append_value); + return RVectorVisitor::Visit(x, size, append_null, append_value); } }; @@ -464,13 +464,13 @@ class RPrimitiveConverter::value>> switch (GetVectorType(x)) { case DATE_INT: - return AppendRange_Date(x, 
0, size); + return AppendRange_Date(x, size); case DATE_DBL: - return AppendRange_Date(x, 0, size); + return AppendRange_Date(x, size); case POSIXCT: - return AppendRange_Posixct(x, 0, size); + return AppendRange_Posixct(x, size); default: break; @@ -481,7 +481,7 @@ class RPrimitiveConverter::value>> private: template - Status AppendRange_Date(SEXP x, R_xlen_t start, R_xlen_t size) { + Status AppendRange_Date(SEXP x, int64_t size) { auto append_null = [this]() { this->primitive_builder_->UnsafeAppendNull(); return Status::OK(); @@ -491,10 +491,10 @@ class RPrimitiveConverter::value>> return Status::OK(); }; - return RVectorVisitor::Visit(x, start, size, append_null, append_value); + return RVectorVisitor::Visit(x, size, append_null, append_value); } - Status AppendRange_Posixct(SEXP x, R_xlen_t start, R_xlen_t size) { + Status AppendRange_Posixct(SEXP x, int64_t size) { auto append_null = [this]() { this->primitive_builder_->UnsafeAppendNull(); return Status::OK(); @@ -504,7 +504,7 @@ class RPrimitiveConverter::value>> return Status::OK(); }; - return RVectorVisitor::Visit(x, start, size, append_null, append_value); + return RVectorVisitor::Visit(x, size, append_null, append_value); } static int FromRDate(const Date32Type*, int from) { return from; } @@ -578,7 +578,7 @@ class RPrimitiveConverter::value>> this->primitive_builder_->UnsafeAppendNull(); return Status::OK(); }; - return RVectorVisitor::Visit(x, 0, size, append_null, append_value); + return RVectorVisitor::Visit(x, size, append_null, append_value); } }; @@ -605,7 +605,7 @@ class RPrimitiveConverter::value>> this->primitive_builder_->UnsafeAppendNull(); return Status::OK(); }; - return RVectorVisitor::Visit(x, 0, size, append_null, append_value); + return RVectorVisitor::Visit(x, size, append_null, append_value); } }; @@ -659,7 +659,7 @@ class RPrimitiveConverter> this->primitive_builder_->UnsafeAppendNull(); return Status::OK(); }; - return RVectorVisitor::Visit(x, 0, size, append_null, append_value); + 
return RVectorVisitor::Visit(x, size, append_null, append_value); } }; @@ -685,7 +685,7 @@ class RPrimitiveConverter::v this->primitive_builder_->UnsafeAppendNull(); return Status::OK(); }; - return RVectorVisitor::Visit(x, 0, size, append_null, append_value); + return RVectorVisitor::Visit(x, size, append_null, append_value); } }; @@ -780,7 +780,7 @@ class RDictionaryConverter> return this->value_builder_->Append(CHAR(s)); }; auto append_null = [this]() { return this->value_builder_->AppendNull(); }; - return RVectorVisitor::Visit(x, 0, size, append_null, append_value); + return RVectorVisitor::Visit(x, size, append_null, append_value); } Result> ToArray() override { @@ -831,7 +831,7 @@ class RListConverter : public ListConverter { return this->value_converter_.get()->Extend(value, n); }; auto append_null = [this]() { return this->list_builder_->AppendNull(); }; - return RVectorVisitor::Visit(x, 0, size, append_null, append_value); + return RVectorVisitor::Visit(x, size, append_null, append_value); } }; From ef6c9104a06ece64e76e33646a53425ea59cb47b Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 8 Feb 2021 15:39:56 +0100 Subject: [PATCH 62/82] avoid full specialisation inside the class --- r/src/r_to_arrow.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index ef7f04fa606..4bffa1350af 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -398,8 +398,7 @@ class RPrimitiveConverter< return Status::OK(); } - template <> - Status AppendRangeSameTypeALTREP(SEXP x, int64_t size) { + Status AppendRangeSameTypeALTREP_int64(SEXP x, int64_t size) { // if it is altrep, then we use cpp11 looping // without needing to convert RETURN_NOT_OK(this->primitive_builder_->Reserve(size)); @@ -422,6 +421,8 @@ class RPrimitiveConverter< if (std::is_same::value) { if (!ALTREP(x)) { return AppendRangeSameTypeNotALTREP(x, size); + } else if (std::is_same::value) { + return 
AppendRangeSameTypeALTREP_int64(x, size); } else { return AppendRangeSameTypeALTREP(x, size); } From 90cd418ebee0e0a13ab0559b09c9e0a5860fbefb Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 8 Feb 2021 16:08:00 +0100 Subject: [PATCH 63/82] merge both AppendRangeSameTypeALTREP impl --- r/src/r_to_arrow.cpp | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 4bffa1350af..4c5c1dca106 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -380,34 +380,16 @@ class RPrimitiveConverter< return Status::OK(); } - template - Status AppendRangeSameTypeALTREP(SEXP x, int64_t size) { + template + Status AppendRangeSameTypeALTREP(SEXP x, int64_t size, Extract extract) { // if it is altrep, then we use cpp11 looping // without needing to convert RETURN_NOT_OK(this->primitive_builder_->Reserve(size)); - cpp11::r_vector vec(x); + vector vec(x); auto it = vec.begin(); for (R_xlen_t i = 0; i < size; i++, ++it) { - r_value_type value = *it; - if (is_NA(value)) { - this->primitive_builder_->UnsafeAppendNull(); - } else { - this->primitive_builder_->UnsafeAppend(value); - } - } - return Status::OK(); - } - - Status AppendRangeSameTypeALTREP_int64(SEXP x, int64_t size) { - // if it is altrep, then we use cpp11 looping - // without needing to convert - RETURN_NOT_OK(this->primitive_builder_->Reserve(size)); - cpp11::r_vector vec(x); - auto it = vec.begin(); - for (R_xlen_t i = 0; i < size; i++, ++it) { - double d = *it; - int64_t value = *reinterpret_cast(&d); - if (is_NA(value)) { + r_value_type value = extract(*it); + if (is_NA(value)) { this->primitive_builder_->UnsafeAppendNull(); } else { this->primitive_builder_->UnsafeAppend(value); @@ -422,9 +404,13 @@ class RPrimitiveConverter< if (!ALTREP(x)) { return AppendRangeSameTypeNotALTREP(x, size); } else if (std::is_same::value) { - return AppendRangeSameTypeALTREP_int64(x, size); + auto extract = [](double d) { 
return *reinterpret_cast(&d); }; + return AppendRangeSameTypeALTREP( + x, size, extract); } else { - return AppendRangeSameTypeALTREP(x, size); + auto extract = [](r_value_type value) { return value; }; + return AppendRangeSameTypeALTREP, + decltype(extract)>(x, size, extract); } } From 784173d9fabb5e3e1af07994e5cc6632639ec518 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 9 Feb 2021 10:20:11 +0100 Subject: [PATCH 64/82] fix "dereferencing type-punned pointer will break strict-aliasing rules" --- r/src/r_to_arrow.cpp | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 4c5c1dca106..d19cf622394 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -309,6 +309,12 @@ class RPrimitiveConverter> } }; +int64_t int64_from_double(double d) { + int64_t value; + memcpy(&value, &d, sizeof(int64_t)); + return value; +} + template class RPrimitiveConverter< T, enable_if_t::value || is_floating_type::value>> @@ -404,9 +410,9 @@ class RPrimitiveConverter< if (!ALTREP(x)) { return AppendRangeSameTypeNotALTREP(x, size); } else if (std::is_same::value) { - auto extract = [](double d) { return *reinterpret_cast(&d); }; - return AppendRangeSameTypeALTREP( - x, size, extract); + return AppendRangeSameTypeALTREP(x, size, + int64_from_double); } else { auto extract = [](r_value_type value) { return value; }; return AppendRangeSameTypeALTREP, From 62659899069db55cc46199f903aa149625ca20c3 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 9 Feb 2021 11:34:42 +0100 Subject: [PATCH 65/82] comparison between signed and unsigned integer expressions --- r/src/r_to_arrow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index d19cf622394..fa56ad68e9b 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -846,7 +846,7 @@ class RStructConverter : public StructConverter { } auto fields = this->struct_type_->fields(); - if 
(n_columns != fields.size()) { + if (static_cast(n_columns) != fields.size()) { return Status::RError("Number of fields in struct (", fields.size(), ") incompatible with number of columns in the data frame (", n_columns, ")"); From fddcdc7616f6a9ef4c5d8740f2a79a770f181aa5 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 9 Feb 2021 11:43:51 +0100 Subject: [PATCH 66/82] simplify AppendRangeSameTypeALTREP by improving RVectorVisitor inner types and GetValue --- r/src/r_to_arrow.cpp | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index fa56ad68e9b..cc1f2866145 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -171,11 +171,12 @@ template struct RVectorVisitor { using data_type = typename std::conditional::value, double, T>::type; + using r_vector_type = cpp11::r_vector; template static Status Visit(SEXP x, int64_t size, AppendNull&& append_null, AppendValue&& append_value) { - cpp11::r_vector values(x); + r_vector_type values(x); auto it = values.begin(); for (R_xlen_t i = 0; i < size; i++, ++it) { @@ -196,7 +197,9 @@ struct RVectorVisitor { template <> int64_t RVectorVisitor::GetValue(double x) { - return *reinterpret_cast(&x); + int64_t value; + memcpy(&value, &x, sizeof(int64_t)); + return value; } class RConverter : public Converter { @@ -309,12 +312,6 @@ class RPrimitiveConverter> } }; -int64_t int64_from_double(double d) { - int64_t value; - memcpy(&value, &d, sizeof(int64_t)); - return value; -} - template class RPrimitiveConverter< T, enable_if_t::value || is_floating_type::value>> @@ -386,15 +383,15 @@ class RPrimitiveConverter< return Status::OK(); } - template - Status AppendRangeSameTypeALTREP(SEXP x, int64_t size, Extract extract) { + template + Status AppendRangeSameTypeALTREP(SEXP x, int64_t size) { // if it is altrep, then we use cpp11 looping // without needing to convert RETURN_NOT_OK(this->primitive_builder_->Reserve(size)); - vector 
vec(x); + typename RVectorVisitor::r_vector_type vec(x); auto it = vec.begin(); for (R_xlen_t i = 0; i < size; i++, ++it) { - r_value_type value = extract(*it); + r_value_type value = RVectorVisitor::GetValue(*it); if (is_NA(value)) { this->primitive_builder_->UnsafeAppendNull(); } else { @@ -409,14 +406,8 @@ class RPrimitiveConverter< if (std::is_same::value) { if (!ALTREP(x)) { return AppendRangeSameTypeNotALTREP(x, size); - } else if (std::is_same::value) { - return AppendRangeSameTypeALTREP(x, size, - int64_from_double); } else { - auto extract = [](r_value_type value) { return value; }; - return AppendRangeSameTypeALTREP, - decltype(extract)>(x, size, extract); + return AppendRangeSameTypeALTREP(x, size); } } From a5313f8a782e5e719aee47af9ef126647be88fb0 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 9 Feb 2021 14:20:40 +0100 Subject: [PATCH 67/82] comparison between signed and unsigned integer expressions [-Werror=sign-compare --- r/src/r_to_arrow.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index cc1f2866145..b960ed341d1 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -837,7 +837,7 @@ class RStructConverter : public StructConverter { } auto fields = this->struct_type_->fields(); - if (static_cast(n_columns) != fields.size()) { + if (n_columns != static_cast(fields.size())) { return Status::RError("Number of fields in struct (", fields.size(), ") incompatible with number of columns in the data frame (", n_columns, ")"); From 60af00c3fe7c8f8748066b5e597fbc740156d733 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 9 Feb 2021 16:36:50 +0100 Subject: [PATCH 68/82] type_inferred was misused --- r/src/r_to_arrow.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index b960ed341d1..2607883cfe1 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -997,6 +997,7 @@ std::shared_ptr 
vec_to_arrow(SEXP x, // otherwise go through the converter api auto converter = ValueOrStop(MakeConverter( options.type, options, gc_memory_pool())); + StopIfNotOk(converter->Extend(x, options.size)); return ValueOrStop(converter->ToArray()); } @@ -1009,10 +1010,11 @@ SEXP vec_to_arrow(SEXP x, SEXP s_type) { if (Rf_inherits(x, "Array")) return x; bool type_inferred = Rf_isNull(s_type); std::shared_ptr type; + if (type_inferred) { - type = cpp11::as_cpp>(s_type); - } else { type = type = arrow::r::InferArrowType(x); + } else { + type = cpp11::as_cpp>(s_type); } return cpp11::to_r6(arrow::r::vec_to_arrow(x, type, type_inferred)); } From 2ae15982f2cf60e40e149cd4fd113cea0ceaee3a Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 9 Feb 2021 17:06:22 +0100 Subject: [PATCH 69/82] rename Array__from_vector_reuse_memory --- r/src/array_from_vector.cpp | 2 +- r/src/arrow_types.h | 2 +- r/src/r_to_arrow.cpp | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp index b96a8a9976a..c142f039540 100644 --- a/r/src/array_from_vector.cpp +++ b/r/src/array_from_vector.cpp @@ -1392,7 +1392,7 @@ std::shared_ptr Array__from_vector( // special case when we can just use the data from the R vector // directly. 
This still needs to handle the null bitmap if (arrow::r::can_reuse_memory(x, type)) { - return arrow::r::Array__from_vector_reuse_memory(x); + return arrow::r::vec_to_arrow__reuse_memory(x); } // factors only when type has been inferred diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 16c4679ad46..d57ae29288f 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -83,7 +83,7 @@ auto ValueOrStop(R&& result) -> decltype(std::forward(result).ValueOrDie()) { namespace r { std::shared_ptr InferArrowType(SEXP x); -std::shared_ptr Array__from_vector_reuse_memory(SEXP x); +std::shared_ptr vec_to_arrow__reuse_memory(SEXP x); bool can_reuse_memory(SEXP x, const std::shared_ptr& type); Status count_fields(SEXP lst, int* out); diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 2607883cfe1..7c225674453 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -960,7 +960,7 @@ std::shared_ptr MakeSimpleArray(SEXP x) { return std::make_shared::ArrayType>(data); } -std::shared_ptr Array__from_vector_reuse_memory(SEXP x) { +std::shared_ptr vec_to_arrow__reuse_memory(SEXP x) { auto type = TYPEOF(x); if (type == INTSXP) { @@ -991,7 +991,7 @@ std::shared_ptr vec_to_arrow(SEXP x, // maybe short circuit when zero-copy is possible if (can_reuse_memory(x, options.type)) { - return Array__from_vector_reuse_memory(x); + return vec_to_arrow__reuse_memory(x); } // otherwise go through the converter api From 1e1463fa9b1067e795541fc31e062154e544be03 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 9 Feb 2021 17:06:58 +0100 Subject: [PATCH 70/82] switch to call vec_to_arrow() from Array$create() --- r/R/array.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/R/array.R b/r/R/array.R index ec2b545dfae..090f1d53f57 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -143,7 +143,7 @@ Array$create <- function(x, type = NULL) { if (!is.null(type)) { type <- as_type(type) } - Array__from_vector(x, type) + vec_to_arrow(x, type) } #' @rdname 
array From 5e950f902b5db4bb23dc3c7c9c6709884a1bb9fe Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 9 Feb 2021 17:22:12 +0100 Subject: [PATCH 71/82] Table__from_dots() evetually calls vec_to_arrow() when converting R vectors to Arrays --- r/src/table.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/src/table.cpp b/r/src/table.cpp index 14e5f4e92b5..081d14a4c5c 100644 --- a/r/src/table.cpp +++ b/r/src/table.cpp @@ -265,7 +265,7 @@ arrow::Status CollectTableColumns( columns[j] = std::make_shared( cpp11::as_cpp>(x)); } else { - auto array = arrow::r::Array__from_vector(x, schema->field(j)->type(), inferred); + auto array = arrow::r::vec_to_arrow(x, schema->field(j)->type(), inferred); columns[j] = std::make_shared(array); } }; From c4bc7c630bd03def7338a2e17381596654f52912 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 9 Feb 2021 17:28:00 +0100 Subject: [PATCH 72/82] calling vec_to_arrow() from recordbatch.cpp --- r/src/recordbatch.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/src/recordbatch.cpp b/r/src/recordbatch.cpp index 715bf8ac65b..74bc9ec92f1 100644 --- a/r/src/recordbatch.cpp +++ b/r/src/recordbatch.cpp @@ -264,7 +264,7 @@ std::shared_ptr RecordBatch__from_arrays__known_schema( cpp11::stop("field at index %d has name '%s' != '%s'", j + 1, schema->field(j)->name().c_str(), name.c_str()); } - arrays[j] = arrow::r::Array__from_vector(x, schema->field(j)->type(), false); + arrays[j] = arrow::r::vec_to_arrow(x, schema->field(j)->type(), false); }; arrow::r::TraverseDots(lst, num_fields, fill_array); @@ -281,7 +281,7 @@ arrow::Status CollectRecordBatchArrays( SEXP lst, const std::shared_ptr& schema, int num_fields, bool inferred, std::vector>& arrays) { auto extract_one_array = [&arrays, &schema, inferred](int j, SEXP x, cpp11::r_string) { - arrays[j] = arrow::r::Array__from_vector(x, schema->field(j)->type(), inferred); + arrays[j] = arrow::r::vec_to_arrow(x, schema->field(j)->type(), 
inferred); }; arrow::r::TraverseDots(lst, num_fields, extract_one_array); return arrow::Status::OK(); From c990cf8ef31498ea32747b332189f9ccd9f728b8 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 9 Feb 2021 17:36:14 +0100 Subject: [PATCH 73/82] restrict Array__from_vector() to its file --- r/src/array_from_vector.cpp | 2 ++ r/src/arrow_types.h | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp index c142f039540..91629fad019 100644 --- a/r/src/array_from_vector.cpp +++ b/r/src/array_from_vector.cpp @@ -29,6 +29,8 @@ using arrow::internal::checked_cast; +std::shared_ptr Array__from_vector(SEXP x, SEXP type); + namespace arrow { namespace r { diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index d57ae29288f..d0561be90a0 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -49,7 +49,6 @@ namespace fs = ::arrow::fs; SEXP ChunkedArray__as_vector(const std::shared_ptr& chunked_array); SEXP Array__as_vector(const std::shared_ptr& array); -std::shared_ptr Array__from_vector(SEXP x, SEXP type); std::shared_ptr RecordBatch__from_arrays(SEXP, SEXP); arrow::MemoryPool* gc_memory_pool(); From fc5883d6fd72e6a5554012874b57bb2e12aec8e4 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 10 Feb 2021 09:57:28 +0100 Subject: [PATCH 74/82] - R callable Array__from_vector() mv type inference to separate file --- r/R/arrowExports.R | 12 +-- r/src/array_from_vector.cpp | 191 +--------------------------------- r/src/arrowExports.cpp | 28 ++--- r/src/arrow_types.h | 3 - r/src/r_to_arrow.cpp | 2 +- r/src/type_infer.cpp | 202 ++++++++++++++++++++++++++++++++++++ 6 files changed, 219 insertions(+), 219 deletions(-) create mode 100644 r/src/type_infer.cpp diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index bd21ee28948..e70873bde5c 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -132,14 +132,6 @@ LargeListArray__raw_value_offsets <- function(array){ 
.Call(`_arrow_LargeListArray__raw_value_offsets`, array) } -Array__infer_type <- function(x){ - .Call(`_arrow_Array__infer_type`, x) -} - -Array__from_vector <- function(x, s_type){ - .Call(`_arrow_Array__from_vector`, x, s_type) -} - ChunkedArray__from_list <- function(chunks, s_type){ .Call(`_arrow_ChunkedArray__from_list`, chunks, s_type) } @@ -1592,5 +1584,9 @@ SetCpuThreadPoolCapacity <- function(threads){ invisible(.Call(`_arrow_SetCpuThreadPoolCapacity`, threads)) } +Array__infer_type <- function(x){ + .Call(`_arrow_Array__infer_type`, x) +} + diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp index 91629fad019..28d22ab5b83 100644 --- a/r/src/array_from_vector.cpp +++ b/r/src/array_from_vector.cpp @@ -417,7 +417,7 @@ std::shared_ptr MakeStructArray(SEXP df, const std::shared_ptr& int n = type->num_fields(); std::vector> children(n); for (int i = 0; i < n; i++) { - children[i] = Array__from_vector(VECTOR_ELT(df, i), type->field(i)->type(), true); + children[i] = vec_to_arrow(VECTOR_ELT(df, i), type->field(i)->type(), true); } int64_t rows = n ? 
children[0]->length() : 0; @@ -1171,171 +1171,6 @@ Status GetConverter(const std::shared_ptr& type, return Status::NotImplemented("type not implemented"); } -static inline std::shared_ptr IndexTypeForFactors(int n_factors) { - if (n_factors < INT8_MAX) { - return arrow::int8(); - } else if (n_factors < INT16_MAX) { - return arrow::int16(); - } else { - return arrow::int32(); - } -} - -std::shared_ptr InferArrowTypeFromFactor(SEXP factor) { - SEXP factors = Rf_getAttrib(factor, R_LevelsSymbol); - auto index_type = IndexTypeForFactors(Rf_length(factors)); - bool is_ordered = Rf_inherits(factor, "ordered"); - return dictionary(index_type, arrow::utf8(), is_ordered); -} - -template -std::shared_ptr InferArrowTypeFromVector(SEXP x) { - cpp11::stop("Unknown vector type: ", VectorType); -} - -template <> -std::shared_ptr InferArrowTypeFromVector(SEXP x) { - if (Rf_inherits(x, "Array")) { - return cpp11::as_cpp>(x)->type(); - } - - cpp11::stop("Unrecognized vector instance for type ENVSXP"); -} - -template <> -std::shared_ptr InferArrowTypeFromVector(SEXP x) { - return Rf_inherits(x, "vctrs_unspecified") ? 
null() : boolean(); -} - -template <> -std::shared_ptr InferArrowTypeFromVector(SEXP x) { - if (Rf_isFactor(x)) { - return InferArrowTypeFromFactor(x); - } else if (Rf_inherits(x, "Date")) { - return date32(); - } else if (Rf_inherits(x, "POSIXct")) { - auto tzone_sexp = Rf_getAttrib(x, symbols::tzone); - if (Rf_isNull(tzone_sexp)) { - return timestamp(TimeUnit::MICRO); - } else { - return timestamp(TimeUnit::MICRO, CHAR(STRING_ELT(tzone_sexp, 0))); - } - } - return int32(); -} - -template <> -std::shared_ptr InferArrowTypeFromVector(SEXP x) { - if (Rf_inherits(x, "Date")) { - return date32(); - } - if (Rf_inherits(x, "POSIXct")) { - auto tzone_sexp = Rf_getAttrib(x, symbols::tzone); - if (Rf_isNull(tzone_sexp)) { - return timestamp(TimeUnit::MICRO); - } else { - return timestamp(TimeUnit::MICRO, CHAR(STRING_ELT(tzone_sexp, 0))); - } - } - if (Rf_inherits(x, "integer64")) { - return int64(); - } - if (Rf_inherits(x, "difftime")) { - return time32(TimeUnit::SECOND); - } - return float64(); -} - -template <> -std::shared_ptr InferArrowTypeFromVector(SEXP x) { - return cpp11::unwind_protect([&] { - R_xlen_t n = XLENGTH(x); - - int64_t size = 0; - - for (R_xlen_t i = 0; i < n; i++) { - size += arrow::r::unsafe::r_string_size(STRING_ELT(x, i)); - if (size > arrow::kBinaryMemoryLimit) { - // Exceeds 2GB capacity of utf8 type, so use large - return large_utf8(); - } - } - - return utf8(); - }); -} - -static inline std::shared_ptr InferArrowTypeFromDataFrame( - cpp11::list x) { - R_xlen_t n = x.size(); - cpp11::strings names(x.attr(R_NamesSymbol)); - std::vector> fields(n); - for (R_xlen_t i = 0; i < n; i++) { - fields[i] = arrow::field(names[i], InferArrowType(x[i])); - } - return arrow::struct_(std::move(fields)); -} - -template <> -std::shared_ptr InferArrowTypeFromVector(SEXP x) { - if (Rf_inherits(x, "data.frame") || Rf_inherits(x, "POSIXlt")) { - return InferArrowTypeFromDataFrame(x); - } else { - // some known special cases - if (Rf_inherits(x, 
"arrow_fixed_size_binary")) { - SEXP byte_width = Rf_getAttrib(x, symbols::byte_width); - if (Rf_isNull(byte_width) || TYPEOF(byte_width) != INTSXP || - XLENGTH(byte_width) != 1) { - cpp11::stop("malformed arrow_fixed_size_binary object"); - } - return arrow::fixed_size_binary(INTEGER(byte_width)[0]); - } - - if (Rf_inherits(x, "arrow_binary")) { - return arrow::binary(); - } - - if (Rf_inherits(x, "arrow_large_binary")) { - return arrow::large_binary(); - } - - SEXP ptype = Rf_getAttrib(x, symbols::ptype); - if (Rf_isNull(ptype)) { - if (XLENGTH(x) == 0) { - cpp11::stop( - "Requires at least one element to infer the values' type of a list vector"); - } - - ptype = VECTOR_ELT(x, 0); - } - - return arrow::list(InferArrowType(ptype)); - } -} - -std::shared_ptr InferArrowType(SEXP x) { - switch (TYPEOF(x)) { - case ENVSXP: - return InferArrowTypeFromVector(x); - case LGLSXP: - return InferArrowTypeFromVector(x); - case INTSXP: - return InferArrowTypeFromVector(x); - case REALSXP: - return InferArrowTypeFromVector(x); - case RAWSXP: - return int8(); - case STRSXP: - return InferArrowTypeFromVector(x); - case VECSXP: - return InferArrowTypeFromVector(x); - default: - break; - } - - cpp11::stop("Cannot infer type from vector"); -} - bool CheckCompatibleFactor(SEXP obj, const std::shared_ptr& type) { if (!Rf_inherits(obj, "factor")) { return false; @@ -1449,26 +1284,6 @@ std::shared_ptr Array__from_vector( } // namespace r } // namespace arrow -// [[arrow::export]] -std::shared_ptr Array__infer_type(SEXP x) { - return arrow::r::InferArrowType(x); -} - -// [[arrow::export]] -std::shared_ptr Array__from_vector(SEXP x, SEXP s_type) { - // the type might be NULL, in which case we need to infer it from the data - // we keep track of whether it was inferred or supplied - bool type_inferred = Rf_isNull(s_type); - std::shared_ptr type; - if (type_inferred) { - type = arrow::r::InferArrowType(x); - } else { - type = cpp11::as_cpp>(s_type); - } - - return 
arrow::r::Array__from_vector(x, type, type_inferred); -} - // [[arrow::export]] std::shared_ptr ChunkedArray__from_list(cpp11::list chunks, SEXP s_type) { @@ -1500,10 +1315,10 @@ std::shared_ptr ChunkedArray__from_list(cpp11::list chunks, // because we might have inferred the type from the first element of the list // // this only really matters for dictionary arrays - vec.push_back(arrow::r::Array__from_vector(chunks[0], type, type_inferred)); + vec.push_back(arrow::r::vec_to_arrow(chunks[0], type, type_inferred)); for (R_xlen_t i = 1; i < n; i++) { - vec.push_back(arrow::r::Array__from_vector(chunks[i], type, false)); + vec.push_back(arrow::r::vec_to_arrow(chunks[i], type, false)); } } diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index afa42256d14..6271327c1bd 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -290,23 +290,6 @@ BEGIN_CPP11 END_CPP11 } // array_from_vector.cpp -std::shared_ptr Array__infer_type(SEXP x); -extern "C" SEXP _arrow_Array__infer_type(SEXP x_sexp){ -BEGIN_CPP11 - arrow::r::Input::type x(x_sexp); - return cpp11::as_sexp(Array__infer_type(x)); -END_CPP11 -} -// array_from_vector.cpp -std::shared_ptr Array__from_vector(SEXP x, SEXP s_type); -extern "C" SEXP _arrow_Array__from_vector(SEXP x_sexp, SEXP s_type_sexp){ -BEGIN_CPP11 - arrow::r::Input::type x(x_sexp); - arrow::r::Input::type s_type(s_type_sexp); - return cpp11::as_sexp(Array__from_vector(x, s_type)); -END_CPP11 -} -// array_from_vector.cpp std::shared_ptr ChunkedArray__from_list(cpp11::list chunks, SEXP s_type); extern "C" SEXP _arrow_ChunkedArray__from_list(SEXP chunks_sexp, SEXP s_type_sexp){ BEGIN_CPP11 @@ -3473,6 +3456,14 @@ BEGIN_CPP11 return R_NilValue; END_CPP11 } +// type_infer.cpp +std::shared_ptr Array__infer_type(SEXP x); +extern "C" SEXP _arrow_Array__infer_type(SEXP x_sexp){ +BEGIN_CPP11 + arrow::r::Input::type x(x_sexp); + return cpp11::as_sexp(Array__infer_type(x)); +END_CPP11 +} extern "C" SEXP _arrow_Table__Reset(SEXP r6) { 
BEGIN_CPP11 arrow::r::r6_reset_pointer(r6); @@ -3542,8 +3533,6 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_FixedSizeListArray__value_offset", (DL_FUNC) &_arrow_FixedSizeListArray__value_offset, 2}, { "_arrow_ListArray__raw_value_offsets", (DL_FUNC) &_arrow_ListArray__raw_value_offsets, 1}, { "_arrow_LargeListArray__raw_value_offsets", (DL_FUNC) &_arrow_LargeListArray__raw_value_offsets, 1}, - { "_arrow_Array__infer_type", (DL_FUNC) &_arrow_Array__infer_type, 1}, - { "_arrow_Array__from_vector", (DL_FUNC) &_arrow_Array__from_vector, 2}, { "_arrow_ChunkedArray__from_list", (DL_FUNC) &_arrow_ChunkedArray__from_list, 2}, { "_arrow_DictionaryArray__FromArrays", (DL_FUNC) &_arrow_DictionaryArray__FromArrays, 3}, { "_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1}, @@ -3907,6 +3896,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_Table__from_dots", (DL_FUNC) &_arrow_Table__from_dots, 2}, { "_arrow_GetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_GetCpuThreadPoolCapacity, 0}, { "_arrow_SetCpuThreadPoolCapacity", (DL_FUNC) &_arrow_SetCpuThreadPoolCapacity, 1}, + { "_arrow_Array__infer_type", (DL_FUNC) &_arrow_Array__infer_type, 1}, { "_arrow_Table__Reset", (DL_FUNC) &_arrow_Table__Reset, 1}, { "_arrow_RecordBatch__Reset", (DL_FUNC) &_arrow_RecordBatch__Reset, 1}, {NULL, NULL, 0} diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index d0561be90a0..b37c01c7621 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -87,9 +87,6 @@ bool can_reuse_memory(SEXP x, const std::shared_ptr& type); Status count_fields(SEXP lst, int* out); -std::shared_ptr Array__from_vector( - SEXP x, const std::shared_ptr& type, bool type_inferred); - void inspect(SEXP obj); std::shared_ptr vec_to_arrow(SEXP x, const std::shared_ptr& type, diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 7c225674453..89511c931db 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -1012,7 +1012,7 @@ SEXP vec_to_arrow(SEXP x, SEXP s_type) { 
std::shared_ptr type; if (type_inferred) { - type = type = arrow::r::InferArrowType(x); + type = arrow::r::InferArrowType(x); } else { type = cpp11::as_cpp>(s_type); } diff --git a/r/src/type_infer.cpp b/r/src/type_infer.cpp new file mode 100644 index 00000000000..627bef9acd4 --- /dev/null +++ b/r/src/type_infer.cpp @@ -0,0 +1,202 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#include + +#include "./arrow_types.h" +#include "./arrow_vctrs.h" + +#if defined(ARROW_R_WITH_ARROW) +#include + +namespace arrow { +namespace r { + +static inline std::shared_ptr IndexTypeForFactors(int n_factors) { + if (n_factors < INT8_MAX) { + return arrow::int8(); + } else if (n_factors < INT16_MAX) { + return arrow::int16(); + } else { + return arrow::int32(); + } +} + +std::shared_ptr InferArrowTypeFromFactor(SEXP factor) { + SEXP factors = Rf_getAttrib(factor, R_LevelsSymbol); + auto index_type = IndexTypeForFactors(Rf_length(factors)); + bool is_ordered = Rf_inherits(factor, "ordered"); + return dictionary(index_type, arrow::utf8(), is_ordered); +} + +template +std::shared_ptr InferArrowTypeFromVector(SEXP x) { + cpp11::stop("Unknown vector type: ", VectorType); +} + +template <> +std::shared_ptr InferArrowTypeFromVector(SEXP x) { + if (Rf_inherits(x, "Array")) { + return cpp11::as_cpp>(x)->type(); + } + + cpp11::stop("Unrecognized vector instance for type ENVSXP"); +} + +template <> +std::shared_ptr InferArrowTypeFromVector(SEXP x) { + return Rf_inherits(x, "vctrs_unspecified") ? 
null() : boolean(); +} + +template <> +std::shared_ptr InferArrowTypeFromVector(SEXP x) { + if (Rf_isFactor(x)) { + return InferArrowTypeFromFactor(x); + } else if (Rf_inherits(x, "Date")) { + return date32(); + } else if (Rf_inherits(x, "POSIXct")) { + auto tzone_sexp = Rf_getAttrib(x, symbols::tzone); + if (Rf_isNull(tzone_sexp)) { + return timestamp(TimeUnit::MICRO); + } else { + return timestamp(TimeUnit::MICRO, CHAR(STRING_ELT(tzone_sexp, 0))); + } + } + return int32(); +} + +template <> +std::shared_ptr InferArrowTypeFromVector(SEXP x) { + if (Rf_inherits(x, "Date")) { + return date32(); + } + if (Rf_inherits(x, "POSIXct")) { + auto tzone_sexp = Rf_getAttrib(x, symbols::tzone); + if (Rf_isNull(tzone_sexp)) { + return timestamp(TimeUnit::MICRO); + } else { + return timestamp(TimeUnit::MICRO, CHAR(STRING_ELT(tzone_sexp, 0))); + } + } + if (Rf_inherits(x, "integer64")) { + return int64(); + } + if (Rf_inherits(x, "difftime")) { + return time32(TimeUnit::SECOND); + } + return float64(); +} + +template <> +std::shared_ptr InferArrowTypeFromVector(SEXP x) { + return cpp11::unwind_protect([&] { + R_xlen_t n = XLENGTH(x); + + int64_t size = 0; + + for (R_xlen_t i = 0; i < n; i++) { + size += arrow::r::unsafe::r_string_size(STRING_ELT(x, i)); + if (size > arrow::kBinaryMemoryLimit) { + // Exceeds 2GB capacity of utf8 type, so use large + return large_utf8(); + } + } + + return utf8(); + }); +} + +static inline std::shared_ptr InferArrowTypeFromDataFrame( + cpp11::list x) { + R_xlen_t n = x.size(); + cpp11::strings names(x.attr(R_NamesSymbol)); + std::vector> fields(n); + for (R_xlen_t i = 0; i < n; i++) { + fields[i] = arrow::field(names[i], InferArrowType(x[i])); + } + return arrow::struct_(std::move(fields)); +} + +template <> +std::shared_ptr InferArrowTypeFromVector(SEXP x) { + if (Rf_inherits(x, "data.frame") || Rf_inherits(x, "POSIXlt")) { + return InferArrowTypeFromDataFrame(x); + } else { + // some known special cases + if (Rf_inherits(x, 
"arrow_fixed_size_binary")) { + SEXP byte_width = Rf_getAttrib(x, symbols::byte_width); + if (Rf_isNull(byte_width) || TYPEOF(byte_width) != INTSXP || + XLENGTH(byte_width) != 1) { + cpp11::stop("malformed arrow_fixed_size_binary object"); + } + return arrow::fixed_size_binary(INTEGER(byte_width)[0]); + } + + if (Rf_inherits(x, "arrow_binary")) { + return arrow::binary(); + } + + if (Rf_inherits(x, "arrow_large_binary")) { + return arrow::large_binary(); + } + + SEXP ptype = Rf_getAttrib(x, symbols::ptype); + if (Rf_isNull(ptype)) { + if (XLENGTH(x) == 0) { + cpp11::stop( + "Requires at least one element to infer the values' type of a list vector"); + } + + ptype = VECTOR_ELT(x, 0); + } + + return arrow::list(InferArrowType(ptype)); + } +} + +std::shared_ptr InferArrowType(SEXP x) { + switch (TYPEOF(x)) { + case ENVSXP: + return InferArrowTypeFromVector(x); + case LGLSXP: + return InferArrowTypeFromVector(x); + case INTSXP: + return InferArrowTypeFromVector(x); + case REALSXP: + return InferArrowTypeFromVector(x); + case RAWSXP: + return int8(); + case STRSXP: + return InferArrowTypeFromVector(x); + case VECSXP: + return InferArrowTypeFromVector(x); + default: + break; + } + + cpp11::stop("Cannot infer type from vector"); +} + +} +} + +// [[arrow::export]] +std::shared_ptr Array__infer_type(SEXP x) { + return arrow::r::InferArrowType(x); +} + +#endif From 5fd175de71057fb08ef48ba257d4d89e2151cc8c Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 10 Feb 2021 10:11:56 +0100 Subject: [PATCH 75/82] move DictionaryArray__FromArrays() to r_to_arrow.cpp --- r/R/arrowExports.R | 8 ++++---- r/src/arrowExports.cpp | 22 +++++++++++----------- r/src/r_to_arrow.cpp | 8 ++++++++ 3 files changed, 23 insertions(+), 15 deletions(-) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index e70873bde5c..5ae06f7c717 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -136,10 +136,6 @@ ChunkedArray__from_list <- function(chunks, s_type){ 
.Call(`_arrow_ChunkedArray__from_list`, chunks, s_type) } -DictionaryArray__FromArrays <- function(type, indices, dict){ - .Call(`_arrow_DictionaryArray__FromArrays`, type, indices, dict) -} - Array__as_vector <- function(array){ .Call(`_arrow_Array__as_vector`, array) } @@ -1264,6 +1260,10 @@ vec_to_arrow <- function(x, s_type){ .Call(`_arrow_vec_to_arrow`, x, s_type) } +DictionaryArray__FromArrays <- function(type, indices, dict){ + .Call(`_arrow_DictionaryArray__FromArrays`, type, indices, dict) +} + RecordBatch__num_columns <- function(x){ .Call(`_arrow_RecordBatch__num_columns`, x) } diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 6271327c1bd..671560e6498 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -298,16 +298,6 @@ BEGIN_CPP11 return cpp11::as_sexp(ChunkedArray__from_list(chunks, s_type)); END_CPP11 } -// array_from_vector.cpp -std::shared_ptr DictionaryArray__FromArrays(const std::shared_ptr& type, const std::shared_ptr& indices, const std::shared_ptr& dict); -extern "C" SEXP _arrow_DictionaryArray__FromArrays(SEXP type_sexp, SEXP indices_sexp, SEXP dict_sexp){ -BEGIN_CPP11 - arrow::r::Input&>::type type(type_sexp); - arrow::r::Input&>::type indices(indices_sexp); - arrow::r::Input&>::type dict(dict_sexp); - return cpp11::as_sexp(DictionaryArray__FromArrays(type, indices, dict)); -END_CPP11 -} // array_to_vector.cpp SEXP Array__as_vector(const std::shared_ptr& array); extern "C" SEXP _arrow_Array__as_vector(SEXP array_sexp){ @@ -2756,6 +2746,16 @@ BEGIN_CPP11 return cpp11::as_sexp(vec_to_arrow(x, s_type)); END_CPP11 } +// r_to_arrow.cpp +std::shared_ptr DictionaryArray__FromArrays(const std::shared_ptr& type, const std::shared_ptr& indices, const std::shared_ptr& dict); +extern "C" SEXP _arrow_DictionaryArray__FromArrays(SEXP type_sexp, SEXP indices_sexp, SEXP dict_sexp){ +BEGIN_CPP11 + arrow::r::Input&>::type type(type_sexp); + arrow::r::Input&>::type indices(indices_sexp); + arrow::r::Input&>::type 
dict(dict_sexp); + return cpp11::as_sexp(DictionaryArray__FromArrays(type, indices, dict)); +END_CPP11 +} // recordbatch.cpp int RecordBatch__num_columns(const std::shared_ptr& x); extern "C" SEXP _arrow_RecordBatch__num_columns(SEXP x_sexp){ @@ -3534,7 +3534,6 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_ListArray__raw_value_offsets", (DL_FUNC) &_arrow_ListArray__raw_value_offsets, 1}, { "_arrow_LargeListArray__raw_value_offsets", (DL_FUNC) &_arrow_LargeListArray__raw_value_offsets, 1}, { "_arrow_ChunkedArray__from_list", (DL_FUNC) &_arrow_ChunkedArray__from_list, 2}, - { "_arrow_DictionaryArray__FromArrays", (DL_FUNC) &_arrow_DictionaryArray__FromArrays, 3}, { "_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1}, { "_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 1}, { "_arrow_RecordBatch__to_dataframe", (DL_FUNC) &_arrow_RecordBatch__to_dataframe, 2}, @@ -3816,6 +3815,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_ExportArray", (DL_FUNC) &_arrow_ExportArray, 3}, { "_arrow_ExportRecordBatch", (DL_FUNC) &_arrow_ExportRecordBatch, 3}, { "_arrow_vec_to_arrow", (DL_FUNC) &_arrow_vec_to_arrow, 2}, + { "_arrow_DictionaryArray__FromArrays", (DL_FUNC) &_arrow_DictionaryArray__FromArrays, 3}, { "_arrow_RecordBatch__num_columns", (DL_FUNC) &_arrow_RecordBatch__num_columns, 1}, { "_arrow_RecordBatch__num_rows", (DL_FUNC) &_arrow_RecordBatch__num_rows, 1}, { "_arrow_RecordBatch__schema", (DL_FUNC) &_arrow_RecordBatch__schema, 1}, diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 89511c931db..002ecdb8516 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -1019,4 +1019,12 @@ SEXP vec_to_arrow(SEXP x, SEXP s_type) { return cpp11::to_r6(arrow::r::vec_to_arrow(x, type, type_inferred)); } +// [[arrow::export]] +std::shared_ptr DictionaryArray__FromArrays( + const std::shared_ptr& type, + const std::shared_ptr& indices, + const std::shared_ptr& dict) { + return 
ValueOrStop(arrow::DictionaryArray::FromArrays(type, indices, dict)); +} + #endif From 1372f7b64315be2730173a55583ddc398874179a Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 10 Feb 2021 10:12:25 +0100 Subject: [PATCH 76/82] rm obslete MakeFactorArray --- r/src/array_from_vector.cpp | 110 ------------------------------------ 1 file changed, 110 deletions(-) diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp index 28d22ab5b83..7aa89c8350d 100644 --- a/r/src/array_from_vector.cpp +++ b/r/src/array_from_vector.cpp @@ -29,8 +29,6 @@ using arrow::internal::checked_cast; -std::shared_ptr Array__from_vector(SEXP x, SEXP type); - namespace arrow { namespace r { @@ -291,8 +289,6 @@ struct VectorToArrayConverter { template arrow::enable_if_t::value, Status> Visit( const T& type) { - // TODO: perhaps this replaces MakeFactorArrayImpl ? - ARROW_RETURN_IF(!Rf_isFactor(x), Status::RError("Expecting a factor")); int64_t n = vctrs::short_vec_size(x); @@ -337,82 +333,6 @@ struct VectorToArrayConverter { arrow::ArrayBuilder* builder; }; -template -std::shared_ptr MakeFactorArrayImpl(cpp11::integers factor, - const std::shared_ptr& type) { - using value_type = typename arrow::TypeTraits::ArrayType::value_type; - auto n = factor.size(); - - std::shared_ptr indices_buffer = - ValueOrStop(AllocateBuffer(n * sizeof(value_type), gc_memory_pool())); - - std::vector> buffers{nullptr, indices_buffer}; - - int64_t null_count = 0; - R_xlen_t i = 0; - auto p_factor = factor.begin(); - auto p_indices = reinterpret_cast(indices_buffer->mutable_data()); - for (; i < n; i++, ++p_indices, ++p_factor) { - if (*p_factor == NA_INTEGER) break; - *p_indices = *p_factor - 1; - } - - if (i < n) { - // there are NA's so we need a null buffer - auto null_buffer = - ValueOrStop(AllocateBuffer(BitUtil::BytesForBits(n), gc_memory_pool())); - internal::FirstTimeBitmapWriter null_bitmap_writer(null_buffer->mutable_data(), 0, n); - - // catch up - for (R_xlen_t j = 0; j < i; j++, 
null_bitmap_writer.Next()) { - null_bitmap_writer.Set(); - } - - // resume offset filling - for (; i < n; i++, ++p_indices, ++p_factor, null_bitmap_writer.Next()) { - if (*p_factor == NA_INTEGER) { - null_bitmap_writer.Clear(); - null_count++; - } else { - null_bitmap_writer.Set(); - *p_indices = *p_factor - 1; - } - } - - null_bitmap_writer.Finish(); - buffers[0] = std::move(null_buffer); - } - - auto array_indices_data = - ArrayData::Make(std::make_shared(), n, std::move(buffers), null_count, 0); - auto array_indices = MakeArray(array_indices_data); - - SEXP levels = Rf_getAttrib(factor, R_LevelsSymbol); - auto dict = VectorToArrayConverter::Visit(levels, utf8()); - - return ValueOrStop(DictionaryArray::FromArrays(type, array_indices, dict)); -} - -std::shared_ptr MakeFactorArray(cpp11::integers factor, - const std::shared_ptr& type) { - const auto& dict_type = checked_cast(*type); - switch (dict_type.index_type()->id()) { - case Type::INT8: - return MakeFactorArrayImpl(factor, type); - case Type::INT16: - return MakeFactorArrayImpl(factor, type); - case Type::INT32: - return MakeFactorArrayImpl(factor, type); - case Type::INT64: - return MakeFactorArrayImpl(factor, type); - default: - break; - } - - cpp11::stop("Cannot convert to dictionary with index_type '%s'", - dict_type.index_type()->ToString().c_str()); -} - std::shared_ptr MakeStructArray(SEXP df, const std::shared_ptr& type) { int n = type->num_fields(); std::vector> children(n); @@ -1171,15 +1091,6 @@ Status GetConverter(const std::shared_ptr& type, return Status::NotImplemented("type not implemented"); } -bool CheckCompatibleFactor(SEXP obj, const std::shared_ptr& type) { - if (!Rf_inherits(obj, "factor")) { - return false; - } - - const auto& dict_type = checked_cast(*type); - return dict_type.value_type()->Equals(utf8()); -} - arrow::Status CheckCompatibleStruct(SEXP obj, const std::shared_ptr& type) { if (!Rf_inherits(obj, "data.frame")) { @@ -1232,19 +1143,6 @@ std::shared_ptr Array__from_vector( 
return arrow::r::vec_to_arrow__reuse_memory(x); } - // factors only when type has been inferred - if (type->id() == Type::DICTIONARY) { - if (type_inferred || arrow::r::CheckCompatibleFactor(x, type)) { - // TODO: use VectorToArrayConverter instead, but it does not appear to work - // correctly with ordered dictionary yet - // - // return VectorToArrayConverter::Visit(x, type); - return arrow::r::MakeFactorArray(x, type); - } - - cpp11::stop("Object incompatible with dictionary type"); - } - if (type->id() == Type::LIST || type->id() == Type::LARGE_LIST || type->id() == Type::FIXED_SIZE_LIST) { return VectorToArrayConverter::Visit(x, type); @@ -1325,12 +1223,4 @@ std::shared_ptr ChunkedArray__from_list(cpp11::list chunks, return std::make_shared(std::move(vec)); } -// [[arrow::export]] -std::shared_ptr DictionaryArray__FromArrays( - const std::shared_ptr& type, - const std::shared_ptr& indices, - const std::shared_ptr& dict) { - return ValueOrStop(arrow::DictionaryArray::FromArrays(type, indices, dict)); -} - #endif From 84d18055b6edd0f08d565aa56bd5ae4164865c31 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 10 Feb 2021 10:34:15 +0100 Subject: [PATCH 77/82] rm MakeStructArray --- r/src/array_from_vector.cpp | 62 ------------------------------------- r/src/r_to_arrow.cpp | 15 +++++++++ 2 files changed, 15 insertions(+), 62 deletions(-) diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp index 7aa89c8350d..ecd89ce6266 100644 --- a/r/src/array_from_vector.cpp +++ b/r/src/array_from_vector.cpp @@ -333,17 +333,6 @@ struct VectorToArrayConverter { arrow::ArrayBuilder* builder; }; -std::shared_ptr MakeStructArray(SEXP df, const std::shared_ptr& type) { - int n = type->num_fields(); - std::vector> children(n); - for (int i = 0; i < n; i++) { - children[i] = vec_to_arrow(VECTOR_ELT(df, i), type->field(i)->type(), true); - } - - int64_t rows = n ? 
children[0]->length() : 0; - return std::make_shared(type, rows, children); -} - template int64_t time_cast(T value); @@ -1091,42 +1080,6 @@ Status GetConverter(const std::shared_ptr& type, return Status::NotImplemented("type not implemented"); } -arrow::Status CheckCompatibleStruct(SEXP obj, - const std::shared_ptr& type) { - if (!Rf_inherits(obj, "data.frame")) { - return Status::RError("Conversion to struct arrays requires a data.frame"); - } - - // check the number of columns - int num_fields = type->num_fields(); - if (XLENGTH(obj) != num_fields) { - return Status::RError("Number of fields in struct (", num_fields, - ") incompatible with number of columns in the data frame (", - XLENGTH(obj), ")"); - } - - // check the names of each column - // - // the columns themselves are not checked against the - // types of the fields, because Array__from_vector will error - // when not compatible. - cpp11::strings names = Rf_getAttrib(obj, R_NamesSymbol); - - return cpp11::unwind_protect([&] { - for (int i = 0; i < num_fields; i++) { - const char* name_i = arrow::r::unsafe::utf8_string(names[i]); - auto field_name = type->field(i)->name(); - if (field_name != name_i) { - return Status::RError( - "Field name in position ", i, " (", field_name, - ") does not match the name of the column of the data frame (", name_i, ")"); - } - } - - return Status::OK(); - }); -} - std::shared_ptr Array__from_vector( SEXP x, const std::shared_ptr& type, bool type_inferred) { // new api @@ -1148,21 +1101,6 @@ std::shared_ptr Array__from_vector( return VectorToArrayConverter::Visit(x, type); } - // struct types - if (type->id() == Type::STRUCT) { - if (!type_inferred) { - StopIfNotOk(arrow::r::CheckCompatibleStruct(x, type)); - } - // TODO: when the type has been infered, we could go through - // VectorToArrayConverter: - // - // else { - // return VectorToArrayConverter::Visit(df, type); - // } - - return arrow::r::MakeStructArray(x, type); - } - // general conversion with converter and 
builder std::unique_ptr converter; StopIfNotOk(arrow::r::GetConverter(type, &converter)); diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 002ecdb8516..69c485aa4a6 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -844,6 +844,21 @@ class RStructConverter : public StructConverter { } cpp11::strings x_names = Rf_getAttrib(x, R_NamesSymbol); + + RETURN_NOT_OK(cpp11::unwind_protect([&] { + for (int i = 0; i < n_columns; i++) { + const char* name_i = arrow::r::unsafe::utf8_string(x_names[i]); + auto field_name = fields[i]->name(); + if (field_name != name_i) { + return Status::RError( + "Field name in position ", i, " (", field_name, + ") does not match the name of the column of the data frame (", name_i, ")"); + } + } + + return Status::OK(); + })); + for (R_xlen_t i = 0; i < n_columns; i++) { std::string name(x_names[i]); if (name != fields[i]->name()) { From ea0c38110414e98a61a4abd1f4e018a42cb122a5 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 10 Feb 2021 10:40:12 +0100 Subject: [PATCH 78/82] rm VectorToArrayConverter --- r/src/array_from_vector.cpp | 299 ------------------------------------ 1 file changed, 299 deletions(-) diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp index ecd89ce6266..8b7a78098c6 100644 --- a/r/src/array_from_vector.cpp +++ b/r/src/array_from_vector.cpp @@ -52,287 +52,6 @@ inline bool is_na(int value) { return value == NA_INTEGER; } -struct VectorToArrayConverter { - Status Visit(const arrow::NullType& type) { - auto* null_builder = checked_cast(builder); - return null_builder->AppendNulls(XLENGTH(x)); - } - - Status Visit(const arrow::BooleanType& type) { - ARROW_RETURN_IF(TYPEOF(x) != LGLSXP, Status::RError("Expecting a logical vector")); - R_xlen_t n = XLENGTH(x); - - auto* bool_builder = checked_cast(builder); - auto* p = LOGICAL(x); - - RETURN_NOT_OK(bool_builder->Reserve(n)); - for (R_xlen_t i = 0; i < n; i++) { - auto value = p[i]; - if (value == NA_LOGICAL) { - 
bool_builder->UnsafeAppendNull(); - } else { - bool_builder->UnsafeAppend(value == 1); - } - } - return Status::OK(); - } - - Status Visit(const arrow::Int32Type& type) { - ARROW_RETURN_IF(TYPEOF(x) != INTSXP, Status::RError("Expecting an integer vector")); - - auto* int_builder = checked_cast(builder); - - R_xlen_t n = XLENGTH(x); - const auto* data = INTEGER(x); - - RETURN_NOT_OK(int_builder->Reserve(n)); - for (R_xlen_t i = 0; i < n; i++) { - const auto value = data[i]; - if (value == NA_INTEGER) { - int_builder->UnsafeAppendNull(); - } else { - int_builder->UnsafeAppend(value); - } - } - - return Status::OK(); - } - - Status Visit(const arrow::Int64Type& type) { - ARROW_RETURN_IF(TYPEOF(x) != REALSXP, Status::RError("Expecting a numeric vector")); - ARROW_RETURN_IF(Rf_inherits(x, "integer64"), - Status::RError("Expecting a vector that inherits integer64")); - - auto* int_builder = checked_cast(builder); - - R_xlen_t n = XLENGTH(x); - const auto* data = (REAL(x)); - - RETURN_NOT_OK(int_builder->Reserve(n)); - for (R_xlen_t i = 0; i < n; i++) { - const auto value = arrow::util::SafeCopy(data[i]); - if (value == NA_INT64) { - int_builder->UnsafeAppendNull(); - } else { - int_builder->UnsafeAppend(value); - } - } - - return Status::OK(); - } - - Status Visit(const arrow::DoubleType& type) { - ARROW_RETURN_IF(TYPEOF(x) != REALSXP, Status::RError("Expecting a numeric vector")); - - auto* double_builder = checked_cast(builder); - - R_xlen_t n = XLENGTH(x); - const auto* data = (REAL(x)); - - RETURN_NOT_OK(double_builder->Reserve(n)); - for (R_xlen_t i = 0; i < n; i++) { - const auto value = data[i]; - if (ISNA(value)) { - double_builder->UnsafeAppendNull(); - } else { - double_builder->UnsafeAppend(value); - } - } - - return Status::OK(); - } - - Status Visit(const arrow::BinaryType& type) { - if (!(Rf_inherits(x, "vctrs_list_of") && - TYPEOF(Rf_getAttrib(x, symbols::ptype)) == RAWSXP)) { - return Status::RError("Expecting a list of raw vectors"); - } - return 
Status::OK(); - } - - Status Visit(const arrow::FixedSizeBinaryType& type) { - if (!(Rf_inherits(x, "vctrs_list_of") && - TYPEOF(Rf_getAttrib(x, symbols::ptype)) == RAWSXP)) { - return Status::RError("Expecting a list of raw vectors"); - } - - return Status::OK(); - } - - template - arrow::enable_if_base_binary Visit(const T& type) { - using BuilderType = typename TypeTraits::BuilderType; - - ARROW_RETURN_IF(TYPEOF(x) != STRSXP, Status::RError("Expecting a character vector")); - - auto* binary_builder = checked_cast(builder); - - R_xlen_t n = XLENGTH(x); - RETURN_NOT_OK(builder->Reserve(n)); - for (R_xlen_t i = 0; i < n; i++) { - SEXP si = STRING_ELT(x, i); - if (si == NA_STRING) { - RETURN_NOT_OK(binary_builder->AppendNull()); - continue; - } - std::string s = cpp11::r_string(si); - RETURN_NOT_OK(binary_builder->Append(s.c_str(), s.size())); - } - - return Status::OK(); - } - - template - arrow::enable_if_base_list Visit(const T& type) { - using BuilderType = typename TypeTraits::BuilderType; - - ARROW_RETURN_IF(TYPEOF(x) != VECSXP, Status::RError("Expecting a list vector")); - - auto* list_builder = checked_cast(builder); - auto* value_builder = list_builder->value_builder(); - auto value_type = type.value_type(); - - R_xlen_t n = XLENGTH(x); - RETURN_NOT_OK(builder->Reserve(n)); - for (R_xlen_t i = 0; i < n; i++) { - SEXP vector = VECTOR_ELT(x, i); - if (Rf_isNull(vector)) { - RETURN_NOT_OK(list_builder->AppendNull()); - continue; - } - - RETURN_NOT_OK(list_builder->Append()); - - // Recurse. 
- VectorToArrayConverter converter{vector, value_builder}; - Status status = arrow::VisitTypeInline(*value_type, &converter); - if (!status.ok()) { - return Status::RError("Cannot convert list element ", (i + 1), - " to an Array of type `", value_type->ToString(), - "` : ", status.message()); - } - } - - return Status::OK(); - } - - Status Visit(const FixedSizeListType& type) { - ARROW_RETURN_IF(TYPEOF(x) != VECSXP, Status::RError("Expecting a list vector")); - - auto* fixed_size_list_builder = checked_cast(builder); - auto* value_builder = fixed_size_list_builder->value_builder(); - auto value_type = type.value_type(); - int list_size = type.list_size(); - - R_xlen_t n = XLENGTH(x); - RETURN_NOT_OK(builder->Reserve(n)); - for (R_xlen_t i = 0; i < n; i++) { - SEXP vector = VECTOR_ELT(x, i); - if (Rf_isNull(vector)) { - RETURN_NOT_OK(fixed_size_list_builder->AppendNull()); - continue; - } - RETURN_NOT_OK(fixed_size_list_builder->Append()); - - auto vect_type = arrow::r::InferArrowType(vector); - if (!value_type->Equals(vect_type)) { - return Status::RError("FixedSizeList vector expecting elements vector of type ", - value_type->ToString(), " but got ", vect_type->ToString()); - } - int vector_size = vctrs::short_vec_size(vector); - if (vector_size != list_size) { - return Status::RError("FixedSizeList vector expecting elements vector of size ", - list_size, ", not ", vector_size); - } - - // Recurse. 
- VectorToArrayConverter converter{vector, value_builder}; - RETURN_NOT_OK(arrow::VisitTypeInline(*value_type, &converter)); - } - - return Status::OK(); - } - - template - arrow::enable_if_t::value, Status> Visit(const T& type) { - using BuilderType = typename TypeTraits::BuilderType; - ARROW_RETURN_IF(!Rf_inherits(x, "data.frame"), - Status::RError("Expecting a data frame")); - - auto* struct_builder = checked_cast(builder); - - int64_t n = vctrs::short_vec_size(x); - RETURN_NOT_OK(struct_builder->Reserve(n)); - RETURN_NOT_OK(struct_builder->AppendValues(n, NULLPTR)); - - int num_fields = struct_builder->num_fields(); - - // Visit each column of the data frame using the associated - // field builder - for (R_xlen_t i = 0; i < num_fields; i++) { - auto column_builder = struct_builder->field_builder(i); - SEXP x_i = VECTOR_ELT(x, i); - int64_t n_i = vctrs::short_vec_size(x_i); - if (n_i != n) { - SEXP name_i = STRING_ELT(Rf_getAttrib(x, R_NamesSymbol), i); - return Status::RError("Degenerated data frame. 
Column '", CHAR(name_i), - "' has size ", n_i, " instead of the number of rows: ", n); - } - - VectorToArrayConverter converter{x_i, column_builder}; - RETURN_NOT_OK(arrow::VisitTypeInline(*column_builder->type().get(), &converter)); - } - - return Status::OK(); - } - - template - arrow::enable_if_t::value, Status> Visit( - const T& type) { - ARROW_RETURN_IF(!Rf_isFactor(x), Status::RError("Expecting a factor")); - int64_t n = vctrs::short_vec_size(x); - - auto* dict_builder = checked_cast(builder); - RETURN_NOT_OK(dict_builder->Reserve(n)); - - SEXP levels = Rf_getAttrib(x, R_LevelsSymbol); - auto memo = VectorToArrayConverter::Visit(levels, utf8()); - RETURN_NOT_OK(dict_builder->InsertMemoValues(*memo)); - - int* p_values = INTEGER(x); - for (int64_t i = 0; i < n; i++, ++p_values) { - int v = *p_values; - if (v == NA_INTEGER) { - RETURN_NOT_OK(dict_builder->AppendNull()); - } else { - RETURN_NOT_OK(dict_builder->Append(CHAR(STRING_ELT(levels, v - 1)))); - } - } - - return Status::OK(); - } - - Status Visit(const arrow::DataType& type) { - return Status::NotImplemented("Converting vector to arrow type ", type.ToString(), - " not implemented"); - } - - static std::shared_ptr Visit(SEXP x, const std::shared_ptr& type) { - std::unique_ptr builder; - StopIfNotOk(MakeBuilder(gc_memory_pool(), type, &builder)); - - VectorToArrayConverter converter{x, builder.get()}; - StopIfNotOk(arrow::VisitTypeInline(*type, &converter)); - - std::shared_ptr result; - StopIfNotOk(builder->Finish(&result)); - return result; - } - - SEXP x; - arrow::ArrayBuilder* builder; -}; - template int64_t time_cast(T value); @@ -1082,24 +801,6 @@ Status GetConverter(const std::shared_ptr& type, std::shared_ptr Array__from_vector( SEXP x, const std::shared_ptr& type, bool type_inferred) { - // new api - return vec_to_arrow(x, type, type_inferred); - - // short circuit if `x` is already an Array - if (Rf_inherits(x, "Array")) { - return cpp11::as_cpp>(x); - } - - // special case when we can just use 
the data from the R vector - // directly. This still needs to handle the null bitmap - if (arrow::r::can_reuse_memory(x, type)) { - return arrow::r::vec_to_arrow__reuse_memory(x); - } - - if (type->id() == Type::LIST || type->id() == Type::LARGE_LIST || - type->id() == Type::FIXED_SIZE_LIST) { - return VectorToArrayConverter::Visit(x, type); - } // general conversion with converter and builder std::unique_ptr converter; From 97f9f93e9d908b49b2d9d55a58c7c267b8478b74 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 10 Feb 2021 10:44:11 +0100 Subject: [PATCH 79/82] - GetConverter --- r/src/array_from_vector.cpp | 50 ------------------------------------- 1 file changed, 50 deletions(-) diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp index 8b7a78098c6..6946c0df5d8 100644 --- a/r/src/array_from_vector.cpp +++ b/r/src/array_from_vector.cpp @@ -146,11 +146,6 @@ Status float_cast(double x, float* out) { namespace r { -class VectorConverter; - -Status GetConverter(const std::shared_ptr& type, - std::unique_ptr* out); - class VectorConverter { public: virtual ~VectorConverter() = default; @@ -755,56 +750,11 @@ class StringVectorConverter : public VectorConverter { std::unique_ptr(new TYPE(checked_cast(type.get())->unit())); \ return Status::OK() -Status GetConverter(const std::shared_ptr& type, - std::unique_ptr* out) { - switch (type->id()) { - SIMPLE_CONVERTER_CASE(BINARY, BinaryVectorConverter); - SIMPLE_CONVERTER_CASE(LARGE_BINARY, BinaryVectorConverter); - SIMPLE_CONVERTER_CASE(FIXED_SIZE_BINARY, FixedSizeBinaryVectorConverter); - SIMPLE_CONVERTER_CASE(BOOL, BooleanVectorConverter); - SIMPLE_CONVERTER_CASE(STRING, StringVectorConverter); - SIMPLE_CONVERTER_CASE(LARGE_STRING, StringVectorConverter); - NUMERIC_CONVERTER(INT8, Int8Type); - NUMERIC_CONVERTER(INT16, Int16Type); - NUMERIC_CONVERTER(INT32, Int32Type); - NUMERIC_CONVERTER(INT64, Int64Type); - NUMERIC_CONVERTER(UINT8, UInt8Type); - NUMERIC_CONVERTER(UINT16, UInt16Type); - 
NUMERIC_CONVERTER(UINT32, UInt32Type); - NUMERIC_CONVERTER(UINT64, UInt64Type); - - // TODO: not sure how to handle half floats - // the python code uses npy_half - // NUMERIC_CONVERTER(HALF_FLOAT, HalfFloatType); - NUMERIC_CONVERTER(FLOAT, FloatType); - NUMERIC_CONVERTER(DOUBLE, DoubleType); - - SIMPLE_CONVERTER_CASE(DATE32, Date32Converter); - SIMPLE_CONVERTER_CASE(DATE64, Date64Converter); - - // TODO: probably after we merge ARROW-3628 - // case Type::DECIMAL: - - TIME_CONVERTER_CASE(TIME32, Time32Type, Time32Converter); - TIME_CONVERTER_CASE(TIME64, Time64Type, Time64Converter); - TIME_CONVERTER_CASE(TIMESTAMP, TimestampType, TimestampConverter); - - case Type::NA: - *out = std::unique_ptr(new NullVectorConverter); - return Status::OK(); - - default: - break; - } - return Status::NotImplemented("type not implemented"); -} - std::shared_ptr Array__from_vector( SEXP x, const std::shared_ptr& type, bool type_inferred) { // general conversion with converter and builder std::unique_ptr converter; - StopIfNotOk(arrow::r::GetConverter(type, &converter)); // Create ArrayBuilder for type std::unique_ptr type_builder; From 3bce51e331c264778aecb465314e812bc4ac141e Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 10 Feb 2021 10:54:21 +0100 Subject: [PATCH 80/82] -Array__from_vector() --- r/src/array_from_vector.cpp | 625 ------------------------------------ 1 file changed, 625 deletions(-) diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp index 6946c0df5d8..34302a2b9ef 100644 --- a/r/src/array_from_vector.cpp +++ b/r/src/array_from_vector.cpp @@ -144,631 +144,6 @@ Status float_cast(double x, float* out) { } // namespace internal -namespace r { - -class VectorConverter { - public: - virtual ~VectorConverter() = default; - - virtual Status Init(ArrayBuilder* builder) = 0; - - virtual Status Ingest(SEXP obj) = 0; - - virtual Status GetResult(std::shared_ptr* result) { - return builder_->Finish(result); - } - - ArrayBuilder* builder() const { 
return builder_; } - - protected: - ArrayBuilder* builder_; -}; - -class NullVectorConverter : public VectorConverter { - public: - using BuilderType = NullBuilder; - - ~NullVectorConverter() {} - - Status Init(ArrayBuilder* builder) override { - builder_ = builder; - typed_builder_ = checked_cast(builder_); - return Status::OK(); - } - - Status Ingest(SEXP obj) override { - RETURN_NOT_OK(typed_builder_->AppendNulls(XLENGTH(obj))); - return Status::OK(); - } - - protected: - BuilderType* typed_builder_; -}; - -template -struct Unbox {}; - -// unboxer for int type -template -struct Unbox> { - using BuilderType = typename TypeTraits::BuilderType; - using ArrayType = typename TypeTraits::ArrayType; - using CType = typename ArrayType::value_type; - - static inline Status Ingest(BuilderType* builder, SEXP obj) { - switch (TYPEOF(obj)) { - case INTSXP: - return IngestRange(builder, INTEGER(obj), XLENGTH(obj)); - case REALSXP: - if (Rf_inherits(obj, "integer64")) { - return IngestRange(builder, reinterpret_cast(REAL(obj)), - XLENGTH(obj)); - } - return IngestRange(builder, REAL(obj), XLENGTH(obj)); - - // TODO: handle raw and logical - default: - break; - } - - return Status::Invalid("Cannot convert R vector of type <", Rf_type2char(TYPEOF(obj)), - "> to integer Arrow array"); - } - - template - static inline Status IngestRange(BuilderType* builder, T* p, R_xlen_t n) { - RETURN_NOT_OK(builder->Resize(n)); - for (R_xlen_t i = 0; i < n; i++, ++p) { - if (is_na(*p)) { - builder->UnsafeAppendNull(); - } else { - CType value = 0; - RETURN_NOT_OK(internal::int_cast(*p, &value)); - builder->UnsafeAppend(value); - } - } - return Status::OK(); - } -}; - -template <> -struct Unbox { - static inline Status Ingest(DoubleBuilder* builder, SEXP obj) { - switch (TYPEOF(obj)) { - // TODO: handle RAW - case INTSXP: - return IngestIntRange(builder, INTEGER(obj), XLENGTH(obj), NA_INTEGER); - case REALSXP: - if (Rf_inherits(obj, "integer64")) { - return IngestIntRange(builder, 
reinterpret_cast(REAL(obj)), - XLENGTH(obj), NA_INT64); - } - return IngestDoubleRange(builder, REAL(obj), XLENGTH(obj)); - } - return Status::Invalid("Cannot convert R object to double type"); - } - - template - static inline Status IngestIntRange(DoubleBuilder* builder, T* p, R_xlen_t n, T na) { - RETURN_NOT_OK(builder->Resize(n)); - for (R_xlen_t i = 0; i < n; i++, ++p) { - if (*p == NA_INTEGER) { - builder->UnsafeAppendNull(); - } else { - double value = 0; - RETURN_NOT_OK(internal::double_cast(*p, &value)); - builder->UnsafeAppend(value); - } - } - return Status::OK(); - } - - static inline Status IngestDoubleRange(DoubleBuilder* builder, double* p, R_xlen_t n) { - RETURN_NOT_OK(builder->Resize(n)); - for (R_xlen_t i = 0; i < n; i++, ++p) { - if (ISNA(*p)) { - builder->UnsafeAppendNull(); - } else { - builder->UnsafeAppend(*p); - } - } - return Status::OK(); - } -}; - -template <> -struct Unbox { - static inline Status Ingest(FloatBuilder* builder, SEXP obj) { - switch (TYPEOF(obj)) { - // TODO: handle RAW - case INTSXP: - return IngestIntRange(builder, INTEGER(obj), XLENGTH(obj), NA_INTEGER); - case REALSXP: - if (Rf_inherits(obj, "integer64")) { - return IngestIntRange(builder, reinterpret_cast(REAL(obj)), - XLENGTH(obj), NA_INT64); - } - return IngestDoubleRange(builder, REAL(obj), XLENGTH(obj)); - } - return Status::Invalid("Cannot convert R object to double type"); - } - - template - static inline Status IngestIntRange(FloatBuilder* builder, T* p, R_xlen_t n, T na) { - RETURN_NOT_OK(builder->Resize(n)); - for (R_xlen_t i = 0; i < n; i++, ++p) { - if (*p == NA_INTEGER) { - builder->UnsafeAppendNull(); - } else { - float value = 0; - RETURN_NOT_OK(internal::float_cast(*p, &value)); - builder->UnsafeAppend(value); - } - } - return Status::OK(); - } - - static inline Status IngestDoubleRange(FloatBuilder* builder, double* p, R_xlen_t n) { - RETURN_NOT_OK(builder->Resize(n)); - for (R_xlen_t i = 0; i < n; i++, ++p) { - if (ISNA(*p)) { - 
builder->UnsafeAppendNull(); - } else { - float value; - RETURN_NOT_OK(internal::float_cast(*p, &value)); - builder->UnsafeAppend(value); - } - } - return Status::OK(); - } -}; - -template <> -struct Unbox { - static inline Status Ingest(BooleanBuilder* builder, SEXP obj) { - switch (TYPEOF(obj)) { - case LGLSXP: { - R_xlen_t n = XLENGTH(obj); - RETURN_NOT_OK(builder->Resize(n)); - int* p = LOGICAL(obj); - for (R_xlen_t i = 0; i < n; i++, ++p) { - if (*p == NA_LOGICAL) { - builder->UnsafeAppendNull(); - } else { - builder->UnsafeAppend(*p == 1); - } - } - return Status::OK(); - } - - default: - break; - } - - // TODO: include more information about the R object and the target type - return Status::Invalid("Cannot convert R object to boolean type"); - } -}; - -template <> -struct Unbox { - static inline Status Ingest(Date32Builder* builder, SEXP obj) { - switch (TYPEOF(obj)) { - case INTSXP: - if (Rf_inherits(obj, "Date")) { - return IngestIntRange(builder, INTEGER(obj), XLENGTH(obj)); - } - break; - case REALSXP: - if (Rf_inherits(obj, "Date")) { - return IngestDoubleRange(builder, REAL(obj), XLENGTH(obj)); - } - break; - default: - break; - } - return Status::Invalid("Cannot convert R object to date32 type"); - } - - static inline Status IngestIntRange(Date32Builder* builder, int* p, R_xlen_t n) { - RETURN_NOT_OK(builder->Resize(n)); - for (R_xlen_t i = 0; i < n; i++, ++p) { - if (*p == NA_INTEGER) { - builder->UnsafeAppendNull(); - } else { - builder->UnsafeAppend(*p); - } - } - return Status::OK(); - } - - static inline Status IngestDoubleRange(Date32Builder* builder, double* p, R_xlen_t n) { - RETURN_NOT_OK(builder->Resize(n)); - for (R_xlen_t i = 0; i < n; i++, ++p) { - if (ISNA(*p)) { - builder->UnsafeAppendNull(); - } else { - builder->UnsafeAppend(static_cast(*p)); - } - } - return Status::OK(); - } -}; - -template <> -struct Unbox { - constexpr static int64_t kMillisecondsPerDay = 86400000; - - static inline Status Ingest(Date64Builder* builder, SEXP obj) 
{ - switch (TYPEOF(obj)) { - case INTSXP: - // number of days since epoch - if (Rf_inherits(obj, "Date")) { - return IngestDateInt32Range(builder, INTEGER(obj), XLENGTH(obj)); - } - break; - - case REALSXP: - // (fractional number of days since epoch) - if (Rf_inherits(obj, "Date")) { - return IngestDateDoubleRange(builder, REAL(obj), - XLENGTH(obj)); - } - - // number of seconds since epoch - if (Rf_inherits(obj, "POSIXct")) { - return IngestDateDoubleRange<1000>(builder, REAL(obj), XLENGTH(obj)); - } - } - return Status::Invalid("Cannot convert R object to date64 type"); - } - - // ingest a integer vector that represents number of days since epoch - static inline Status IngestDateInt32Range(Date64Builder* builder, int* p, R_xlen_t n) { - RETURN_NOT_OK(builder->Resize(n)); - for (R_xlen_t i = 0; i < n; i++, ++p) { - if (*p == NA_INTEGER) { - builder->UnsafeAppendNull(); - } else { - builder->UnsafeAppend(*p * kMillisecondsPerDay); - } - } - return Status::OK(); - } - - // ingest a numeric vector that represents (fractional) number of days since epoch - template - static inline Status IngestDateDoubleRange(Date64Builder* builder, double* p, - R_xlen_t n) { - RETURN_NOT_OK(builder->Resize(n)); - - for (R_xlen_t i = 0; i < n; i++, ++p) { - if (ISNA(*p)) { - builder->UnsafeAppendNull(); - } else { - builder->UnsafeAppend(static_cast(*p * MULTIPLIER)); - } - } - return Status::OK(); - } -}; - -template -class TypedVectorConverter : public VectorConverter { - public: - using BuilderType = typename TypeTraits::BuilderType; - - Status Init(ArrayBuilder* builder) override { - builder_ = builder; - typed_builder_ = checked_cast(builder_); - return Status::OK(); - } - - Status Ingest(SEXP obj) override { return Unbox::Ingest(typed_builder_, obj); } - - protected: - BuilderType* typed_builder_; -}; - -template -class NumericVectorConverter - : public TypedVectorConverter> {}; - -class BooleanVectorConverter - : public TypedVectorConverter {}; - -class Date32Converter : public 
TypedVectorConverter {}; -class Date64Converter : public TypedVectorConverter {}; - -inline int64_t get_time_multiplier(TimeUnit::type unit) { - switch (unit) { - case TimeUnit::SECOND: - return 1; - case TimeUnit::MILLI: - return 1000; - case TimeUnit::MICRO: - return 1000000; - case TimeUnit::NANO: - return 1000000000; - default: - return 0; - } -} - -template -class TimeConverter : public VectorConverter { - using BuilderType = typename TypeTraits::BuilderType; - - public: - explicit TimeConverter(TimeUnit::type unit) - : unit_(unit), multiplier_(get_time_multiplier(unit)) {} - - Status Init(ArrayBuilder* builder) override { - builder_ = builder; - typed_builder_ = checked_cast(builder); - return Status::OK(); - } - - Status Ingest(SEXP obj) override { - if (valid_R_object(obj)) { - int difftime_multiplier; - RETURN_NOT_OK(GetDifftimeMultiplier(obj, &difftime_multiplier)); - return Ingest_POSIXct(REAL(obj), XLENGTH(obj), difftime_multiplier); - } - - return Status::Invalid("Cannot convert R object to timestamp type"); - } - - protected: - TimeUnit::type unit_; - BuilderType* typed_builder_; - int64_t multiplier_; - - Status Ingest_POSIXct(double* p, R_xlen_t n, int difftime_multiplier) { - RETURN_NOT_OK(typed_builder_->Resize(n)); - - for (R_xlen_t i = 0; i < n; i++, ++p) { - if (ISNA(*p)) { - typed_builder_->UnsafeAppendNull(); - } else { - typed_builder_->UnsafeAppend( - static_cast(*p * multiplier_ * difftime_multiplier)); - } - } - return Status::OK(); - } - - virtual bool valid_R_object(SEXP obj) = 0; - - // only used for Time32 and Time64 - virtual Status GetDifftimeMultiplier(SEXP obj, int* res) { - std::string unit(CHAR(STRING_ELT(Rf_getAttrib(obj, symbols::units), 0))); - if (unit == "secs") { - *res = 1; - } else if (unit == "mins") { - *res = 60; - } else if (unit == "hours") { - *res = 3600; - } else if (unit == "days") { - *res = 86400; - } else if (unit == "weeks") { - *res = 604800; - } else { - return Status::Invalid("unknown difftime unit"); - } 
- return Status::OK(); - } -}; - -class TimestampConverter : public TimeConverter { - public: - explicit TimestampConverter(TimeUnit::type unit) : TimeConverter(unit) {} - - protected: - bool valid_R_object(SEXP obj) override { - return TYPEOF(obj) == REALSXP && Rf_inherits(obj, "POSIXct"); - } - - Status GetDifftimeMultiplier(SEXP obj, int* res) override { - *res = 1; - return Status::OK(); - } -}; - -class Time32Converter : public TimeConverter { - public: - explicit Time32Converter(TimeUnit::type unit) : TimeConverter(unit) {} - - protected: - bool valid_R_object(SEXP obj) override { - return TYPEOF(obj) == REALSXP && Rf_inherits(obj, "difftime"); - } -}; - -class Time64Converter : public TimeConverter { - public: - explicit Time64Converter(TimeUnit::type unit) : TimeConverter(unit) {} - - protected: - bool valid_R_object(SEXP obj) override { - return TYPEOF(obj) == REALSXP && Rf_inherits(obj, "difftime"); - } -}; - -template -class BinaryVectorConverter : public VectorConverter { - public: - ~BinaryVectorConverter() {} - - Status Init(ArrayBuilder* builder) { - typed_builder_ = checked_cast(builder); - return Status::OK(); - } - - Status Ingest(SEXP obj) { - ARROW_RETURN_IF(TYPEOF(obj) != VECSXP, Status::RError("Expecting a list")); - R_xlen_t n = XLENGTH(obj); - - // Reserve enough space before appending - int64_t size = 0; - for (R_xlen_t i = 0; i < n; i++) { - SEXP obj_i = VECTOR_ELT(obj, i); - if (!Rf_isNull(obj_i)) { - ARROW_RETURN_IF(TYPEOF(obj_i) != RAWSXP, - Status::RError("Expecting a raw vector")); - size += XLENGTH(obj_i); - } - } - RETURN_NOT_OK(typed_builder_->Reserve(size)); - - // append - for (R_xlen_t i = 0; i < n; i++) { - SEXP obj_i = VECTOR_ELT(obj, i); - if (Rf_isNull(obj_i)) { - RETURN_NOT_OK(typed_builder_->AppendNull()); - } else { - RETURN_NOT_OK(typed_builder_->Append(RAW(obj_i), XLENGTH(obj_i))); - } - } - return Status::OK(); - } - - Status GetResult(std::shared_ptr* result) { - return typed_builder_->Finish(result); - } - - private: 
- Builder* typed_builder_; -}; - -class FixedSizeBinaryVectorConverter : public VectorConverter { - public: - ~FixedSizeBinaryVectorConverter() {} - - Status Init(ArrayBuilder* builder) { - typed_builder_ = checked_cast(builder); - return Status::OK(); - } - - Status Ingest(SEXP obj) { - ARROW_RETURN_IF(TYPEOF(obj) != VECSXP, Status::RError("Expecting a list")); - R_xlen_t n = XLENGTH(obj); - - // Reserve enough space before appending - int32_t byte_width = typed_builder_->byte_width(); - for (R_xlen_t i = 0; i < n; i++) { - SEXP obj_i = VECTOR_ELT(obj, i); - if (!Rf_isNull(obj_i)) { - ARROW_RETURN_IF(TYPEOF(obj_i) != RAWSXP, - Status::RError("Expecting a raw vector")); - ARROW_RETURN_IF(XLENGTH(obj_i) != byte_width, - Status::RError("Expecting a raw vector of ", byte_width, - " bytes, not ", XLENGTH(obj_i))); - } - } - RETURN_NOT_OK(typed_builder_->Reserve(n * byte_width)); - - // append - for (R_xlen_t i = 0; i < n; i++) { - SEXP obj_i = VECTOR_ELT(obj, i); - if (Rf_isNull(obj_i)) { - RETURN_NOT_OK(typed_builder_->AppendNull()); - } else { - RETURN_NOT_OK(typed_builder_->Append(RAW(obj_i))); - } - } - return Status::OK(); - } - - Status GetResult(std::shared_ptr* result) { - return typed_builder_->Finish(result); - } - - private: - FixedSizeBinaryBuilder* typed_builder_; -}; - -template -class StringVectorConverter : public VectorConverter { - public: - ~StringVectorConverter() {} - - Status Init(ArrayBuilder* builder) { - typed_builder_ = checked_cast(builder); - return Status::OK(); - } - - Status Ingest(SEXP obj) { - ARROW_RETURN_IF(TYPEOF(obj) != STRSXP, - Status::RError("Expecting a character vector")); - - cpp11::strings s(arrow::r::utf8_strings(obj)); - RETURN_NOT_OK(typed_builder_->Reserve(s.size())); - - // we know all the R strings are utf8 already, so we can get - // a definite size and then use UnsafeAppend*() - int64_t total_length = 0; - for (cpp11::r_string si : s) { - total_length += cpp11::is_na(si) ? 
0 : si.size(); - } - RETURN_NOT_OK(typed_builder_->ReserveData(total_length)); - - // append - for (cpp11::r_string si : s) { - if (si == NA_STRING) { - typed_builder_->UnsafeAppendNull(); - } else { - typed_builder_->UnsafeAppend(CHAR(si), si.size()); - } - } - - return Status::OK(); - } - - Status GetResult(std::shared_ptr* result) { - return typed_builder_->Finish(result); - } - - private: - StringBuilder* typed_builder_; -}; - -#define NUMERIC_CONVERTER(TYPE_ENUM, TYPE) \ - case Type::TYPE_ENUM: \ - *out = \ - std::unique_ptr>(new NumericVectorConverter); \ - return Status::OK() - -#define SIMPLE_CONVERTER_CASE(TYPE_ENUM, TYPE) \ - case Type::TYPE_ENUM: \ - *out = std::unique_ptr(new TYPE); \ - return Status::OK() - -#define TIME_CONVERTER_CASE(TYPE_ENUM, DATA_TYPE, TYPE) \ - case Type::TYPE_ENUM: \ - *out = \ - std::unique_ptr(new TYPE(checked_cast(type.get())->unit())); \ - return Status::OK() - -std::shared_ptr Array__from_vector( - SEXP x, const std::shared_ptr& type, bool type_inferred) { - - // general conversion with converter and builder - std::unique_ptr converter; - - // Create ArrayBuilder for type - std::unique_ptr type_builder; - StopIfNotOk(arrow::MakeBuilder(gc_memory_pool(), type, &type_builder)); - StopIfNotOk(converter->Init(type_builder.get())); - - // ingest R data and grab the result array - StopIfNotOk(converter->Ingest(x)); - std::shared_ptr result; - StopIfNotOk(converter->GetResult(&result)); - return result; -} - -} // namespace r } // namespace arrow // [[arrow::export]] From 7c45774f12d80817a6d515302bb126a2935a672e Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 10 Feb 2021 13:11:50 +0100 Subject: [PATCH 81/82] ... 
end remove array_from_vector.cpp altgether --- r/R/arrowExports.R | 8 +- r/src/array_from_vector.cpp | 190 ------------------------------------ r/src/arrowExports.cpp | 20 ++-- r/src/chunkedarray.cpp | 42 ++++++++ r/src/r_to_arrow.cpp | 4 +- r/src/type_infer.cpp | 6 +- 6 files changed, 61 insertions(+), 209 deletions(-) delete mode 100644 r/src/array_from_vector.cpp diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 5ae06f7c717..643f2467303 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -132,10 +132,6 @@ LargeListArray__raw_value_offsets <- function(array){ .Call(`_arrow_LargeListArray__raw_value_offsets`, array) } -ChunkedArray__from_list <- function(chunks, s_type){ - .Call(`_arrow_ChunkedArray__from_list`, chunks, s_type) -} - Array__as_vector <- function(array){ .Call(`_arrow_Array__as_vector`, array) } @@ -248,6 +244,10 @@ ChunkedArray__ToString <- function(x){ .Call(`_arrow_ChunkedArray__ToString`, x) } +ChunkedArray__from_list <- function(chunks, s_type){ + .Call(`_arrow_ChunkedArray__from_list`, chunks, s_type) +} + util___Codec__Create <- function(codec, compression_level){ .Call(`_arrow_util___Codec__Create`, codec, compression_level) } diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp deleted file mode 100644 index 34302a2b9ef..00000000000 --- a/r/src/array_from_vector.cpp +++ /dev/null @@ -1,190 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -#include - -#include "./arrow_types.h" -#include "./arrow_vctrs.h" - -#if defined(ARROW_R_WITH_ARROW) -#include -#include -#include -#include -#include - -using arrow::internal::checked_cast; - -namespace arrow { -namespace r { - -template -inline bool is_na(T value) { - return false; -} - -template <> -inline bool is_na(int64_t value) { - return value == NA_INT64; -} - -template <> -inline bool is_na(double value) { - return ISNA(value); -} - -template <> -inline bool is_na(int value) { - return value == NA_INTEGER; -} - -template -int64_t time_cast(T value); - -template <> -inline int64_t time_cast(int value) { - return static_cast(value) * 1000; -} - -template <> -inline int64_t time_cast(double value) { - return static_cast(value * 1000); -} - -} // namespace r -} // namespace arrow - -// ---------------- new api - -namespace arrow { - -namespace internal { - -template ::value, Target>::type = 0> -Status int_cast(T x, Target* out) { - if (static_cast(x) < std::numeric_limits::min() || - static_cast(x) > std::numeric_limits::max()) { - return Status::Invalid("Value is too large to fit in C integer type"); - } - *out = static_cast(x); - return Status::OK(); -} - -template -struct usigned_type; - -template ::value, Target>::type = 0> -Status int_cast(T x, Target* out) { - // we need to compare between unsigned integers - uint64_t x64 = x; - if (x64 < 0 || x64 > std::numeric_limits::max()) { - return Status::Invalid("Value is too large to fit in C integer type"); - } - *out = static_cast(x); - return Status::OK(); -} - -template -Status 
double_cast(Int x, double* out) { - *out = static_cast(x); - return Status::OK(); -} - -template <> -Status double_cast(int64_t x, double* out) { - constexpr int64_t kDoubleMax = 1LL << 53; - constexpr int64_t kDoubleMin = -(1LL << 53); - - if (x < kDoubleMin || x > kDoubleMax) { - return Status::Invalid("integer value ", x, " is outside of the range exactly", - " representable by a IEEE 754 double precision value"); - } - *out = static_cast(x); - return Status::OK(); -} - -// used for int and int64_t -template -Status float_cast(T x, float* out) { - constexpr int64_t kHalfFloatMax = 1LL << 24; - constexpr int64_t kHalfFloatMin = -(1LL << 24); - - int64_t x64 = static_cast(x); - if (x64 < kHalfFloatMin || x64 > kHalfFloatMax) { - return Status::Invalid("integer value ", x, " is outside of the range exactly", - " representable by a IEEE 754 half precision value"); - } - - *out = static_cast(x); - return Status::OK(); -} - -template <> -Status float_cast(double x, float* out) { - // TODO: is there some sort of floating point overflow ? 
- *out = static_cast(x); - return Status::OK(); -} - -} // namespace internal - -} // namespace arrow - -// [[arrow::export]] -std::shared_ptr ChunkedArray__from_list(cpp11::list chunks, - SEXP s_type) { - std::vector> vec; - - // the type might be NULL, in which case we need to infer it from the data - // we keep track of whether it was inferred or supplied - bool type_inferred = Rf_isNull(s_type); - R_xlen_t n = XLENGTH(chunks); - - std::shared_ptr type; - if (type_inferred) { - if (n == 0) { - cpp11::stop("type must be specified for empty list"); - } - type = arrow::r::InferArrowType(VECTOR_ELT(chunks, 0)); - } else { - type = cpp11::as_cpp>(s_type); - } - - if (n == 0) { - std::shared_ptr array; - std::unique_ptr type_builder; - StopIfNotOk(arrow::MakeBuilder(gc_memory_pool(), type, &type_builder)); - StopIfNotOk(type_builder->Finish(&array)); - vec.push_back(array); - } else { - // the first - might differ from the rest of the loop - // because we might have inferred the type from the first element of the list - // - // this only really matters for dictionary arrays - vec.push_back(arrow::r::vec_to_arrow(chunks[0], type, type_inferred)); - - for (R_xlen_t i = 1; i < n; i++) { - vec.push_back(arrow::r::vec_to_arrow(chunks[i], type, false)); - } - } - - return std::make_shared(std::move(vec)); -} - -#endif diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 671560e6498..732004067dd 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -289,15 +289,6 @@ BEGIN_CPP11 return cpp11::as_sexp(LargeListArray__raw_value_offsets(array)); END_CPP11 } -// array_from_vector.cpp -std::shared_ptr ChunkedArray__from_list(cpp11::list chunks, SEXP s_type); -extern "C" SEXP _arrow_ChunkedArray__from_list(SEXP chunks_sexp, SEXP s_type_sexp){ -BEGIN_CPP11 - arrow::r::Input::type chunks(chunks_sexp); - arrow::r::Input::type s_type(s_type_sexp); - return cpp11::as_sexp(ChunkedArray__from_list(chunks, s_type)); -END_CPP11 -} // array_to_vector.cpp SEXP 
Array__as_vector(const std::shared_ptr& array); extern "C" SEXP _arrow_Array__as_vector(SEXP array_sexp){ @@ -533,6 +524,15 @@ BEGIN_CPP11 return cpp11::as_sexp(ChunkedArray__ToString(x)); END_CPP11 } +// chunkedarray.cpp +std::shared_ptr ChunkedArray__from_list(cpp11::list chunks, SEXP s_type); +extern "C" SEXP _arrow_ChunkedArray__from_list(SEXP chunks_sexp, SEXP s_type_sexp){ +BEGIN_CPP11 + arrow::r::Input::type chunks(chunks_sexp); + arrow::r::Input::type s_type(s_type_sexp); + return cpp11::as_sexp(ChunkedArray__from_list(chunks, s_type)); +END_CPP11 +} // compression.cpp std::shared_ptr util___Codec__Create(arrow::Compression::type codec, R_xlen_t compression_level); extern "C" SEXP _arrow_util___Codec__Create(SEXP codec_sexp, SEXP compression_level_sexp){ @@ -3533,7 +3533,6 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_FixedSizeListArray__value_offset", (DL_FUNC) &_arrow_FixedSizeListArray__value_offset, 2}, { "_arrow_ListArray__raw_value_offsets", (DL_FUNC) &_arrow_ListArray__raw_value_offsets, 1}, { "_arrow_LargeListArray__raw_value_offsets", (DL_FUNC) &_arrow_LargeListArray__raw_value_offsets, 1}, - { "_arrow_ChunkedArray__from_list", (DL_FUNC) &_arrow_ChunkedArray__from_list, 2}, { "_arrow_Array__as_vector", (DL_FUNC) &_arrow_Array__as_vector, 1}, { "_arrow_ChunkedArray__as_vector", (DL_FUNC) &_arrow_ChunkedArray__as_vector, 1}, { "_arrow_RecordBatch__to_dataframe", (DL_FUNC) &_arrow_RecordBatch__to_dataframe, 2}, @@ -3562,6 +3561,7 @@ static const R_CallMethodDef CallEntries[] = { { "_arrow_ChunkedArray__Validate", (DL_FUNC) &_arrow_ChunkedArray__Validate, 1}, { "_arrow_ChunkedArray__Equals", (DL_FUNC) &_arrow_ChunkedArray__Equals, 2}, { "_arrow_ChunkedArray__ToString", (DL_FUNC) &_arrow_ChunkedArray__ToString, 1}, + { "_arrow_ChunkedArray__from_list", (DL_FUNC) &_arrow_ChunkedArray__from_list, 2}, { "_arrow_util___Codec__Create", (DL_FUNC) &_arrow_util___Codec__Create, 2}, { "_arrow_util___Codec__name", (DL_FUNC) 
&_arrow_util___Codec__name, 1}, { "_arrow_util___Codec__IsAvailable", (DL_FUNC) &_arrow_util___Codec__IsAvailable, 1}, diff --git a/r/src/chunkedarray.cpp b/r/src/chunkedarray.cpp index 52ceff7d914..10c6e84b3bb 100644 --- a/r/src/chunkedarray.cpp +++ b/r/src/chunkedarray.cpp @@ -19,6 +19,7 @@ #if defined(ARROW_R_WITH_ARROW) +#include #include // [[arrow::export]] @@ -94,4 +95,45 @@ std::string ChunkedArray__ToString(const std::shared_ptr& x return x->ToString(); } +// [[arrow::export]] +std::shared_ptr ChunkedArray__from_list(cpp11::list chunks, + SEXP s_type) { + std::vector> vec; + + // the type might be NULL, in which case we need to infer it from the data + // we keep track of whether it was inferred or supplied + bool type_inferred = Rf_isNull(s_type); + R_xlen_t n = XLENGTH(chunks); + + std::shared_ptr type; + if (type_inferred) { + if (n == 0) { + cpp11::stop("type must be specified for empty list"); + } + type = arrow::r::InferArrowType(VECTOR_ELT(chunks, 0)); + } else { + type = cpp11::as_cpp>(s_type); + } + + if (n == 0) { + std::shared_ptr array; + std::unique_ptr type_builder; + StopIfNotOk(arrow::MakeBuilder(gc_memory_pool(), type, &type_builder)); + StopIfNotOk(type_builder->Finish(&array)); + vec.push_back(array); + } else { + // the first - might differ from the rest of the loop + // because we might have inferred the type from the first element of the list + // + // this only really matters for dictionary arrays + vec.push_back(arrow::r::vec_to_arrow(chunks[0], type, type_inferred)); + + for (R_xlen_t i = 1; i < n; i++) { + vec.push_back(arrow::r::vec_to_arrow(chunks[i], type, false)); + } + } + + return std::make_shared(std::move(vec)); +} + #endif diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 69c485aa4a6..0235c24cfea 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -851,8 +851,8 @@ class RStructConverter : public StructConverter { auto field_name = fields[i]->name(); if (field_name != name_i) { return 
Status::RError( - "Field name in position ", i, " (", field_name, - ") does not match the name of the column of the data frame (", name_i, ")"); + "Field name in position ", i, " (", field_name, + ") does not match the name of the column of the data frame (", name_i, ")"); } } diff --git a/r/src/type_infer.cpp b/r/src/type_infer.cpp index 627bef9acd4..93e51be6462 100644 --- a/r/src/type_infer.cpp +++ b/r/src/type_infer.cpp @@ -158,7 +158,7 @@ std::shared_ptr InferArrowTypeFromVector(SEXP x) { if (Rf_isNull(ptype)) { if (XLENGTH(x) == 0) { cpp11::stop( - "Requires at least one element to infer the values' type of a list vector"); + "Requires at least one element to infer the values' type of a list vector"); } ptype = VECTOR_ELT(x, 0); @@ -191,8 +191,8 @@ std::shared_ptr InferArrowType(SEXP x) { cpp11::stop("Cannot infer type from vector"); } -} -} +} // namespace r +} // namespace arrow // [[arrow::export]] std::shared_ptr Array__infer_type(SEXP x) { From fe1c774813e9ce7123f6bb02c43bca1664e8370b Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 1 Mar 2021 12:50:30 +0100 Subject: [PATCH 82/82] revert to using Rf_length() for lists that are not data frames, at least momentarily, to help with sf geometry columns. 
--- r/src/r_to_arrow.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index 0235c24cfea..2137e0f2ad9 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -809,7 +809,16 @@ class RListConverter : public ListConverter { } auto append_value = [this](SEXP value) { - auto n = vctrs::short_vec_size(value); + // TODO: this should always use vctrs::short_vec_size + // but that introduced a regression: + // https://github.com/apache/arrow/pull/8650#issuecomment-786940734 + int n; + if (TYPEOF(value) == VECSXP && !Rf_inherits(value, "data.frame")) { + n = Rf_length(value); + } else { + n = vctrs::short_vec_size(value); + } + RETURN_NOT_OK(this->list_builder_->ValidateOverflow(n)); RETURN_NOT_OK(this->list_builder_->Append()); return this->value_converter_.get()->Extend(value, n);