From 6a2d388fc9d2102f6803e392f8b6d521e49a8898 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Thu, 3 Jun 2021 17:04:12 +0200 Subject: [PATCH 01/22] skeleton for short-circuit to altrep vector with zero copy --- r/src/array_to_vector.cpp | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index d5fae295181..de69caa1d2f 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -28,6 +28,7 @@ #include #include +#include #include namespace arrow { @@ -1270,12 +1271,34 @@ cpp11::writable::list to_dataframe_parallel( return tbl; } +#if defined(HAS_ALTREP) +SEXP ArrayVector__as_vector_altrep(const std::shared_ptr& array) { + // TODO: instead of using ArrayVector__as_vector() create an ALTREP vector + // that is backed by the memory of array + return ArrayVector__as_vector(array->length(), array->type(), {array}); +} +#endif + } // namespace r } // namespace arrow // [[arrow::export]] SEXP Array__as_vector(const std::shared_ptr& array) { - return arrow::r::ArrayVector__as_vector(array->length(), array->type(), {array}); + auto type = array->type(); + +#if defined(HAS_ALTREP) + if (array->null_count() == 0) { + switch (type->id()) { + case arrow::Type::INT32: + case arrow::Type::DOUBLE: + return arrow::r::ArrayVector__as_vector_altrep(array); + default: + break; + } + } +#endif + + return arrow::r::ArrayVector__as_vector(array->length(), type, {array}); } // [[arrow::export]] From 9f5067c775de99a0c6becec57b19caf85b24f630 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 7 Jun 2021 11:41:07 +0200 Subject: [PATCH 02/22] initial implementation for converting an Array of DoubleType that has no nulls into an altrep R numeric vector w/ no copy --- r/R/arrowExports.R | 4 ++ r/data-raw/codegen.R | 1 + r/src/altrep.cpp | 126 ++++++++++++++++++++++++++++++++++++++ r/src/array_to_vector.cpp | 11 +--- r/src/arrowExports.cpp | 16 +++++ r/src/arrow_types.h | 3 + 6 files changed, 151 insertions(+), 10 deletions(-) create mode 100644 r/src/altrep.cpp diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 577773c42bd..ca0f22e5319 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -1,5 +1,9 @@ # Generated by using data-raw/codegen.R -> do not edit by hand +Test_array_nonull_dbl_vector <- function(){ + .Call(`_arrow_Test_array_nonull_dbl_vector`) +} + Array__Slice1 <- function(array, offset){ .Call(`_arrow_Array__Slice1`, array, offset) } diff --git a/r/data-raw/codegen.R b/r/data-raw/codegen.R index 9b25cb1842c..4455bd503db 100644 --- a/r/data-raw/codegen.R +++ b/r/data-raw/codegen.R @@ -214,6 +214,7 @@ glue::glue('\n 'extern "C" void R_init_arrow(DllInfo* dll){ R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); + arrow::r::Init_Altrep_classes(dll); } \n') diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp new file mode 100644 index 00000000000..05e6e1bb747 --- /dev/null +++ b/r/src/altrep.cpp @@ -0,0 +1,126 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include + +#include "./arrow_types.h" + +#if defined(HAS_ALTREP) + +#include +#include +#include + +// [[arrow::export]] +std::shared_ptr Test_array_nonull_dbl_vector() { + // TODO: maybe there is a simpler way to build this array ? + arrow::NumericBuilder builder(arrow::float64(), + arrow::default_memory_pool()); + StopIfNotOk(builder.AppendValues({1.0, 2.0, 3.0})); + auto array = builder.Finish(); + + return array.ValueUnsafe(); +} + +namespace arrow { +namespace r { + +struct array_nonull_dbl_vector { + // altrep object around an Array of type Double with no nulls + // data1: an external pointer to a shared pointer to the Array + // data2: not used + + static R_altrep_class_t class_t; + + static SEXP Make(const std::shared_ptr& array) { + // we don't need the whole r6 object, just an external pointer + // that retain the array + cpp11::external_pointer> xp(new std::shared_ptr(array)); + + SEXP res = R_new_altrep(class_t, xp, R_NilValue); + return res; + } + + static std::shared_ptr& Get(SEXP vec) { + return *cpp11::external_pointer>(R_altrep_data1(vec)); + } + + static R_xlen_t Length(SEXP vec) { return Get(vec)->length(); } + + static Rboolean Inspect(SEXP x, int pre, int deep, int pvec, + void (*inspect_subtree)(SEXP, int, int, int)) { + Rprintf("std::shared_ptr (len=%d, ptr=%p)\n", Length(x), + Get(x).get()); + return TRUE; + } + + static const void* Dataptr_or_null(SEXP vec) { + return reinterpret_cast(Get(vec)->data()->buffers[1]->data()); + } + + static void* Dataptr(SEXP vec, Rboolean) { + return const_cast(Dataptr_or_null(vec)); + } + + // by definition, there are no NA + static int No_NA(SEXP vec) { return 1; } + + static void Init(DllInfo* dll) { + class_t = R_make_altreal_class("array_nonull_dbl_vector", "arrow", dll); + + // altrep + R_set_altrep_Length_method(class_t, Length); + R_set_altrep_Inspect_method(class_t, Inspect); + + // altvec + R_set_altvec_Dataptr_method(class_t, Dataptr); + R_set_altvec_Dataptr_or_null_method(class_t, Dataptr_or_null); + + // altreal + R_set_altreal_No_NA_method(class_t, No_NA); + } +}; + +R_altrep_class_t array_nonull_dbl_vector::class_t; + +void Init_Altrep_classes(DllInfo* dll) { array_nonull_dbl_vector::Init(dll); } + +SEXP Make_array_nonull_dbl_vector(const std::shared_ptr& array) { + return array_nonull_dbl_vector::Make(array); +} + +} // namespace r +} // namespace arrow + +// TODO: when arrow depends on R 3.5 we can eliminate this +#else + +namespace arrow { +namespace r { + +void Init_Altrep_classes(DllInfo* dll) { + // nothing +} + +SEXP Make_array_nonull_dbl_vector(const std::shared_ptr& array) { + return R_NilValue; +} + +} // namespace r +} // namespace arrow + +#endif diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index de69caa1d2f..6f5af8ac012 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -1271,14 +1271,6 @@ cpp11::writable::list to_dataframe_parallel( return tbl; } -#if defined(HAS_ALTREP) -SEXP ArrayVector__as_vector_altrep(const std::shared_ptr& array) { - // TODO: instead of using ArrayVector__as_vector() create an ALTREP vector - // that is backed by the memory of array - return ArrayVector__as_vector(array->length(), array->type(), {array}); -} -#endif - } // namespace r } // namespace arrow @@ -1289,9 +1281,8 @@ SEXP Array__as_vector(const std::shared_ptr& array) { #if defined(HAS_ALTREP) if (array->null_count() == 0) { switch (type->id()) { - case arrow::Type::INT32: case arrow::Type::DOUBLE: - return arrow::r::ArrayVector__as_vector_altrep(array); + return arrow::r::Make_array_nonull_dbl_vector(array); default: break; } diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 024e5c58b0e..d9ac56a468e 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -4,6 +4,20 @@ #include "./arrow_types.h" +// altrep.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr Test_array_nonull_dbl_vector(); +extern "C" SEXP _arrow_Test_array_nonull_dbl_vector(){ +BEGIN_CPP11 + return cpp11::as_sexp(Test_array_nonull_dbl_vector()); +END_CPP11 +} +#else +extern "C" SEXP _arrow_Test_array_nonull_dbl_vector(){ + Rf_error("Cannot call Test_array_nonull_dbl_vector(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // array.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr Array__Slice1(const std::shared_ptr& array, R_xlen_t offset); @@ -6893,6 +6907,7 @@ static const R_CallMethodDef CallEntries[] = { { "_dataset_available", (DL_FUNC)& _dataset_available, 0 }, { "_parquet_available", (DL_FUNC)& _parquet_available, 0 }, { "_s3_available", (DL_FUNC)& _s3_available, 0 }, + { "_arrow_Test_array_nonull_dbl_vector", (DL_FUNC) &_arrow_Test_array_nonull_dbl_vector, 0}, { "_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2}, { "_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3}, { "_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2}, @@ -7334,6 +7349,7 @@ static const R_CallMethodDef CallEntries[] = { extern "C" void R_init_arrow(DllInfo* dll){ R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); + arrow::r::Init_Altrep_classes(dll); } diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 09511e32e87..a77f8d628f4 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -165,6 +165,9 @@ arrow::Status InferSchemaFromDots(SEXP lst, SEXP schema_sxp, int num_fields, arrow::Status AddMetadataFromDots(SEXP lst, int num_fields, std::shared_ptr& schema); +void Init_Altrep_classes(DllInfo* dll); +SEXP Make_array_nonull_dbl_vector(const std::shared_ptr& array); + } // namespace r } // namespace arrow From 445627bdaa602be1e0ebcf34712fcfe8ace30ea7 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 8 Jun 2021 09:57:12 +0200 Subject: [PATCH 03/22] + altrep integer vector --- r/R/arrowExports.R | 8 ++- r/src/altrep.cpp | 127 ++++++++++++++++++++++++++++---------- r/src/array_to_vector.cpp | 3 + r/src/arrowExports.cpp | 27 ++++++-- r/src/arrow_types.h | 1 + 5 files changed, 127 insertions(+), 39 deletions(-) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index ca0f22e5319..998ecd886ed 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -1,7 +1,11 @@ # Generated by using data-raw/codegen.R -> do not edit by hand -Test_array_nonull_dbl_vector <- function(){ - .Call(`_arrow_Test_array_nonull_dbl_vector`) +Test_array_nonull_dbl_vector <- function(size){ + .Call(`_arrow_Test_array_nonull_dbl_vector`, size) +} + +Test_array_nonull_int_vector <- function(size){ + .Call(`_arrow_Test_array_nonull_int_vector`, size) } Array__Slice1 <- function(array, offset){ diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index 05e6e1bb747..d2d149671fe 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -25,82 +25,141 @@ #include #include +namespace arrow { +namespace r { + +template +Status GenerateArray(int64_t size, typename Type::c_type value, + std::shared_ptr* out) { + NumericBuilder builder(std::make_shared(), default_memory_pool()); + RETURN_NOT_OK(builder.Resize(size)); + for (int64_t i = 0; i < size; i++) { + RETURN_NOT_OK(builder.Append(value)); + } + return builder.Finish(out); +} + +} // namespace r +} // namespace arrow + // [[arrow::export]] -std::shared_ptr Test_array_nonull_dbl_vector() { - // TODO: maybe there is a simpler way to build this array ? - arrow::NumericBuilder builder(arrow::float64(), - arrow::default_memory_pool()); - StopIfNotOk(builder.AppendValues({1.0, 2.0, 3.0})); - auto array = builder.Finish(); - - return array.ValueUnsafe(); +std::shared_ptr Test_array_nonull_dbl_vector(int size) { + std::shared_ptr out; + StopIfNotOk(arrow::r::GenerateArray(size, 42.0, &out)); + return out; +} + +// [[arrow::export]] +std::shared_ptr Test_array_nonull_int_vector(int size) { + std::shared_ptr out; + StopIfNotOk(arrow::r::GenerateArray(size, 42, &out)); + return out; } namespace arrow { namespace r { -struct array_nonull_dbl_vector { - // altrep object around an Array of type Double with no nulls +struct array_nonull { + // altrep object around an Array with no nulls // data1: an external pointer to a shared pointer to the Array // data2: not used - static R_altrep_class_t class_t; - - static SEXP Make(const std::shared_ptr& array) { + static SEXP Make(R_altrep_class_t class_t, const std::shared_ptr& array) { // we don't need the whole r6 object, just an external pointer // that retain the array cpp11::external_pointer> xp(new std::shared_ptr(array)); SEXP res = R_new_altrep(class_t, xp, R_NilValue); + MARK_NOT_MUTABLE(res); + return res; } + static Rboolean Inspect(SEXP x, int pre, int deep, int pvec, + void (*inspect_subtree)(SEXP, int, int, int)) { + auto& array = Get(x); + Rprintf("std::shared_ptr (len=%d, ptr=%p)\n", + array->type()->ToString().c_str(), array->length(), array.get()); + return TRUE; + } + static std::shared_ptr& Get(SEXP vec) { return *cpp11::external_pointer>(R_altrep_data1(vec)); } static R_xlen_t Length(SEXP vec) { return Get(vec)->length(); } - static Rboolean Inspect(SEXP x, int pre, int deep, int pvec, - void (*inspect_subtree)(SEXP, int, int, int)) { - Rprintf("std::shared_ptr (len=%d, ptr=%p)\n", Length(x), - Get(x).get()); - return TRUE; - } - static const void* Dataptr_or_null(SEXP vec) { return reinterpret_cast(Get(vec)->data()->buffers[1]->data()); } - static void* Dataptr(SEXP vec, Rboolean) { + static SEXP Duplicate(SEXP vec, Rboolean) { + auto& array = Get(vec); + auto size = array->length(); + bool dbl = array->type_id() == Type::DOUBLE; + + SEXP copy = PROTECT(Rf_allocVector(dbl ? REALSXP : INTSXP, array->length())); + + memcpy(DATAPTR(copy), Dataptr_or_null(vec), + dbl ? (size * sizeof(double)) : (size * sizeof(int))); + + UNPROTECT(1); + return copy; + } + + static void* Dataptr(SEXP vec, Rboolean writeable) { return const_cast(Dataptr_or_null(vec)); } // by definition, there are no NA static int No_NA(SEXP vec) { return 1; } - static void Init(DllInfo* dll) { - class_t = R_make_altreal_class("array_nonull_dbl_vector", "arrow", dll); - + static void Init(R_altrep_class_t class_t, DllInfo* dll) { // altrep - R_set_altrep_Length_method(class_t, Length); - R_set_altrep_Inspect_method(class_t, Inspect); + R_set_altrep_Length_method(class_t, array_nonull::Length); + R_set_altrep_Inspect_method(class_t, array_nonull::Inspect); + R_set_altrep_Duplicate_method(class_t, array_nonull::Duplicate); // altvec - R_set_altvec_Dataptr_method(class_t, Dataptr); - R_set_altvec_Dataptr_or_null_method(class_t, Dataptr_or_null); + R_set_altvec_Dataptr_method(class_t, array_nonull::Dataptr); + R_set_altvec_Dataptr_or_null_method(class_t, array_nonull::Dataptr_or_null); + } +}; - // altreal - R_set_altreal_No_NA_method(class_t, No_NA); +struct array_nonull_dbl_vector { + static R_altrep_class_t class_t; + + static void Init(DllInfo* dll) { + class_t = R_make_altreal_class("array_nonull_dbl_vector", "arrow", dll); + array_nonull::Init(class_t, dll); + R_set_altreal_No_NA_method(class_t, array_nonull::No_NA); + } +}; + +struct array_nonull_int_vector { + static R_altrep_class_t class_t; + + static void Init(DllInfo* dll) { + class_t = R_make_altinteger_class("array_nonull_int_vector", "arrow", dll); + array_nonull::Init(class_t, dll); + R_set_altinteger_No_NA_method(class_t, array_nonull::No_NA); } }; R_altrep_class_t array_nonull_dbl_vector::class_t; +R_altrep_class_t array_nonull_int_vector::class_t; -void Init_Altrep_classes(DllInfo* dll) { array_nonull_dbl_vector::Init(dll); } +void Init_Altrep_classes(DllInfo* dll) { + array_nonull_dbl_vector::Init(dll); + array_nonull_int_vector::Init(dll); +} SEXP Make_array_nonull_dbl_vector(const std::shared_ptr& array) { - return array_nonull_dbl_vector::Make(array); + return array_nonull::Make(array_nonull_dbl_vector::class_t, array); +} + +SEXP Make_array_nonull_int_vector(const std::shared_ptr& array) { + return array_nonull::Make(array_nonull_int_vector::class_t, array); } } // namespace r @@ -120,6 +179,10 @@ SEXP Make_array_nonull_dbl_vector(const std::shared_ptr& array) { return R_NilValue; } +SEXP Make_array_nonull_int_vector(const std::shared_ptr& array) { + return R_NilValue; +} + } // namespace r } // namespace arrow diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index 6f5af8ac012..70a65da4842 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -1283,6 +1283,9 @@ SEXP Array__as_vector(const std::shared_ptr& array) { switch (type->id()) { case arrow::Type::DOUBLE: return arrow::r::Make_array_nonull_dbl_vector(array); + case arrow::Type::INT32: + return arrow::r::Make_array_nonull_int_vector(array); + default: break; } diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index d9ac56a468e..02056020705 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -6,18 +6,34 @@ // altrep.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr Test_array_nonull_dbl_vector(); -extern "C" SEXP _arrow_Test_array_nonull_dbl_vector(){ +std::shared_ptr Test_array_nonull_dbl_vector(int size); +extern "C" SEXP _arrow_Test_array_nonull_dbl_vector(SEXP size_sexp){ BEGIN_CPP11 - return cpp11::as_sexp(Test_array_nonull_dbl_vector()); + arrow::r::Input::type size(size_sexp); + return cpp11::as_sexp(Test_array_nonull_dbl_vector(size)); END_CPP11 } #else -extern "C" SEXP _arrow_Test_array_nonull_dbl_vector(){ +extern "C" SEXP _arrow_Test_array_nonull_dbl_vector(SEXP size_sexp){ Rf_error("Cannot call Test_array_nonull_dbl_vector(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); } #endif +// altrep.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr Test_array_nonull_int_vector(int size); +extern "C" SEXP _arrow_Test_array_nonull_int_vector(SEXP size_sexp){ +BEGIN_CPP11 + arrow::r::Input::type size(size_sexp); + return cpp11::as_sexp(Test_array_nonull_int_vector(size)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_Test_array_nonull_int_vector(SEXP size_sexp){ + Rf_error("Cannot call Test_array_nonull_int_vector(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // array.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr Array__Slice1(const std::shared_ptr& array, R_xlen_t offset); @@ -6907,7 +6923,8 @@ static const R_CallMethodDef CallEntries[] = { { "_dataset_available", (DL_FUNC)& _dataset_available, 0 }, { "_parquet_available", (DL_FUNC)& _parquet_available, 0 }, { "_s3_available", (DL_FUNC)& _s3_available, 0 }, - { "_arrow_Test_array_nonull_dbl_vector", (DL_FUNC) &_arrow_Test_array_nonull_dbl_vector, 0}, + { "_arrow_Test_array_nonull_dbl_vector", (DL_FUNC) &_arrow_Test_array_nonull_dbl_vector, 1}, + { "_arrow_Test_array_nonull_int_vector", (DL_FUNC) &_arrow_Test_array_nonull_int_vector, 1}, { "_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2}, { "_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3}, { "_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2}, diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index a77f8d628f4..81274f0bdf2 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -167,6 +167,7 @@ arrow::Status AddMetadataFromDots(SEXP lst, int num_fields, void Init_Altrep_classes(DllInfo* dll); SEXP Make_array_nonull_dbl_vector(const std::shared_ptr& array); +SEXP Make_array_nonull_int_vector(const std::shared_ptr& array); } // namespace r } // namespace arrow From 57d6cf745e66a560ad0bef08b5c3a02a2f73c896 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 8 Jun 2021 12:17:19 +0200 Subject: [PATCH 04/22] handle offset in Dataptr() --- r/R/arrowExports.R | 4 ++++ r/src/altrep.cpp | 45 +++++++++++++++++++++++++++++++++++---- r/src/array_to_vector.cpp | 2 ++ r/src/arrowExports.cpp | 16 ++++++++++++++ r/src/arrow_types.h | 1 + 5 files changed, 64 insertions(+), 4 deletions(-) diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 998ecd886ed..8aa25cc7be7 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -8,6 +8,10 @@ Test_array_nonull_int_vector <- function(size){ .Call(`_arrow_Test_array_nonull_int_vector`, size) } +Test_array_nonull_int64_vector <- function(size){ + .Call(`_arrow_Test_array_nonull_int64_vector`, size) +} + Array__Slice1 <- function(array, offset){ .Call(`_arrow_Array__Slice1`, array, offset) } diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index d2d149671fe..249a774dd52 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -56,6 +56,13 @@ std::shared_ptr Test_array_nonull_int_vector(int size) { return out; } +// [[arrow::export]] +std::shared_ptr Test_array_nonull_int64_vector(int size) { + std::shared_ptr out; + StopIfNotOk(arrow::r::GenerateArray(size, 42, &out)); + return out; +} + namespace arrow { namespace r { @@ -90,18 +97,22 @@ struct array_nonull { static R_xlen_t Length(SEXP vec) { return Get(vec)->length(); } static const void* Dataptr_or_null(SEXP vec) { - return reinterpret_cast(Get(vec)->data()->buffers[1]->data()); + auto& array = Get(vec); + + int size = array->type_id() == Type::INT32 ? sizeof(int) : sizeof(double); + return reinterpret_cast(array->data()->buffers[1]->data() + size * array->offset()); } static SEXP Duplicate(SEXP vec, Rboolean) { auto& array = Get(vec); auto size = array->length(); - bool dbl = array->type_id() == Type::DOUBLE; - SEXP copy = PROTECT(Rf_allocVector(dbl ? REALSXP : INTSXP, array->length())); + bool int_array = array->type_id() == Type::INT32; + + SEXP copy = PROTECT(Rf_allocVector(int_array ? INTSXP : REALSXP, array->length())); memcpy(DATAPTR(copy), Dataptr_or_null(vec), - dbl ? (size * sizeof(double)) : (size * sizeof(int))); + REALSXP ? (size * sizeof(int)) : (size * sizeof(double))); UNPROTECT(1); return copy; @@ -136,6 +147,23 @@ struct array_nonull_dbl_vector { } }; +struct array_nonull_int64_vector { + static R_altrep_class_t class_t; + + static SEXP Make(const std::shared_ptr& array) { + SEXP res = PROTECT(array_nonull::Make(class_t, array)); + Rf_setAttrib(res, R_ClassSymbol, Rf_mkString("integer64")); + UNPROTECT(1); + return res; + } + + static void Init(DllInfo* dll) { + class_t = R_make_altreal_class("array_nonull_int64_vector", "arrow", dll); + array_nonull::Init(class_t, dll); + R_set_altreal_No_NA_method(class_t, array_nonull::No_NA); + } +}; + struct array_nonull_int_vector { static R_altrep_class_t class_t; @@ -148,6 +176,7 @@ struct array_nonull_int_vector { R_altrep_class_t array_nonull_dbl_vector::class_t; R_altrep_class_t array_nonull_int_vector::class_t; +R_altrep_class_t array_nonull_int64_vector::class_t; void Init_Altrep_classes(DllInfo* dll) { array_nonull_dbl_vector::Init(dll); @@ -162,6 +191,10 @@ SEXP Make_array_nonull_int_vector(const std::shared_ptr& array) { return array_nonull::Make(array_nonull_int_vector::class_t, array); } +SEXP Make_array_nonull_int64_vector(const std::shared_ptr& array) { + return array_nonull::Make(array_nonull_int64_vector::class_t, array); +} + } // namespace r } // namespace arrow @@ -183,6 +216,10 @@ SEXP Make_array_nonull_int_vector(const std::shared_ptr& array) { return R_NilValue; } +SEXP Make_array_nonull_int64_vector(const std::shared_ptr& array) { + return R_NilValue; +} + } // namespace r } // namespace arrow diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index 70a65da4842..020cc9e9405 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -1285,6 +1285,8 @@ SEXP Array__as_vector(const std::shared_ptr& array) { return arrow::r::Make_array_nonull_dbl_vector(array); case arrow::Type::INT32: return arrow::r::Make_array_nonull_int_vector(array); + // case arrow::Type::INT64: + // return arrow::r::Make_array_nonull_int64_vector(array); default: break; diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 02056020705..4d69f479558 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -34,6 +34,21 @@ extern "C" SEXP _arrow_Test_array_nonull_int_vector(SEXP size_sexp){ } #endif +// altrep.cpp +#if defined(ARROW_R_WITH_ARROW) +std::shared_ptr Test_array_nonull_int64_vector(int size); +extern "C" SEXP _arrow_Test_array_nonull_int64_vector(SEXP size_sexp){ +BEGIN_CPP11 + arrow::r::Input::type size(size_sexp); + return cpp11::as_sexp(Test_array_nonull_int64_vector(size)); +END_CPP11 +} +#else +extern "C" SEXP _arrow_Test_array_nonull_int64_vector(SEXP size_sexp){ + Rf_error("Cannot call Test_array_nonull_int64_vector(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +} +#endif + // array.cpp #if defined(ARROW_R_WITH_ARROW) std::shared_ptr Array__Slice1(const std::shared_ptr& array, R_xlen_t offset); @@ -6925,6 +6940,7 @@ static const R_CallMethodDef CallEntries[] = { { "_s3_available", (DL_FUNC)& _s3_available, 0 }, { "_arrow_Test_array_nonull_dbl_vector", (DL_FUNC) &_arrow_Test_array_nonull_dbl_vector, 1}, { "_arrow_Test_array_nonull_int_vector", (DL_FUNC) &_arrow_Test_array_nonull_int_vector, 1}, + { "_arrow_Test_array_nonull_int64_vector", (DL_FUNC) &_arrow_Test_array_nonull_int64_vector, 1}, { "_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2}, { "_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3}, { "_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2}, diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 81274f0bdf2..678130f6604 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -168,6 +168,7 @@ arrow::Status AddMetadataFromDots(SEXP lst, int num_fields, void Init_Altrep_classes(DllInfo* dll); SEXP Make_array_nonull_dbl_vector(const std::shared_ptr& array); SEXP Make_array_nonull_int_vector(const std::shared_ptr& array); +SEXP Make_array_nonull_int64_vector(const std::shared_ptr& array); } // namespace r } // namespace arrow From 42878181d4e3a8c8ae3ffcaea356b01f33410042 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 8 Jun 2021 12:29:08 +0200 Subject: [PATCH 05/22] fix int64 support --- r/src/altrep.cpp | 6 ++++-- r/src/array_to_vector.cpp | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index 249a774dd52..da7b72e95b8 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -100,7 +100,8 @@ struct array_nonull { auto& array = Get(vec); int size = array->type_id() == Type::INT32 ? sizeof(int) : sizeof(double); - return reinterpret_cast(array->data()->buffers[1]->data() + size * array->offset()); + return reinterpret_cast(array->data()->buffers[1]->data() + + size * array->offset()); } static SEXP Duplicate(SEXP vec, Rboolean) { @@ -181,6 +182,7 @@ R_altrep_class_t array_nonull_int64_vector::class_t; void Init_Altrep_classes(DllInfo* dll) { array_nonull_dbl_vector::Init(dll); array_nonull_int_vector::Init(dll); + array_nonull_int64_vector::Init(dll); } SEXP Make_array_nonull_dbl_vector(const std::shared_ptr& array) { @@ -192,7 +194,7 @@ SEXP Make_array_nonull_int_vector(const std::shared_ptr& array) { } SEXP Make_array_nonull_int64_vector(const std::shared_ptr& array) { - return array_nonull::Make(array_nonull_int64_vector::class_t, array); + return array_nonull_int64_vector::Make(array); } } // namespace r diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index 020cc9e9405..caa71e5788f 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -1285,8 +1285,8 @@ SEXP Array__as_vector(const std::shared_ptr& array) { return arrow::r::Make_array_nonull_dbl_vector(array); case arrow::Type::INT32: return arrow::r::Make_array_nonull_int_vector(array); - // case arrow::Type::INT64: - // return arrow::r::Make_array_nonull_int64_vector(array); + case arrow::Type::INT64: + return arrow::r::Make_array_nonull_int64_vector(array); default: break; From f9c55d0184fa54685b3c9787121b261ab7206b4d Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 8 Jun 2021 13:07:57 +0200 Subject: [PATCH 06/22] turning off int64 altrep for now --- r/src/array_to_vector.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index caa71e5788f..8dd5df7fda7 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -1285,8 +1285,8 @@ SEXP Array__as_vector(const std::shared_ptr& array) { return arrow::r::Make_array_nonull_dbl_vector(array); case arrow::Type::INT32: return arrow::r::Make_array_nonull_int_vector(array); - case arrow::Type::INT64: - return arrow::r::Make_array_nonull_int64_vector(array); + // case arrow::Type::INT64: + // return arrow::r::Make_array_nonull_int64_vector(array); default: break; From 3ffc90f655dbe9eb845797f2e96bc5c880fc40c4 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 8 Jun 2021 13:30:16 +0200 Subject: [PATCH 07/22] lint --- r/src/array_to_vector.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index 8dd5df7fda7..020cc9e9405 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -1285,8 +1285,8 @@ SEXP Array__as_vector(const std::shared_ptr& array) { return arrow::r::Make_array_nonull_dbl_vector(array); case arrow::Type::INT32: return arrow::r::Make_array_nonull_int_vector(array); - // case arrow::Type::INT64: - // return arrow::r::Make_array_nonull_int64_vector(array); + // case arrow::Type::INT64: + // return arrow::r::Make_array_nonull_int64_vector(array); default: break; From 935ebbbe5551b480cdabd681a71cf2ba5c634d3b Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 8 Jun 2021 14:22:22 +0200 Subject: [PATCH 08/22] test for altrep-ness --- r/R/arrowExports.R | 12 +++---- r/src/altrep.cpp | 59 +++++++++++----------------------- r/src/arrowExports.cpp | 44 ++++++++----------------- r/tests/testthat/test-altrep.R | 32 ++++++++++++++++++ 4 files changed, 69 insertions(+), 78 deletions(-) create mode 100644 r/tests/testthat/test-altrep.R diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R index 8aa25cc7be7..9257f5787b1 100644 --- a/r/R/arrowExports.R +++ b/r/R/arrowExports.R @@ -1,15 +1,11 @@ # Generated by using data-raw/codegen.R -> do not edit by hand -Test_array_nonull_dbl_vector <- function(size){ - .Call(`_arrow_Test_array_nonull_dbl_vector`, size) +is_altrep_int_nonull <- function(x){ + .Call(`_arrow_is_altrep_int_nonull`, x) } -Test_array_nonull_int_vector <- function(size){ - .Call(`_arrow_Test_array_nonull_int_vector`, size) -} - -Test_array_nonull_int64_vector <- function(size){ - .Call(`_arrow_Test_array_nonull_int64_vector`, size) +is_altrep_dbl_nonull <- function(x){ + .Call(`_arrow_is_altrep_dbl_nonull`, x) } Array__Slice1 <- function(array, offset){ diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index da7b72e95b8..d5e38781ca5 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -23,45 +23,6 @@ #include #include -#include - -namespace arrow { -namespace r { - -template -Status GenerateArray(int64_t size, typename Type::c_type value, - std::shared_ptr* out) { - NumericBuilder builder(std::make_shared(), default_memory_pool()); - RETURN_NOT_OK(builder.Resize(size)); - for (int64_t i = 0; i < size; i++) { - RETURN_NOT_OK(builder.Append(value)); - } - return builder.Finish(out); -} - -} // namespace r -} // namespace arrow - -// [[arrow::export]] -std::shared_ptr Test_array_nonull_dbl_vector(int size) { - std::shared_ptr out; - StopIfNotOk(arrow::r::GenerateArray(size, 42.0, &out)); - return out; -} - -// [[arrow::export]] -std::shared_ptr Test_array_nonull_int_vector(int size) { - std::shared_ptr out; - StopIfNotOk(arrow::r::GenerateArray(size, 42, &out)); - return out; -} - -// [[arrow::export]] -std::shared_ptr Test_array_nonull_int64_vector(int size) { - std::shared_ptr out; - StopIfNotOk(arrow::r::GenerateArray(size, 42, &out)); - return out; -} namespace arrow { namespace r { @@ -175,8 +136,8 @@ struct array_nonull_int_vector { } }; -R_altrep_class_t array_nonull_dbl_vector::class_t; R_altrep_class_t array_nonull_int_vector::class_t; +R_altrep_class_t array_nonull_dbl_vector::class_t; R_altrep_class_t array_nonull_int64_vector::class_t; void Init_Altrep_classes(DllInfo* dll) { @@ -226,3 +187,21 @@ SEXP Make_array_nonull_int64_vector(const std::shared_ptr& array) { } // namespace arrow #endif + +// [[arrow::export]] +bool is_altrep_int_nonull(SEXP x) { +#if defined(HAS_ALTREP) + return R_altrep_inherits(x, arrow::r::array_nonull_int_vector::class_t); +#else + return false; +#endif +} + +// [[arrow::export]] +bool is_altrep_dbl_nonull(SEXP x) { +#if defined(HAS_ALTREP) + return R_altrep_inherits(x, arrow::r::array_nonull_dbl_vector::class_t); +#else + return false; +#endif +} diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 4d69f479558..15da08c7f01 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -6,46 +6,31 @@ // altrep.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr Test_array_nonull_dbl_vector(int size); -extern "C" SEXP _arrow_Test_array_nonull_dbl_vector(SEXP size_sexp){ +bool is_altrep_int_nonull(SEXP x); +extern "C" SEXP _arrow_is_altrep_int_nonull(SEXP x_sexp){ BEGIN_CPP11 - arrow::r::Input::type size(size_sexp); - return cpp11::as_sexp(Test_array_nonull_dbl_vector(size)); -END_CPP11 -} -#else -extern "C" SEXP _arrow_Test_array_nonull_dbl_vector(SEXP size_sexp){ - Rf_error("Cannot call Test_array_nonull_dbl_vector(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); -} -#endif - -// altrep.cpp -#if defined(ARROW_R_WITH_ARROW) -std::shared_ptr Test_array_nonull_int_vector(int size); -extern "C" SEXP _arrow_Test_array_nonull_int_vector(SEXP size_sexp){ -BEGIN_CPP11 - arrow::r::Input::type size(size_sexp); - return cpp11::as_sexp(Test_array_nonull_int_vector(size)); + arrow::r::Input::type x(x_sexp); + return cpp11::as_sexp(is_altrep_int_nonull(x)); END_CPP11 } #else -extern "C" SEXP _arrow_Test_array_nonull_int_vector(SEXP size_sexp){ - Rf_error("Cannot call Test_array_nonull_int_vector(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +extern "C" SEXP _arrow_is_altrep_int_nonull(SEXP x_sexp){ + Rf_error("Cannot call is_altrep_int_nonull(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); } #endif // altrep.cpp #if defined(ARROW_R_WITH_ARROW) -std::shared_ptr Test_array_nonull_int64_vector(int size); -extern "C" SEXP _arrow_Test_array_nonull_int64_vector(SEXP size_sexp){ +bool is_altrep_dbl_nonull(SEXP x); +extern "C" SEXP _arrow_is_altrep_dbl_nonull(SEXP x_sexp){ BEGIN_CPP11 - arrow::r::Input::type size(size_sexp); - return cpp11::as_sexp(Test_array_nonull_int64_vector(size)); + arrow::r::Input::type x(x_sexp); + return cpp11::as_sexp(is_altrep_dbl_nonull(x)); END_CPP11 } #else -extern "C" SEXP _arrow_Test_array_nonull_int64_vector(SEXP size_sexp){ - Rf_error("Cannot call Test_array_nonull_int64_vector(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); +extern "C" SEXP _arrow_is_altrep_dbl_nonull(SEXP x_sexp){ + Rf_error("Cannot call is_altrep_dbl_nonull(). See https://arrow.apache.org/docs/r/articles/install.html for help installing Arrow C++ libraries. "); } #endif @@ -6938,9 +6923,8 @@ static const R_CallMethodDef CallEntries[] = { { "_dataset_available", (DL_FUNC)& _dataset_available, 0 }, { "_parquet_available", (DL_FUNC)& _parquet_available, 0 }, { "_s3_available", (DL_FUNC)& _s3_available, 0 }, - { "_arrow_Test_array_nonull_dbl_vector", (DL_FUNC) &_arrow_Test_array_nonull_dbl_vector, 1}, - { "_arrow_Test_array_nonull_int_vector", (DL_FUNC) &_arrow_Test_array_nonull_int_vector, 1}, - { "_arrow_Test_array_nonull_int64_vector", (DL_FUNC) &_arrow_Test_array_nonull_int64_vector, 1}, + { "_arrow_is_altrep_int_nonull", (DL_FUNC) &_arrow_is_altrep_int_nonull, 1}, + { "_arrow_is_altrep_dbl_nonull", (DL_FUNC) &_arrow_is_altrep_dbl_nonull, 1}, { "_arrow_Array__Slice1", (DL_FUNC) &_arrow_Array__Slice1, 2}, { "_arrow_Array__Slice2", (DL_FUNC) &_arrow_Array__Slice2, 3}, { "_arrow_Array__IsNull", (DL_FUNC) &_arrow_Array__IsNull, 2}, diff --git a/r/tests/testthat/test-altrep.R b/r/tests/testthat/test-altrep.R new file mode 100644 index 00000000000..43a404ef0f6 --- /dev/null +++ b/r/tests/testthat/test-altrep.R @@ -0,0 +1,32 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +context("altrep") + +skip_if(getRversion() <= "3.5.0") + +test_that("altrep vectors from int32 arrays with no nulls", { + v <- Array$create(1:1000) + expect_true(is_altrep_int_nonull(as.vector(v))) + expect_true(is_altrep_int_nonull(as.vector(v$Slice(1)))) +}) + +test_that("altrep vectors from double arrays with no nulls", { + v <- Array$create(as.numeric(1:1000)) + expect_true(is_altrep_dbl_nonull(as.vector(v))) + expect_true(is_altrep_dbl_nonull(as.vector(v$Slice(1)))) +}) From 8721259963e335d2098768dfdc7dc5e2690c2230 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 9 Jun 2021 13:03:19 +0200 Subject: [PATCH 09/22] remove int64 specific altrep code and condition altrepification on the `arrow.use_altrep` option (with default to TRUE: opt out) --- r/src/altrep.cpp | 29 +---------------------------- r/src/array_to_vector.cpp | 5 +---- r/src/arrow_types.h | 1 - r/tests/testthat/test-altrep.R | 20 ++++++++++++++++++++ 4 files changed, 22 insertions(+), 33 deletions(-) diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index d5e38781ca5..2a1a2138f25 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -74,7 +74,7 @@ struct array_nonull { SEXP copy = PROTECT(Rf_allocVector(int_array ? INTSXP : REALSXP, array->length())); memcpy(DATAPTR(copy), Dataptr_or_null(vec), - REALSXP ? (size * sizeof(int)) : (size * sizeof(double))); + int_array ? (size * sizeof(int)) : (size * sizeof(double))); UNPROTECT(1); return copy; @@ -109,23 +109,6 @@ struct array_nonull_dbl_vector { } }; -struct array_nonull_int64_vector { - static R_altrep_class_t class_t; - - static SEXP Make(const std::shared_ptr& array) { - SEXP res = PROTECT(array_nonull::Make(class_t, array)); - Rf_setAttrib(res, R_ClassSymbol, Rf_mkString("integer64")); - UNPROTECT(1); - return res; - } - - static void Init(DllInfo* dll) { - class_t = R_make_altreal_class("array_nonull_int64_vector", "arrow", dll); - array_nonull::Init(class_t, dll); - R_set_altreal_No_NA_method(class_t, array_nonull::No_NA); - } -}; - struct array_nonull_int_vector { static R_altrep_class_t class_t; @@ -138,12 +121,10 @@ struct array_nonull_int_vector { R_altrep_class_t array_nonull_int_vector::class_t; R_altrep_class_t array_nonull_dbl_vector::class_t; -R_altrep_class_t array_nonull_int64_vector::class_t; void Init_Altrep_classes(DllInfo* dll) { array_nonull_dbl_vector::Init(dll); array_nonull_int_vector::Init(dll); - array_nonull_int64_vector::Init(dll); } SEXP Make_array_nonull_dbl_vector(const std::shared_ptr& array) { @@ -154,10 +135,6 @@ SEXP Make_array_nonull_int_vector(const std::shared_ptr& array) { return array_nonull::Make(array_nonull_int_vector::class_t, array); } -SEXP Make_array_nonull_int64_vector(const std::shared_ptr& array) { - return array_nonull_int64_vector::Make(array); -} - } // namespace r } // namespace arrow @@ -179,10 +156,6 @@ SEXP Make_array_nonull_int_vector(const std::shared_ptr& array) { return R_NilValue; } -SEXP Make_array_nonull_int64_vector(const std::shared_ptr& array) { - return R_NilValue; -} - } // namespace r } // namespace arrow diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index 020cc9e9405..9af1eeff503 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -1279,15 +1279,12 @@ SEXP Array__as_vector(const std::shared_ptr& array) { auto type = array->type(); #if defined(HAS_ALTREP) - if (array->null_count() == 0) { + if (array->null_count() == 0 && arrow::r::GetBoolOption("arrow.use_altrep", true)) { switch (type->id()) { case arrow::Type::DOUBLE: return arrow::r::Make_array_nonull_dbl_vector(array); case arrow::Type::INT32: return arrow::r::Make_array_nonull_int_vector(array); - // case arrow::Type::INT64: - // return arrow::r::Make_array_nonull_int64_vector(array); - default: break; } diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 678130f6604..81274f0bdf2 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -168,7 +168,6 @@ arrow::Status AddMetadataFromDots(SEXP lst, int num_fields, void Init_Altrep_classes(DllInfo* dll); SEXP Make_array_nonull_dbl_vector(const std::shared_ptr& array); SEXP Make_array_nonull_int_vector(const std::shared_ptr& array); -SEXP Make_array_nonull_int64_vector(const std::shared_ptr& array); } // namespace r } // namespace arrow diff --git a/r/tests/testthat/test-altrep.R b/r/tests/testthat/test-altrep.R index 43a404ef0f6..01642f7a84f 100644 --- a/r/tests/testthat/test-altrep.R +++ b/r/tests/testthat/test-altrep.R @@ -20,13 +20,33 @@ context("altrep") skip_if(getRversion() <= "3.5.0") test_that("altrep vectors from int32 arrays with no nulls", { + withr::local_options(list(arrow.use_altrep = TRUE)) v <- Array$create(1:1000) expect_true(is_altrep_int_nonull(as.vector(v))) expect_true(is_altrep_int_nonull(as.vector(v$Slice(1)))) + + withr::local_options(list(arrow.use_altrep = NULL)) + v <- Array$create(1:1000) + expect_true(is_altrep_int_nonull(as.vector(v))) + expect_true(is_altrep_int_nonull(as.vector(v$Slice(1)))) + + withr::local_options(list(arrow.use_altrep = FALSE)) + expect_false(is_altrep_int_nonull(as.vector(v))) + expect_false(is_altrep_int_nonull(as.vector(v$Slice(1)))) }) test_that("altrep vectors from double arrays with no nulls", { + withr::local_options(list(arrow.use_altrep = TRUE)) v <- Array$create(as.numeric(1:1000)) expect_true(is_altrep_dbl_nonull(as.vector(v))) expect_true(is_altrep_dbl_nonull(as.vector(v$Slice(1)))) + + withr::local_options(list(arrow.use_altrep = NULL)) + expect_true(is_altrep_dbl_nonull(as.vector(v))) + expect_true(is_altrep_dbl_nonull(as.vector(v$Slice(1)))) + + withr::local_options(list(arrow.use_altrep = FALSE)) + expect_false(is_altrep_dbl_nonull(as.vector(v))) + expect_false(is_altrep_dbl_nonull(as.vector(v$Slice(1)))) }) + From 0485ce1b8dbb94093168b478ec18b272a534f176 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Thu, 17 Jun 2021 14:50:47 +0200 Subject: [PATCH 10/22] - Make array_nonnull a template - const std::shared_ptr& Get() --- r/src/altrep.cpp | 41 ++++++++++++++++++++++++----------------- 1 file changed, 24 insertions(+), 17 deletions(-) diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index 2a1a2138f25..82c10df39d4 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -27,7 +27,10 @@ namespace arrow { namespace r { +template struct array_nonull { + using data_type = typename std::conditional::type; + // altrep object around an Array with no nulls // data1: an external pointer to a shared pointer to the Array // data2: not used @@ -45,36 +48,32 @@ struct array_nonull { static Rboolean Inspect(SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int)) { - auto& array = Get(x); + const auto& array = Get(x); Rprintf("std::shared_ptr (len=%d, ptr=%p)\n", array->type()->ToString().c_str(), array->length(), array.get()); return TRUE; } - static std::shared_ptr& Get(SEXP vec) { + static const std::shared_ptr& Get(SEXP vec) { return *cpp11::external_pointer>(R_altrep_data1(vec)); } static R_xlen_t Length(SEXP vec) { return Get(vec)->length(); } static const void* Dataptr_or_null(SEXP vec) { - auto& array = Get(vec); + const auto& array = Get(vec); - int size = array->type_id() == Type::INT32 ? sizeof(int) : sizeof(double); return reinterpret_cast(array->data()->buffers[1]->data() + - size * array->offset()); + sizeof(data_type) * array->offset()); } static SEXP Duplicate(SEXP vec, Rboolean) { - auto& array = Get(vec); + const auto& array = Get(vec); auto size = array->length(); - bool int_array = array->type_id() == Type::INT32; - - SEXP copy = PROTECT(Rf_allocVector(int_array ? INTSXP : REALSXP, array->length())); + SEXP copy = PROTECT(Rf_allocVector(sexp_type, array->length())); - memcpy(DATAPTR(copy), Dataptr_or_null(vec), - int_array ? (size * sizeof(int)) : (size * sizeof(double))); + memcpy(DATAPTR(copy), Dataptr_or_null(vec), size * sizeof(data_type)); UNPROTECT(1); return copy; @@ -104,8 +103,12 @@ struct array_nonull_dbl_vector { static void Init(DllInfo* dll) { class_t = R_make_altreal_class("array_nonull_dbl_vector", "arrow", dll); - array_nonull::Init(class_t, dll); - R_set_altreal_No_NA_method(class_t, array_nonull::No_NA); + array_nonull::Init(class_t, dll); + R_set_altreal_No_NA_method(class_t, array_nonull::No_NA); + } + + static SEXP Make(const std::shared_ptr& array) { + return array_nonull::Make(class_t, array); } }; @@ -114,8 +117,12 @@ struct array_nonull_int_vector { static void Init(DllInfo* dll) { class_t = R_make_altinteger_class("array_nonull_int_vector", "arrow", dll); - array_nonull::Init(class_t, dll); - R_set_altinteger_No_NA_method(class_t, array_nonull::No_NA); + array_nonull::Init(class_t, dll); + R_set_altinteger_No_NA_method(class_t, array_nonull::No_NA); + } + + static SEXP Make(const std::shared_ptr& array) { + return array_nonull::Make(class_t, array); } }; @@ -128,11 +135,11 @@ void Init_Altrep_classes(DllInfo* dll) { } SEXP Make_array_nonull_dbl_vector(const std::shared_ptr& array) { - return array_nonull::Make(array_nonull_dbl_vector::class_t, array); + return array_nonull_dbl_vector::Make(array); } SEXP Make_array_nonull_int_vector(const std::shared_ptr& array) { - return array_nonull::Make(array_nonull_int_vector::class_t, array); + return array_nonull_int_vector::Make(array); } } // namespace r From 5af3bdb41f5caa01603e046afde49f4e416d8f3a Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Thu, 17 Jun 2021 15:13:47 +0200 Subject: [PATCH 11/22] use GetValues<>(1) --- r/src/altrep.cpp | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index 82c10df39d4..d0e61be91f8 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -61,10 +61,7 @@ struct array_nonull { static R_xlen_t Length(SEXP vec) { return Get(vec)->length(); } static const void* Dataptr_or_null(SEXP vec) { - const auto& array = Get(vec); - - return reinterpret_cast(array->data()->buffers[1]->data() + - sizeof(data_type) * array->offset()); + return Get(vec)->data()->GetValues(1); } static SEXP Duplicate(SEXP vec, Rboolean) { From cb714673374f53af35d181724037cec7b8eeb6a5 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Thu, 17 Jun 2021 15:36:05 +0200 Subject: [PATCH 12/22] more tests --- r/tests/testthat/test-altrep.R | 54 ++++++++++++++++++++++------------ 1 file changed, 35 insertions(+), 19 deletions(-) diff --git a/r/tests/testthat/test-altrep.R b/r/tests/testthat/test-altrep.R index 01642f7a84f..c19a225d9f5 100644 --- a/r/tests/testthat/test-altrep.R +++ b/r/tests/testthat/test-altrep.R @@ -19,34 +19,50 @@ context("altrep") skip_if(getRversion() <= "3.5.0") -test_that("altrep vectors from int32 arrays with no nulls", { +test_that("altrep vectors from int32 and dbl arrays with no nulls", { withr::local_options(list(arrow.use_altrep = TRUE)) - v <- Array$create(1:1000) - expect_true(is_altrep_int_nonull(as.vector(v))) - expect_true(is_altrep_int_nonull(as.vector(v$Slice(1)))) + v_int <- Array$create(1:1000) + v_dbl <- Array$create(as.numeric(1:1000)) + + expect_true(is_altrep_int_nonull(as.vector(v_int))) + expect_true(is_altrep_int_nonull(as.vector(v_int$Slice(1)))) + expect_true(is_altrep_dbl_nonull(as.vector(v_dbl))) + expect_true(is_altrep_dbl_nonull(as.vector(v_dbl$Slice(1)))) withr::local_options(list(arrow.use_altrep = NULL)) - v <- Array$create(1:1000) - expect_true(is_altrep_int_nonull(as.vector(v))) - expect_true(is_altrep_int_nonull(as.vector(v$Slice(1)))) + expect_true(is_altrep_int_nonull(as.vector(v_int))) + expect_true(is_altrep_int_nonull(as.vector(v_int$Slice(1)))) + expect_true(is_altrep_dbl_nonull(as.vector(v_dbl))) + expect_true(is_altrep_dbl_nonull(as.vector(v_dbl$Slice(1)))) withr::local_options(list(arrow.use_altrep = FALSE)) - expect_false(is_altrep_int_nonull(as.vector(v))) - expect_false(is_altrep_int_nonull(as.vector(v$Slice(1)))) + expect_false(is_altrep_int_nonull(as.vector(v_int))) + expect_false(is_altrep_int_nonull(as.vector(v_int$Slice(1)))) + expect_false(is_altrep_dbl_nonull(as.vector(v_dbl))) + expect_false(is_altrep_dbl_nonull(as.vector(v_dbl$Slice(1)))) }) -test_that("altrep vectors from double arrays with no nulls", { +test_that("altrep vectors from int32 and dbl arrays with nulls", { withr::local_options(list(arrow.use_altrep = TRUE)) - v <- Array$create(as.numeric(1:1000)) - expect_true(is_altrep_dbl_nonull(as.vector(v))) - expect_true(is_altrep_dbl_nonull(as.vector(v$Slice(1)))) + v_int <- Array$create(c(1L, NA, 3L)) + v_dbl <- Array$create(c(1, NA, 3)) - withr::local_options(list(arrow.use_altrep = NULL)) - expect_true(is_altrep_dbl_nonull(as.vector(v))) - expect_true(is_altrep_dbl_nonull(as.vector(v$Slice(1)))) + # cannot be altrep because one NA + expect_false(is_altrep_int_nonull(as.vector(v_int))) + expect_false(is_altrep_int_nonull(as.vector(v_int$Slice(1)))) + expect_false(is_altrep_dbl_nonull(as.vector(v_dbl))) + expect_false(is_altrep_dbl_nonull(as.vector(v_dbl$Slice(1)))) - withr::local_options(list(arrow.use_altrep = FALSE)) - expect_false(is_altrep_dbl_nonull(as.vector(v))) - expect_false(is_altrep_dbl_nonull(as.vector(v$Slice(1)))) + # but then, no NA beyond, so can be altrep again + expect_true(is_altrep_int_nonull(as.vector(v_int$Slice(2)))) + expect_true(is_altrep_dbl_nonull(as.vector(v_dbl$Slice(2)))) }) +test_that("empty vectors are not altrep", { + withr::local_options(list(arrow.use_altrep = TRUE)) + v_int <- Array$create(integer()) + v_dbl <- Array$create(numeric()) + + expect_false(is_altrep_int_nonull(as.vector(v_int))) + expect_false(is_altrep_dbl_nonull(as.vector(v_dbl))) +}) From ac371d234ce9e6a65337b1b78d893c387cb85fb2 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Thu, 17 Jun 2021 15:36:26 +0200 Subject: [PATCH 13/22] swap again option and null_count, only altrep non empty --- r/src/array_to_vector.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index 9af1eeff503..c9f10927000 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -1279,7 +1279,8 @@ SEXP Array__as_vector(const std::shared_ptr& array) { auto type = array->type(); #if defined(HAS_ALTREP) - if (array->null_count() == 0 && arrow::r::GetBoolOption("arrow.use_altrep", true)) { + if (arrow::r::GetBoolOption("arrow.use_altrep", true) && array->length() > 0 && + array->null_count() == 0) { switch (type->id()) { case arrow::Type::DOUBLE: return arrow::r::Make_array_nonull_dbl_vector(array); From 26d4721d36b26a2389a842edea9c8fade97f7aa9 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 18 Jun 2021 11:20:58 +0200 Subject: [PATCH 14/22] chunked arrays with only one array get altrep'ed --- r/src/array_to_vector.cpp | 40 +++++++++++++++++++--------------- r/tests/testthat/test-altrep.R | 28 ++++++++++++++++++++++++ 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index c9f10927000..67d831f1d93 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -144,6 +144,24 @@ Status IngestSome(const std::shared_ptr& array, R_xlen_t n, // Allocate + Ingest SEXP ArrayVector__as_vector(R_xlen_t n, const std::shared_ptr& type, const ArrayVector& arrays) { + // special case when there is only one array +#if defined(HAS_ALTREP) + if (arrays.size() == 1) { + const auto& array = arrays[0]; + if (arrow::r::GetBoolOption("arrow.use_altrep", true) && array->length() > 0 && + array->null_count() == 0) { + switch (type->id()) { + case arrow::Type::DOUBLE: + return arrow::r::Make_array_nonull_dbl_vector(array); + case arrow::Type::INT32: + return arrow::r::Make_array_nonull_int_vector(array); + default: + break; + } + } + } +#endif + auto converter = Converter::Make(type, arrays); SEXP data = PROTECT(converter->Allocate(n)); StopIfNotOk(converter->IngestSerial(data)); @@ -1276,27 +1294,15 @@ cpp11::writable::list to_dataframe_parallel( // [[arrow::export]] SEXP Array__as_vector(const std::shared_ptr& array) { - auto type = array->type(); - -#if defined(HAS_ALTREP) - if (arrow::r::GetBoolOption("arrow.use_altrep", true) && array->length() > 0 && - array->null_count() == 0) { - switch (type->id()) { - case arrow::Type::DOUBLE: - return arrow::r::Make_array_nonull_dbl_vector(array); - case arrow::Type::INT32: - return arrow::r::Make_array_nonull_int_vector(array); - default: - break; - } - } -#endif - - return arrow::r::ArrayVector__as_vector(array->length(), type, {array}); + return arrow::r::ArrayVector__as_vector(array->length(), array->type(), {array}); } // [[arrow::export]] SEXP ChunkedArray__as_vector(const std::shared_ptr& chunked_array) { + if (chunked_array->num_chunks() == 1) { + return Array__as_vector(chunked_array->chunk(0)); + } + return arrow::r::ArrayVector__as_vector(chunked_array->length(), chunked_array->type(), chunked_array->chunks()); } diff --git a/r/tests/testthat/test-altrep.R b/r/tests/testthat/test-altrep.R index c19a225d9f5..ec1c671b12e 100644 --- a/r/tests/testthat/test-altrep.R +++ b/r/tests/testthat/test-altrep.R @@ -23,12 +23,22 @@ test_that("altrep vectors from int32 and dbl arrays with no nulls", { withr::local_options(list(arrow.use_altrep = TRUE)) v_int <- Array$create(1:1000) v_dbl <- Array$create(as.numeric(1:1000)) + c_int <- ChunkedArray$create(1:1000) + c_dbl <- ChunkedArray$create(as.numeric(1:1000)) expect_true(is_altrep_int_nonull(as.vector(v_int))) expect_true(is_altrep_int_nonull(as.vector(v_int$Slice(1)))) expect_true(is_altrep_dbl_nonull(as.vector(v_dbl))) expect_true(is_altrep_dbl_nonull(as.vector(v_dbl$Slice(1)))) + expect_equal(c_int$num_chunks, 1L) + expect_true(is_altrep_int_nonull(as.vector(c_int))) + expect_true(is_altrep_int_nonull(as.vector(c_int$Slice(1)))) + + expect_equal(c_dbl$num_chunks, 1L) + expect_true(is_altrep_dbl_nonull(as.vector(c_dbl))) + expect_true(is_altrep_dbl_nonull(as.vector(c_dbl$Slice(1)))) + withr::local_options(list(arrow.use_altrep = NULL)) expect_true(is_altrep_int_nonull(as.vector(v_int))) expect_true(is_altrep_int_nonull(as.vector(v_int$Slice(1)))) @@ -46,16 +56,34 @@ test_that("altrep vectors from int32 and dbl arrays with nulls", { withr::local_options(list(arrow.use_altrep = TRUE)) v_int <- Array$create(c(1L, NA, 3L)) v_dbl <- Array$create(c(1, NA, 3)) + c_int <- ChunkedArray$create(c(1L, NA, 3L)) + c_dbl <- ChunkedArray$create(c(1, NA, 3)) # cannot be altrep because one NA expect_false(is_altrep_int_nonull(as.vector(v_int))) expect_false(is_altrep_int_nonull(as.vector(v_int$Slice(1)))) expect_false(is_altrep_dbl_nonull(as.vector(v_dbl))) expect_false(is_altrep_dbl_nonull(as.vector(v_dbl$Slice(1)))) + expect_false(is_altrep_int_nonull(as.vector(c_int))) + expect_false(is_altrep_int_nonull(as.vector(c_int$Slice(1)))) + expect_false(is_altrep_dbl_nonull(as.vector(c_dbl))) + expect_false(is_altrep_dbl_nonull(as.vector(c_dbl$Slice(1)))) # but then, no NA beyond, so can be altrep again expect_true(is_altrep_int_nonull(as.vector(v_int$Slice(2)))) expect_true(is_altrep_dbl_nonull(as.vector(v_dbl$Slice(2)))) + expect_true(is_altrep_int_nonull(as.vector(c_int$Slice(2)))) + expect_true(is_altrep_dbl_nonull(as.vector(c_dbl$Slice(2)))) + + # chunked array with 2 chunks cannot be altrep + c_int <- ChunkedArray$create(0L, c(1L, NA, 3L)) + c_dbl <- ChunkedArray$create(0, c(1, NA, 3)) + expect_equal(c_int$num_chunks, 2L) + expect_equal(c_dbl$num_chunks, 2L) + expect_false(is_altrep_int_nonull(as.vector(c_int))) + expect_false(is_altrep_dbl_nonull(as.vector(c_dbl))) + expect_true(is_altrep_int_nonull(as.vector(c_int$Slice(3)))) + expect_true(is_altrep_dbl_nonull(as.vector(c_dbl$Slice(3)))) }) test_that("empty vectors are not altrep", { From 1dc89eb0d4c9fda181aa1b581ad0c1b44cf5ec73 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 18 Jun 2021 15:00:59 +0200 Subject: [PATCH 15/22] + template keyword --- r/src/altrep.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index d0e61be91f8..5cb9f3a6e61 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -61,7 +61,7 @@ struct array_nonull { static R_xlen_t Length(SEXP vec) { return Get(vec)->length(); } static const void* Dataptr_or_null(SEXP vec) { - return Get(vec)->data()->GetValues(1); + return Get(vec)->data()->template GetValues(1); } static SEXP Duplicate(SEXP vec, Rboolean) { From cee2e60b1b8237e013083967ac96d22193456d9b Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Fri, 18 Jun 2021 15:37:47 +0200 Subject: [PATCH 16/22] simplify and CamelCase --- r/data-raw/codegen.R | 4 +++ r/src/altrep.cpp | 69 ++++++++++++++------------------------- r/src/array_to_vector.cpp | 4 +-- r/src/arrowExports.cpp | 4 +++ r/src/arrow_types.h | 6 ++-- 5 files changed, 38 insertions(+), 49 deletions(-) diff --git a/r/data-raw/codegen.R b/r/data-raw/codegen.R index 4455bd503db..1a49ffc80fa 100644 --- a/r/data-raw/codegen.R +++ b/r/data-raw/codegen.R @@ -214,7 +214,11 @@ glue::glue('\n 'extern "C" void R_init_arrow(DllInfo* dll){ R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); + + #if defined(HAS_ALTREP) arrow::r::Init_Altrep_classes(dll); + #endif + } \n') diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index 5cb9f3a6e61..03765d4520a 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -28,7 +28,7 @@ namespace arrow { namespace r { template -struct array_nonull { +struct ArrayNoNull { using data_type = typename std::conditional::type; // altrep object around an Array with no nulls @@ -85,79 +85,58 @@ struct array_nonull { static void Init(R_altrep_class_t class_t, DllInfo* dll) { // altrep - R_set_altrep_Length_method(class_t, array_nonull::Length); - R_set_altrep_Inspect_method(class_t, array_nonull::Inspect); - R_set_altrep_Duplicate_method(class_t, array_nonull::Duplicate); + R_set_altrep_Length_method(class_t, ArrayNoNull::Length); + R_set_altrep_Inspect_method(class_t, ArrayNoNull::Inspect); + R_set_altrep_Duplicate_method(class_t, ArrayNoNull::Duplicate); // altvec - R_set_altvec_Dataptr_method(class_t, array_nonull::Dataptr); - R_set_altvec_Dataptr_or_null_method(class_t, array_nonull::Dataptr_or_null); + R_set_altvec_Dataptr_method(class_t, ArrayNoNull::Dataptr); + R_set_altvec_Dataptr_or_null_method(class_t, ArrayNoNull::Dataptr_or_null); } }; -struct array_nonull_dbl_vector { +struct DoubleArrayNoNull { static R_altrep_class_t class_t; static void Init(DllInfo* dll) { class_t = R_make_altreal_class("array_nonull_dbl_vector", "arrow", dll); - array_nonull::Init(class_t, dll); - R_set_altreal_No_NA_method(class_t, array_nonull::No_NA); + ArrayNoNull::Init(class_t, dll); + R_set_altreal_No_NA_method(class_t, ArrayNoNull::No_NA); } static SEXP Make(const std::shared_ptr& array) { - return array_nonull::Make(class_t, array); + return ArrayNoNull::Make(class_t, array); } }; -struct array_nonull_int_vector { +struct Int32ArrayNoNull { static R_altrep_class_t class_t; static void Init(DllInfo* dll) { class_t = R_make_altinteger_class("array_nonull_int_vector", "arrow", dll); - array_nonull::Init(class_t, dll); - R_set_altinteger_No_NA_method(class_t, array_nonull::No_NA); + ArrayNoNull::Init(class_t, dll); + R_set_altinteger_No_NA_method(class_t, ArrayNoNull::No_NA); } static SEXP Make(const std::shared_ptr& array) { - return array_nonull::Make(class_t, array); + return ArrayNoNull::Make(class_t, array); } }; -R_altrep_class_t array_nonull_int_vector::class_t; -R_altrep_class_t array_nonull_dbl_vector::class_t; +R_altrep_class_t Int32ArrayNoNull::class_t; +R_altrep_class_t DoubleArrayNoNull::class_t; void Init_Altrep_classes(DllInfo* dll) { - array_nonull_dbl_vector::Init(dll); - array_nonull_int_vector::Init(dll); + DoubleArrayNoNull::Init(dll); + Int32ArrayNoNull::Init(dll); } -SEXP Make_array_nonull_dbl_vector(const std::shared_ptr& array) { - return array_nonull_dbl_vector::Make(array); +SEXP MakeDoubleArrayNoNull(const std::shared_ptr& array) { + return DoubleArrayNoNull::Make(array); } -SEXP Make_array_nonull_int_vector(const std::shared_ptr& array) { - return array_nonull_int_vector::Make(array); -} - -} // namespace r -} // namespace arrow - -// TODO: when arrow depends on R 3.5 we can eliminate this -#else - -namespace arrow { -namespace r { - -void Init_Altrep_classes(DllInfo* dll) { - // nothing -} - -SEXP Make_array_nonull_dbl_vector(const std::shared_ptr& array) { - return R_NilValue; -} - -SEXP Make_array_nonull_int_vector(const std::shared_ptr& array) { - return R_NilValue; +SEXP MakeInt32ArrayNoNull(const std::shared_ptr& array) { + return Int32ArrayNoNull::Make(array); } } // namespace r @@ -168,7 +147,7 @@ SEXP Make_array_nonull_int_vector(const std::shared_ptr& array) { // [[arrow::export]] bool is_altrep_int_nonull(SEXP x) { #if defined(HAS_ALTREP) - return R_altrep_inherits(x, arrow::r::array_nonull_int_vector::class_t); + return R_altrep_inherits(x, arrow::r::Int32ArrayNoNull::class_t); #else return false; #endif @@ -177,7 +156,7 @@ bool is_altrep_int_nonull(SEXP x) { // [[arrow::export]] bool is_altrep_dbl_nonull(SEXP x) { #if defined(HAS_ALTREP) - return R_altrep_inherits(x, arrow::r::array_nonull_dbl_vector::class_t); + return R_altrep_inherits(x, arrow::r::DoubleArrayNoNull::class_t); #else return false; #endif diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index 67d831f1d93..3d6c403b29d 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -152,9 +152,9 @@ SEXP ArrayVector__as_vector(R_xlen_t n, const std::shared_ptr& type, array->null_count() == 0) { switch (type->id()) { case arrow::Type::DOUBLE: - return arrow::r::Make_array_nonull_dbl_vector(array); + return arrow::r::MakeDoubleArrayNoNull(array); case arrow::Type::INT32: - return arrow::r::Make_array_nonull_int_vector(array); + return arrow::r::MakeInt32ArrayNoNull(array); default: break; } diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp index 15da08c7f01..427844a3c8e 100644 --- a/r/src/arrowExports.cpp +++ b/r/src/arrowExports.cpp @@ -7366,7 +7366,11 @@ static const R_CallMethodDef CallEntries[] = { extern "C" void R_init_arrow(DllInfo* dll){ R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); + + #if defined(HAS_ALTREP) arrow::r::Init_Altrep_classes(dll); + #endif + } diff --git a/r/src/arrow_types.h b/r/src/arrow_types.h index 81274f0bdf2..68e1c8659c4 100644 --- a/r/src/arrow_types.h +++ b/r/src/arrow_types.h @@ -165,9 +165,11 @@ arrow::Status InferSchemaFromDots(SEXP lst, SEXP schema_sxp, int num_fields, arrow::Status AddMetadataFromDots(SEXP lst, int num_fields, std::shared_ptr& schema); +#if defined(HAS_ALTREP) void Init_Altrep_classes(DllInfo* dll); -SEXP Make_array_nonull_dbl_vector(const std::shared_ptr& array); -SEXP Make_array_nonull_int_vector(const std::shared_ptr& array); +SEXP MakeInt32ArrayNoNull(const std::shared_ptr& array); +SEXP MakeDoubleArrayNoNull(const std::shared_ptr& array); +#endif } // namespace r } // namespace arrow From 9456dfaa28eb84f5a675e72721d12e73160d3b69 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Romain=20Fran=C3=A7ois?= Date: Mon, 21 Jun 2021 09:16:08 +0200 Subject: [PATCH 17/22] Update r/src/array_to_vector.cpp Co-authored-by: Benjamin Kietzman --- r/src/array_to_vector.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index 3d6c403b29d..a8f7191bf18 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -144,8 +144,8 @@ Status IngestSome(const std::shared_ptr& array, R_xlen_t n, // Allocate + Ingest SEXP ArrayVector__as_vector(R_xlen_t n, const std::shared_ptr& type, const ArrayVector& arrays) { - // special case when there is only one array #if defined(HAS_ALTREP) + // special case when there is only one array if (arrays.size() == 1) { const auto& array = arrays[0]; if (arrow::r::GetBoolOption("arrow.use_altrep", true) && array->length() > 0 && From 598f752bf2a7aa597dd366ea2bf924623ff0aaa5 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 21 Jun 2021 09:39:58 +0200 Subject: [PATCH 18/22] show the external pointer in inspect --- r/src/altrep.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index 03765d4520a..67acef7df05 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -49,8 +49,9 @@ struct ArrayNoNull { static Rboolean Inspect(SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int)) { const auto& array = Get(x); - Rprintf("std::shared_ptr (len=%d, ptr=%p)\n", - array->type()->ToString().c_str(), array->length(), array.get()); + Rprintf("arrow::Array<%s, NONULL> len=%d, Array=<%p>, xp=<%p>)\n", + array->type()->ToString().c_str(), array->length(), array.get(), + R_altrep_data1(x)); return TRUE; } From e756d7c223a6c7daf369b97bd0af85b83a85a4fe Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 21 Jun 2021 09:57:38 +0200 Subject: [PATCH 19/22] inspect the subtree --- r/src/altrep.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index 67acef7df05..d8dd6dd32c7 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -49,9 +49,9 @@ struct ArrayNoNull { static Rboolean Inspect(SEXP x, int pre, int deep, int pvec, void (*inspect_subtree)(SEXP, int, int, int)) { const auto& array = Get(x); - Rprintf("arrow::Array<%s, NONULL> len=%d, Array=<%p>, xp=<%p>)\n", - array->type()->ToString().c_str(), array->length(), array.get(), - R_altrep_data1(x)); + Rprintf("arrow::Array<%s, NONULL> len=%d, Array=<%p>\n", + array->type()->ToString().c_str(), array->length(), array.get()); + inspect_subtree(R_altrep_data1(x), pre, deep + 1, pvec); return TRUE; } From adb0d4a317247f89baad76c18e4eb1a7bba24390 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Mon, 21 Jun 2021 19:06:56 +0200 Subject: [PATCH 20/22] being explicit about the Deleter --- r/src/altrep.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index d8dd6dd32c7..2759217c8ff 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -30,6 +30,8 @@ namespace r { template struct ArrayNoNull { using data_type = typename std::conditional::type; + using Pointer = cpp11::external_pointer, + cpp11::default_deleter>>; // altrep object around an Array with no nulls // data1: an external pointer to a shared pointer to the Array @@ -38,7 +40,7 @@ struct ArrayNoNull { static SEXP Make(R_altrep_class_t class_t, const std::shared_ptr& array) { // we don't need the whole r6 object, just an external pointer // that retain the array - cpp11::external_pointer> xp(new std::shared_ptr(array)); + Pointer xp(new std::shared_ptr(array)); SEXP res = R_new_altrep(class_t, xp, R_NilValue); MARK_NOT_MUTABLE(res); @@ -56,7 +58,7 @@ struct ArrayNoNull { } static const std::shared_ptr& Get(SEXP vec) { - return *cpp11::external_pointer>(R_altrep_data1(vec)); + return *Pointer(R_altrep_data1(vec)); } static R_xlen_t Length(SEXP vec) { return Get(vec)->length(); } From bfb5de11b464bc33948ee0ac3bbc8299ed7c76be Mon Sep 17 00:00:00 2001 From: Benjamin Kietzman Date: Mon, 21 Jun 2021 14:32:16 -0400 Subject: [PATCH 21/22] try using non-default_deleter --- r/src/altrep.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index 2759217c8ff..6bc13f2f6ea 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -30,8 +30,10 @@ namespace r { template struct ArrayNoNull { using data_type = typename std::conditional::type; - using Pointer = cpp11::external_pointer, - cpp11::default_deleter>>; + static void DeleteArray(std::shared_ptr* ptr) { + delete ptr; + } + using Pointer = cpp11::external_pointer, DeleteArray>; // altrep object around an Array with no nulls // data1: an external pointer to a shared pointer to the Array From a5f025342c2259cc593fceb696df7b388e812298 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Tue, 22 Jun 2021 11:51:15 +0200 Subject: [PATCH 22/22] lint --- r/src/altrep.cpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/r/src/altrep.cpp b/r/src/altrep.cpp index 6bc13f2f6ea..33e30aa3ffb 100644 --- a/r/src/altrep.cpp +++ b/r/src/altrep.cpp @@ -30,9 +30,7 @@ namespace r { template struct ArrayNoNull { using data_type = typename std::conditional::type; - static void DeleteArray(std::shared_ptr* ptr) { - delete ptr; - } + static void DeleteArray(std::shared_ptr* ptr) { delete ptr; } using Pointer = cpp11::external_pointer, DeleteArray>; // altrep object around an Array with no nulls