From f2412f6d7ce01964d81a6cf54a91b53b5c5e7bda Mon Sep 17 00:00:00 2001 From: Javier Luraschi Date: Mon, 22 Oct 2018 22:29:33 -0700 Subject: [PATCH 1/5] add num_fields and field methods to schema r6 class --- r/R/Schema.R | 4 +++- r/src/datatype.cpp | 15 +++++++++++++++ 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/r/R/Schema.R b/r/R/Schema.R index 064a7f0bd99..f2262bfc593 100644 --- a/r/R/Schema.R +++ b/r/R/Schema.R @@ -20,7 +20,9 @@ `arrow::Schema` <- R6Class("arrow::Schema", inherit = `arrow::Object`, public = list( - ToString = function() Schema__ToString(self) + ToString = function() Schema__ToString(self), + num_fields = function() Schema__num_fields(self), + field = function(i) construct(`arrow::Field`, Schema__field(self, i)) ) ) diff --git a/r/src/datatype.cpp b/r/src/datatype.cpp index 90378f78757..f073b79ecf4 100644 --- a/r/src/datatype.cpp +++ b/r/src/datatype.cpp @@ -173,6 +173,21 @@ std::string Schema__ToString(const std::shared_ptr& s) { return s->ToString(); } +// [[Rcpp::export]] +int Schema__num_fields(const std::shared_ptr& s) { + return s->num_fields(); +} + +// [[Rcpp::export]] +std::shared_ptr Schema__field(const std::shared_ptr& s, + int i) { + if (i >= s->num_fields()) { + Rcpp::stop("Invalid field index for schema."); + } + + return s->field(i); +} + // [[Rcpp::export]] std::string ListType__ToString(const std::shared_ptr& type) { return type->ToString(); From 437fed57310ad6a43d340b2f3390e89c860a3864 Mon Sep 17 00:00:00 2001 From: Javier Luraschi Date: Mon, 22 Oct 2018 22:30:05 -0700 Subject: [PATCH 2/5] support collecting decimal types --- r/src/array.cpp | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/r/src/array.cpp b/r/src/array.cpp index e885b6646dc..2395ab5da85 100644 --- a/r/src/array.cpp +++ b/r/src/array.cpp @@ -698,6 +698,38 @@ SEXP Int64Array(const std::shared_ptr& array) { return vec; } +SEXP DecimalArray(const std::shared_ptr& array) { + auto n = array->length(); + NumericVector vec(n); + + if (n == 0) return vec; + + auto null_count = array->null_count(); + if (null_count == n) { + std::fill(vec.begin(), vec.end(), NA_REAL); + return vec; + } + + auto p_vec = reinterpret_cast(vec.begin()); + const auto& decimals_arr = internal::checked_cast(*array); + + if (array->null_count()) { + internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), array->offset(), + n); + + for (size_t i = 0; i < n; i++, bitmap_reader.Next()) { + p_vec[i] = bitmap_reader.IsNotSet() ? NA_REAL : std::stod(decimals_arr.FormatValue(i).c_str()); + } + } + else { + for (size_t i = 0; i < n; i++) { + p_vec[i] = std::stod(decimals_arr.FormatValue(i).c_str()); + } + } + + return vec; +} + } // namespace r } // namespace arrow @@ -748,6 +780,8 @@ SEXP Array__as_vector(const std::shared_ptr& array) { // lossy promotions to numeric vector case Type::INT64: return arrow::r::Int64Array(array); + case Type::DECIMAL: + return arrow::r::DecimalArray(array); default: break; From 5487cb3b7efa3048f9e2e25888fa290af9bb0858 Mon Sep 17 00:00:00 2001 From: Javier Luraschi Date: Mon, 22 Oct 2018 22:35:04 -0700 Subject: [PATCH 3/5] rebuild rcpp bindings --- r/R/RcppExports.R | 8 ++++++++ r/src/RcppExports.cpp | 25 +++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/r/R/RcppExports.R b/r/R/RcppExports.R index 7a9f8725e88..2d90f339584 100644 --- a/r/R/RcppExports.R +++ b/r/R/RcppExports.R @@ -301,6 +301,14 @@ Schema__ToString <- function(s) { .Call(`_arrow_Schema__ToString`, s) } +Schema__num_fields <- function(s) { + .Call(`_arrow_Schema__num_fields`, s) +} + +Schema__field <- function(s, i) { + .Call(`_arrow_Schema__field`, s, i) +} + ListType__ToString <- function(type) { .Call(`_arrow_ListType__ToString`, type) } diff --git a/r/src/RcppExports.cpp b/r/src/RcppExports.cpp index 949ce71d71a..3e61cee3ad2 100644 --- a/r/src/RcppExports.cpp +++ b/r/src/RcppExports.cpp @@ -832,6 +832,29 @@ BEGIN_RCPP return rcpp_result_gen; END_RCPP } +// Schema__num_fields +int Schema__num_fields(const std::shared_ptr& s); +RcppExport SEXP _arrow_Schema__num_fields(SEXP sSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type s(sSEXP); + rcpp_result_gen = Rcpp::wrap(Schema__num_fields(s)); + return rcpp_result_gen; +END_RCPP +} +// Schema__field +std::shared_ptr Schema__field(const std::shared_ptr& s, int i); +RcppExport SEXP _arrow_Schema__field(SEXP sSEXP, SEXP iSEXP) { +BEGIN_RCPP + Rcpp::RObject rcpp_result_gen; + Rcpp::RNGScope rcpp_rngScope_gen; + Rcpp::traits::input_parameter< const std::shared_ptr& >::type s(sSEXP); + Rcpp::traits::input_parameter< int >::type i(iSEXP); + rcpp_result_gen = Rcpp::wrap(Schema__field(s, i)); + return rcpp_result_gen; +END_RCPP +} // ListType__ToString std::string ListType__ToString(const std::shared_ptr& type); RcppExport SEXP _arrow_ListType__ToString(SEXP typeSEXP) { @@ -1737,6 +1760,8 @@ static const R_CallMethodDef CallEntries[] = { {"_arrow_DataType__id", (DL_FUNC) &_arrow_DataType__id, 1}, {"_arrow_schema_", (DL_FUNC) &_arrow_schema_, 1}, {"_arrow_Schema__ToString", (DL_FUNC) &_arrow_Schema__ToString, 1}, + {"_arrow_Schema__num_fields", (DL_FUNC) &_arrow_Schema__num_fields, 1}, + {"_arrow_Schema__field", (DL_FUNC) &_arrow_Schema__field, 2}, {"_arrow_ListType__ToString", (DL_FUNC) &_arrow_ListType__ToString, 1}, {"_arrow_FixedWidthType__bit_width", (DL_FUNC) &_arrow_FixedWidthType__bit_width, 1}, {"_arrow_DateType__unit", (DL_FUNC) &_arrow_DateType__unit, 1}, From d6ea85bf89d9d40beec9841cbf2f320f21f6cdf1 Mon Sep 17 00:00:00 2001 From: Javier Luraschi Date: Tue, 23 Oct 2018 20:44:25 -0700 Subject: [PATCH 4/5] code review feedback --- r/src/array.cpp | 2 +- r/src/datatype.cpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/r/src/array.cpp b/r/src/array.cpp index 2395ab5da85..5a915a3cb7d 100644 --- a/r/src/array.cpp +++ b/r/src/array.cpp @@ -700,7 +700,7 @@ SEXP Int64Array(const std::shared_ptr& array) { SEXP DecimalArray(const std::shared_ptr& array) { auto n = array->length(); - NumericVector vec(n); + NumericVector vec(no_init(n)); if (n == 0) return vec; diff --git a/r/src/datatype.cpp b/r/src/datatype.cpp index f073b79ecf4..ef942e2e517 100644 --- a/r/src/datatype.cpp +++ b/r/src/datatype.cpp @@ -181,7 +181,7 @@ int Schema__num_fields(const std::shared_ptr& s) { // [[Rcpp::export]] std::shared_ptr Schema__field(const std::shared_ptr& s, int i) { - if (i >= s->num_fields()) { + if (i >= s->num_fields() || i < 0) { Rcpp::stop("Invalid field index for schema."); } From efabe274a37da56abf93bd69d46d926e05b1e7d1 Mon Sep 17 00:00:00 2001 From: Javier Luraschi Date: Tue, 23 Oct 2018 20:45:03 -0700 Subject: [PATCH 5/5] fix lint style suggestions --- r/src/array.cpp | 10 ++++++---- r/src/datatype.cpp | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/r/src/array.cpp b/r/src/array.cpp index 5a915a3cb7d..0cd059100a5 100644 --- a/r/src/array.cpp +++ b/r/src/array.cpp @@ -711,17 +711,19 @@ SEXP DecimalArray(const std::shared_ptr& array) { } auto p_vec = reinterpret_cast(vec.begin()); - const auto& decimals_arr = internal::checked_cast(*array); + const auto& decimals_arr = + internal::checked_cast(*array); if (array->null_count()) { internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), array->offset(), n); for (size_t i = 0; i < n; i++, bitmap_reader.Next()) { - p_vec[i] = bitmap_reader.IsNotSet() ? NA_REAL : std::stod(decimals_arr.FormatValue(i).c_str()); + p_vec[i] = bitmap_reader.IsNotSet() + ? NA_REAL + : std::stod(decimals_arr.FormatValue(i).c_str()); } - } - else { + } else { for (size_t i = 0; i < n; i++) { p_vec[i] = std::stod(decimals_arr.FormatValue(i).c_str()); } diff --git a/r/src/datatype.cpp b/r/src/datatype.cpp index ef942e2e517..64ca1c92b5b 100644 --- a/r/src/datatype.cpp +++ b/r/src/datatype.cpp @@ -180,7 +180,7 @@ int Schema__num_fields(const std::shared_ptr& s) { // [[Rcpp::export]] std::shared_ptr Schema__field(const std::shared_ptr& s, - int i) { + int i) { if (i >= s->num_fields() || i < 0) { Rcpp::stop("Invalid field index for schema."); }