From 4012a930815b737f2f3650f49a46526c34154899 Mon Sep 17 00:00:00 2001 From: Javier Luraschi Date: Wed, 24 Oct 2018 15:17:02 -0700 Subject: [PATCH 1/4] [R] ARROW-3604 support to cast int64 to ints --- r/src/array.cpp | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/r/src/array.cpp b/r/src/array.cpp index e885b6646dc..10cb3c370a8 100644 --- a/r/src/array.cpp +++ b/r/src/array.cpp @@ -698,6 +698,39 @@ SEXP Int64Array(const std::shared_ptr& array) { return vec; } +SEXP IntFromInt64Array(const std::shared_ptr& array) { + auto n = array->length(); + IntegerVector vec(no_init(n)); + + if (n == 0) return vec; + + auto null_count = array->null_count(); + if (null_count == n) { + std::fill(vec.begin(), vec.end(), NA_INTEGER); + return vec; + } + + auto p_values = GetValuesSafely(array->data(), 1, array->offset()); + auto p_vec = reinterpret_cast(vec.begin()); + + if (array->null_count()) { + internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), array->offset(), + n); + + for (size_t i = 0; i < n; i++, bitmap_reader.Next()) { + p_vec[i] = bitmap_reader.IsNotSet() + ? NA_INTEGER + : p_values[i]; + } + } else { + for (size_t i = 0; i < n; i++) { + p_vec[i] = p_values[i]; + } + } + + return vec; +} + } // namespace r } // namespace arrow @@ -747,7 +780,7 @@ SEXP Array__as_vector(const std::shared_ptr& array) { // lossy promotions to numeric vector case Type::INT64: - return arrow::r::Int64Array(array); + return arrow::r::IntFromInt64Array(array); default: break; From 01e7c139d332c19126019d8ad4bbfcab9ee6932e Mon Sep 17 00:00:00 2001 From: Javier Luraschi Date: Wed, 24 Oct 2018 15:39:32 -0700 Subject: [PATCH 2/4] handle integer overflow and warn users --- r/src/array.cpp | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/r/src/array.cpp b/r/src/array.cpp index 10cb3c370a8..e95ceb027c3 100644 --- a/r/src/array.cpp +++ b/r/src/array.cpp @@ -25,6 +25,8 @@ namespace r { // the integer64 sentinel static const int64_t NA_INT64 = std::numeric_limits::min(); +static const int64_t MAX_INT32 = std::numeric_limits::max(); +static const int64_t MIN_INT32 = std::numeric_limits::min(); template std::shared_ptr SimpleArray(SEXP x) { @@ -713,21 +715,38 @@ SEXP IntFromInt64Array(const std::shared_ptr& array) { auto p_values = GetValuesSafely(array->data(), 1, array->offset()); auto p_vec = reinterpret_cast(vec.begin()); + size_t overflowed = 0; if (array->null_count()) { internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), array->offset(), n); for (size_t i = 0; i < n; i++, bitmap_reader.Next()) { - p_vec[i] = bitmap_reader.IsNotSet() - ? NA_INTEGER - : p_values[i]; + if (p_values[i] > MAX_INT32 || p_values[i] < MIN_INT32) { + overflowed++; + p_vec[i] = NA_INTEGER; + } + else { + p_vec[i] = bitmap_reader.IsNotSet() + ? NA_INTEGER + : p_values[i]; + } } } else { for (size_t i = 0; i < n; i++) { - p_vec[i] = p_values[i]; + if (p_values[i] > MAX_INT32 || p_values[i] < MIN_INT32) { + overflowed++; + p_vec[i] = NA_INTEGER; + } + else { + p_vec[i] = p_values[i]; + } } } + if (overflowed > 0) { + Rcpp::warning(tfm::format("Integer overflow, %i values replaced with NAs. Consider using 'options(arrow.int64 = \"bit64\")'.", overflowed)); + } + return vec; } From ca0fd80626dee24d620656e6b5fd5a30659fd7c6 Mon Sep 17 00:00:00 2001 From: Javier Luraschi Date: Wed, 24 Oct 2018 16:38:29 -0700 Subject: [PATCH 3/4] support arrow.int64 set to integer --- r/src/array.cpp | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/r/src/array.cpp b/r/src/array.cpp index e95ceb027c3..65b27b36ff7 100644 --- a/r/src/array.cpp +++ b/r/src/array.cpp @@ -798,8 +798,16 @@ SEXP Array__as_vector(const std::shared_ptr& array) { return arrow::r::promotion_Array_to_Vector(array); // lossy promotions to numeric vector - case Type::INT64: - return arrow::r::IntFromInt64Array(array); + case Type::INT64: { + Function get_option("getOption"); + SEXP option = get_option("arrow.int64"); + if (!Rf_isNull(option) && std::string("integer") == CHAR(STRING_ELT(option, 0))) { + return arrow::r::IntFromInt64Array(array); + } + else { + return arrow::r::Int64Array(array); + } + } default: break; From f222ba28128bfd9ff6da2707ed5de185f7cf49fc Mon Sep 17 00:00:00 2001 From: Javier Luraschi Date: Wed, 24 Oct 2018 16:42:12 -0700 Subject: [PATCH 4/4] fix R linting issues --- r/src/array.cpp | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/r/src/array.cpp b/r/src/array.cpp index 65b27b36ff7..f378be6d1a2 100644 --- a/r/src/array.cpp +++ b/r/src/array.cpp @@ -724,11 +724,8 @@ SEXP IntFromInt64Array(const std::shared_ptr& array) { if (p_values[i] > MAX_INT32 || p_values[i] < MIN_INT32) { overflowed++; p_vec[i] = NA_INTEGER; - } - else { - p_vec[i] = bitmap_reader.IsNotSet() - ? NA_INTEGER - : p_values[i]; + } else { + p_vec[i] = bitmap_reader.IsNotSet() ? NA_INTEGER : p_values[i]; } } } else { @@ -736,15 +733,17 @@ SEXP IntFromInt64Array(const std::shared_ptr& array) { if (p_values[i] > MAX_INT32 || p_values[i] < MIN_INT32) { overflowed++; p_vec[i] = NA_INTEGER; - } - else { + } else { p_vec[i] = p_values[i]; } } } if (overflowed > 0) { - Rcpp::warning(tfm::format("Integer overflow, %i values replaced with NAs. Consider using 'options(arrow.int64 = \"bit64\")'.", overflowed)); + Rcpp::warning( + tfm::format("Integer overflow, %i values replaced with NAs. Consider using " + "'options(arrow.int64 = \"bit64\")'.", + overflowed)); } return vec; @@ -803,8 +802,7 @@ SEXP Array__as_vector(const std::shared_ptr& array) { SEXP option = get_option("arrow.int64"); if (!Rf_isNull(option) && std::string("integer") == CHAR(STRING_ELT(option, 0))) { return arrow::r::IntFromInt64Array(array); - } - else { + } else { return arrow::r::Int64Array(array); } }