NOTE(review): this patch was recovered from a whitespace-collapsed copy in which the
contents of angle brackets had been lost. Template arguments were restored on the added
lines and on the NA_INT64 context line, where the surrounding code determines them.
The context lines "template" / "std::shared_ptr SimpleArray(SEXP x) {" and
"promotion_Array_to_Vector(array)" are reproduced as found: their template arguments
could not be recovered and must be restored from r/src/array.cpp before applying.
The "index" line below still carries the blob hashes of the original patch.

diff --git a/r/src/array.cpp b/r/src/array.cpp
index e885b6646dc..f378be6d1a2 100644
--- a/r/src/array.cpp
+++ b/r/src/array.cpp
@@ -25,6 +25,10 @@ namespace r {
 
 // the integer64 sentinel
 static const int64_t NA_INT64 = std::numeric_limits<int64_t>::min();
+// bounds of a 32-bit int, widened to int64_t so that int64 values can be compared
+// against them directly. Note that MIN_INT32 has the same value as R's NA_INTEGER.
+static const int64_t MAX_INT32 = std::numeric_limits<int32_t>::max();
+static const int64_t MIN_INT32 = std::numeric_limits<int32_t>::min();
 
 template
 std::shared_ptr SimpleArray(SEXP x) {
@@ -698,6 +702,71 @@ SEXP Int64Array(const std::shared_ptr<Array>& array) {
   return vec;
 }
 
+// Convert an int64 Array to an R integer vector.
+//
+// Null slots become NA_INTEGER. Non-null values that cannot be stored as a non-NA
+// R integer, i.e. values outside of [MIN_INT32 + 1, MAX_INT32], also become
+// NA_INTEGER, and one warning reporting how many values were replaced is emitted.
+SEXP IntFromInt64Array(const std::shared_ptr<Array>& array) {
+  auto n = array->length();
+  IntegerVector vec(no_init(n));
+
+  if (n == 0) return vec;
+
+  // all nulls: no need to look at the values buffer
+  auto null_count = array->null_count();
+  if (null_count == n) {
+    std::fill(vec.begin(), vec.end(), NA_INTEGER);
+    return vec;
+  }
+
+  auto p_values = GetValuesSafely<int64_t>(array->data(), 1, array->offset());
+  auto p_vec = reinterpret_cast<int*>(vec.begin());
+
+  // MIN_INT32 is rejected as well: it has the same value as NA_INTEGER, so storing
+  // it would turn a valid value into NA without it being reported.
+  auto fits_r_integer = [](int64_t value) {
+    return value > MIN_INT32 && value <= MAX_INT32;
+  };
+
+  int64_t overflowed = 0;
+  if (null_count > 0) {
+    internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), array->offset(),
+                                         n);
+
+    for (int64_t i = 0; i < n; i++, bitmap_reader.Next()) {
+      if (bitmap_reader.IsNotSet()) {
+        // test validity first: the value stored in a null slot is unspecified and
+        // must not be counted as an overflow
+        p_vec[i] = NA_INTEGER;
+      } else if (fits_r_integer(p_values[i])) {
+        p_vec[i] = static_cast<int>(p_values[i]);
+      } else {
+        overflowed++;
+        p_vec[i] = NA_INTEGER;
+      }
+    }
+  } else {
+    for (int64_t i = 0; i < n; i++) {
+      if (fits_r_integer(p_values[i])) {
+        p_vec[i] = static_cast<int>(p_values[i]);
+      } else {
+        overflowed++;
+        p_vec[i] = NA_INTEGER;
+      }
+    }
+  }
+
+  if (overflowed > 0) {
+    Rcpp::warning(
+        tfm::format("Integer overflow, %i values replaced with NAs. Consider using "
+                    "'options(arrow.int64 = \"bit64\")'.",
+                    overflowed));
+  }
+
+  return vec;
+}
+
 }  // namespace r
 }  // namespace arrow
 
@@ -746,8 +815,18 @@ SEXP Array__as_vector(const std::shared_ptr<arrow::Array>& array) {
       return arrow::r::promotion_Array_to_Vector(array);
 
     // lossy promotions to numeric vector
-    case Type::INT64:
-      return arrow::r::Int64Array(array);
+    case Type::INT64: {
+      // options(arrow.int64 = "integer") converts to an R integer vector, anything
+      // else (including an unset option) keeps the default Int64Array() conversion
+      Function get_option("getOption");
+      SEXP option = get_option("arrow.int64");
+      // STRING_ELT() is only valid on a non-empty character vector
+      if (Rf_isString(option) && Rf_length(option) > 0 &&
+          std::string("integer") == CHAR(STRING_ELT(option, 0))) {
+        return arrow::r::IntFromInt64Array(array);
+      }
+      return arrow::r::Int64Array(array);
+    }
 
     default:
       break;