Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 60 additions & 2 deletions r/src/array.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ namespace r {

// the integer64 sentinel
static constexpr int64_t NA_INT64 = std::numeric_limits<int64_t>::min();

// Bounds of a 32-bit integer, widened to int64_t so that 64-bit values can be
// compared against them directly without any narrowing.
static constexpr int64_t MAX_INT32 = std::numeric_limits<int32_t>::max();
// NOTE: this value is also the NA sentinel for int in R, so it is a range
// bound but not itself a representable (non-missing) R integer.
static constexpr int64_t MIN_INT32 = std::numeric_limits<int32_t>::min();
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

change these to constexpr?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure. Does that mean they have to be functions?

Otherwise, since `::min` and `::max` are themselves `constexpr`, perhaps we don't even need the constants here.

Also, not sure what MIN_INT32 is used for, but we need to be careful because it's also the NA sentinel for int in R.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yes they are constexpr, e.g.

static constexpr hash_slot_t kHashSlotEmpty = std::numeric_limits<int32_t>::max();


template <int RTYPE, typename Type>
std::shared_ptr<Array> SimpleArray(SEXP x) {
Expand Down Expand Up @@ -698,6 +700,55 @@ SEXP Int64Array(const std::shared_ptr<Array>& array) {
return vec;
}

// Convert an int64 arrow Array into an R integer (32-bit) vector.
//
// - Null slots become NA_INTEGER.
// - Valid values that cannot be stored as a non-missing R integer are also
//   replaced by NA_INTEGER and counted; if any were replaced, a single R
//   warning reports how many. This includes values equal to MIN_INT32, since
//   that bit pattern is R's integer NA sentinel and would otherwise silently
//   read back as NA.
//
// Returns the integer vector (empty when the array has length 0).
SEXP IntFromInt64Array(const std::shared_ptr<Array>& array) {
  auto n = array->length();
  IntegerVector vec(no_init(n));

  if (n == 0) return vec;

  // Fast path: nothing but nulls, no need to look at the values buffer.
  auto null_count = array->null_count();
  if (null_count == n) {
    std::fill(vec.begin(), vec.end(), NA_INTEGER);
    return vec;
  }

  // NOTE(review): presumably the returned pointer is already advanced by
  // array->offset(), hence the 0-based indexing below -- confirm against
  // GetValuesSafely.
  auto p_values = GetValuesSafely<int64_t>(array->data(), 1, array->offset());
  auto p_vec = reinterpret_cast<int32_t*>(vec.begin());

  size_t overflowed = 0;

  // Narrow one valid 64-bit value, recording it when it does not fit.
  auto narrow = [&overflowed](int64_t value) -> int32_t {
    if (value > MAX_INT32 || value <= MIN_INT32) {
      ++overflowed;
      return NA_INTEGER;
    }
    return static_cast<int32_t>(value);
  };

  if (null_count) {
    internal::BitmapReader bitmap_reader(array->null_bitmap()->data(), array->offset(),
                                         n);

    for (decltype(n) i = 0; i < n; i++, bitmap_reader.Next()) {
      // Check validity first: the value stored behind a null slot is not
      // meaningful and must not be counted as an overflow.
      p_vec[i] = bitmap_reader.IsNotSet() ? NA_INTEGER : narrow(p_values[i]);
    }
  } else {
    for (decltype(n) i = 0; i < n; i++) {
      p_vec[i] = narrow(p_values[i]);
    }
  }

  if (overflowed > 0) {
    Rcpp::warning(
        tfm::format("Integer overflow, %i values replaced with NAs. Consider using "
                    "'options(arrow.int64 = \"bit64\")'.",
                    overflowed));
  }

  return vec;
}

} // namespace r
} // namespace arrow

Expand Down Expand Up @@ -746,8 +797,15 @@ SEXP Array__as_vector(const std::shared_ptr<arrow::Array>& array) {
return arrow::r::promotion_Array_to_Vector<REALSXP, arrow::UInt32Type>(array);

// lossy promotions to numeric vector
case Type::INT64:
return arrow::r::Int64Array(array);
case Type::INT64: {
Function get_option("getOption");
SEXP option = get_option("arrow.int64");
if (!Rf_isNull(option) && std::string("integer") == CHAR(STRING_ELT(option, 0))) {
return arrow::r::IntFromInt64Array(array);
} else {
return arrow::r::Int64Array(array);
}
}

default:
break;
Expand Down