From 161b37bad625ba87d0b99cde6bc2b8e92d26ea72 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 7 Apr 2021 09:36:07 +0200 Subject: [PATCH 1/2] layer about vctrs::::short_vec_size() --- r/src/arrow_vctrs.h | 2 +- r/src/imports.cpp | 8 +++++++- r/src/r_to_arrow.cpp | 14 +++----------- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/r/src/arrow_vctrs.h b/r/src/arrow_vctrs.h index 1f8a4d7f965..b91c0819909 100644 --- a/r/src/arrow_vctrs.h +++ b/r/src/arrow_vctrs.h @@ -18,5 +18,5 @@ #pragma once namespace vctrs { -R_len_t short_vec_size(SEXP); +R_len_t vec_size(SEXP); } diff --git a/r/src/imports.cpp b/r/src/imports.cpp index 5c77d6cb5b0..f4174bab5f4 100644 --- a/r/src/imports.cpp +++ b/r/src/imports.cpp @@ -32,6 +32,12 @@ const vctrs_api_ptrs_t& vctrs_api() { return ptrs; } -R_len_t short_vec_size(SEXP x) { return vctrs_api().short_vec_size(x); } +R_len_t vec_size(SEXP x) { + if (Rf_inherits(x, "data.frame") || TYPEOF(x) != VECSXP || Rf_inherits(x, "POSIXlt")) { + return vctrs_api().short_vec_size(x); + } else { + return Rf_length(x); + } +} } // namespace vctrs diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp index c6d15757e05..0ab9718da26 100644 --- a/r/src/r_to_arrow.cpp +++ b/r/src/r_to_arrow.cpp @@ -809,15 +809,7 @@ class RListConverter : public ListConverter { } auto append_value = [this](SEXP value) { - // TODO: this should always use vctrs::short_vec_size - // but that introduced a regression: - // https://github.com/apache/arrow/pull/8650#issuecomment-786940734 - int n; - if (TYPEOF(value) == VECSXP && !Rf_inherits(value, "data.frame")) { - n = Rf_length(value); - } else { - n = vctrs::short_vec_size(value); - } + int n = vctrs::vec_size(value); RETURN_NOT_OK(this->list_builder_->ValidateOverflow(n)); RETURN_NOT_OK(this->list_builder_->Append()); @@ -879,7 +871,7 @@ class RStructConverter : public StructConverter { for (R_xlen_t i = 0; i < n_columns; i++) { SEXP x_i = VECTOR_ELT(x, i); - if (vctrs::short_vec_size(x_i) < size) { + if (vctrs::vec_size(x_i) < size) { return Status::RError("Degenerated data frame"); } } @@ -1011,7 +1003,7 @@ std::shared_ptr vec_to_arrow(SEXP x, RConversionOptions options; options.strict = !type_inferred; options.type = type; - options.size = vctrs::short_vec_size(x); + options.size = vctrs::vec_size(x); // maybe short circuit when zero-copy is possible if (can_reuse_memory(x, options.type)) { From 24232d19fe2ab66077a052087ff221a546d2ae36 Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Wed, 7 Apr 2021 09:39:32 +0200 Subject: [PATCH 2/2] test --- r/tests/testthat/test-Array.R | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index 18e26207ca3..ecb02193bb3 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -457,6 +457,11 @@ test_that("Array$create() handles data frame -> struct arrays (ARROW-3811)", { a <- Array$create(df) expect_type_equal(a$type, struct(x = int32(), y = float64(), z = utf8())) expect_equivalent(as.vector(a), df) + + df <- structure(list(col = structure(list(structure(list(list(structure(1))), class = "inner")), class = "outer")), class = "data.frame", row.names = c(NA, -1L)) + a <- Array$create(df) + expect_type_equal(a$type, struct(col = list_of(list_of(list_of(float64()))))) + expect_equivalent(as.vector(a), df) }) test_that("StructArray methods", {