diff --git a/r/R/table.R b/r/R/table.R index dd3461b7ed3..b899117c148 100644 --- a/r/R/table.R +++ b/r/R/table.R @@ -194,7 +194,7 @@ Table$create <- function(..., schema = NULL) { } #' @export -as.data.frame.Table <- function(x, row.names = NULL, optional = FALSE, use_threads = TRUE, ...){ +as.data.frame.Table <- function(x, row.names = NULL, optional = FALSE, ...) { Table__to_dataframe(x, use_threads = option_use_threads()) } diff --git a/r/src/array_from_vector.cpp b/r/src/array_from_vector.cpp index 5e3e9350be9..35bfbae8e9d 100644 --- a/r/src/array_from_vector.cpp +++ b/r/src/array_from_vector.cpp @@ -295,7 +295,9 @@ std::shared_ptr MakeStructArray(SEXP df, const std::shared_ptr& for (int i = 0; i < n; i++) { children[i] = Array__from_vector(VECTOR_ELT(df, i), type->child(i)->type(), true); } - return std::make_shared(type, children[0]->length(), children); + + int64_t rows = n ? children[0]->length() : 0; + return std::make_shared(type, rows, children); } std::shared_ptr MakeListArray(SEXP x, const std::shared_ptr& type) { diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp index 4162c824672..5aefaeefde3 100644 --- a/r/src/array_to_vector.cpp +++ b/r/src/array_to_vector.cpp @@ -656,8 +656,15 @@ class Converter_Null : public Converter { return Status::OK(); } }; + std::shared_ptr Converter::Make(const ArrayVector& arrays) { - switch (arrays[0]->type_id()) { + if (arrays.empty()) { + Rcpp::stop(tfm::format("Must have at least one array to create a converter")); + } + + auto type = arrays[0]->type(); + + switch (type->id()) { // direct support case Type::INT32: return std::make_shared>(arrays); @@ -742,7 +749,7 @@ std::shared_ptr Converter::Make(const ArrayVector& arrays) { break; } - Rcpp::stop(tfm::format("cannot handle Array of type %s", arrays[0]->type()->name())); + Rcpp::stop(tfm::format("cannot handle Array of type %s", type->name())); return nullptr; } @@ -813,7 +820,6 @@ SEXP Array__as_vector(const std::shared_ptr& array) { // [[arrow::export]] SEXP ChunkedArray__as_vector(const std::shared_ptr& chunked_array) { - // NB: this segfaults if there are 0 chunks (presumably something tries chunks[0]) return arrow::r::ArrayVector__as_vector(chunked_array->length(), chunked_array->chunks()); } @@ -849,7 +855,6 @@ Rcpp::List Table__to_dataframe(const std::shared_ptr& table, std::vector> converters(nc); for (int64_t i = 0; i < nc; i++) { - // This crashes if num_chunks == 0 converters[i] = arrow::r::Converter::Make(table->column(i)->chunks()); names[i] = table->field(i)->name(); } diff --git a/r/src/compute.cpp b/r/src/compute.cpp index eecbda9ead8..212b27c8a60 100644 --- a/r/src/compute.cpp +++ b/r/src/compute.cpp @@ -234,7 +234,14 @@ std::shared_ptr Table__Filter(const std::shared_ptr& options.null_selection_behavior = arrow::compute::FilterOptions::EMIT_NULL; } STOP_IF_NOT_OK(arrow::compute::Filter(&context, table, filter, options, &out)); - return out.table(); + std::shared_ptr tab = out.table(); + if (tab->num_rows() == 0) { + // Slight hack: if there are no rows in the result, instead do a 0-length + // slice so that we get chunked arrays with 1 chunk (itself length 0). + // We need that because the Arrow-to-R converter fails when there are 0 chunks. + return table->Slice(0, 0); + } + return tab; } // [[arrow::export]] @@ -249,6 +256,13 @@ std::shared_ptr Table__FilterChunked( options.null_selection_behavior = arrow::compute::FilterOptions::EMIT_NULL; } STOP_IF_NOT_OK(arrow::compute::Filter(&context, table, filter, options, &out)); - return out.table(); + std::shared_ptr tab = out.table(); + if (tab->num_rows() == 0) { + // Slight hack: if there are no rows in the result, instead do a 0-length + // slice so that we get chunked arrays with 1 chunk (itself length 0). + // We need that because the Arrow-to-R converter fails when there are 0 chunks. + return table->Slice(0, 0); + } + return tab; } #endif diff --git a/r/tests/testthat/helper-arrow.R b/r/tests/testthat/helper-arrow.R index bd35e1cb4c4..4e6a47d9672 100644 --- a/r/tests/testthat/helper-arrow.R +++ b/r/tests/testthat/helper-arrow.R @@ -23,6 +23,8 @@ if (tolower(Sys.info()[["sysname"]]) == "windows") { options(arrow.use_threads = FALSE) } +set.seed(1) + test_that <- function(what, code) { testthat::test_that(what, { skip_if(getOption("..skip.tests", TRUE), "arrow C++ library not available") diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R index faf1e741a92..ad8068cc8b8 100644 --- a/r/tests/testthat/test-Array.R +++ b/r/tests/testthat/test-Array.R @@ -413,6 +413,11 @@ test_that("Array$create() can handle data frame with custom struct type (not inf expect_error(Array$create(df, type = type), regexp = "Expecting a character vector") }) +test_that("Array$create() supports tibble with no columns (ARROW-8354)", { + df <- tibble::tibble() + expect_equal(Array$create(df)$as_vector(), df) +}) + test_that("Array$create() handles vector -> list arrays (ARROW-7662)", { # Should be able to create an empty list with a type hint. expect_is(Array$create(list(), list_of(bool())), "ListArray") diff --git a/r/tests/testthat/test-dplyr.R b/r/tests/testthat/test-dplyr.R index e77bfa3ad07..e531fb30cf6 100644 --- a/r/tests/testthat/test-dplyr.R +++ b/r/tests/testthat/test-dplyr.R @@ -80,6 +80,7 @@ tbl <- tibble::tibble( int = 1:10, dbl = as.numeric(1:10), lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE), + false = logical(10), chr = letters[1:10], fct = factor(letters[1:10]) ) @@ -118,6 +119,16 @@ test_that("filter() with NAs in selection", { ) }) +test_that("Filter returning an empty Table should not segfault (ARROW-8354)", { + expect_dplyr_equal( + input %>% + filter(false) %>% + select(chr, int, lgl) %>% + collect(), + tbl + ) +}) + test_that("filtering with expression", { char_sym <- "b" expect_dplyr_equal(