Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion r/R/table.R
Original file line number Diff line number Diff line change
Expand Up @@ -194,7 +194,7 @@ Table$create <- function(..., schema = NULL) {
}

#' @export
as.data.frame.Table <- function(x, row.names = NULL, optional = FALSE, use_threads = TRUE, ...){
as.data.frame.Table <- function(x, row.names = NULL, optional = FALSE, ...) {
Table__to_dataframe(x, use_threads = option_use_threads())
}

Expand Down
4 changes: 3 additions & 1 deletion r/src/array_from_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,7 +295,9 @@ std::shared_ptr<Array> MakeStructArray(SEXP df, const std::shared_ptr<DataType>&
for (int i = 0; i < n; i++) {
children[i] = Array__from_vector(VECTOR_ELT(df, i), type->child(i)->type(), true);
}
return std::make_shared<StructArray>(type, children[0]->length(), children);

int64_t rows = n ? children[0]->length() : 0;
return std::make_shared<StructArray>(type, rows, children);
}

std::shared_ptr<Array> MakeListArray(SEXP x, const std::shared_ptr<DataType>& type) {
Expand Down
13 changes: 9 additions & 4 deletions r/src/array_to_vector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -656,8 +656,15 @@ class Converter_Null : public Converter {
return Status::OK();
}
};

std::shared_ptr<Converter> Converter::Make(const ArrayVector& arrays) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should accept the type of arrays explicitly; a Converter should not fail when arrays is empty. Instead the result of conversion should also be empty.

Copy link
Contributor Author

@fsaintjacques fsaintjacques Apr 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I updated https://issues.apache.org/jira/browse/ARROW-7798 to include this. I tried to quickly refactor what you suggest but it bubbles to rewriting almost all methods/functions of the file.

Copy link
Contributor Author

@fsaintjacques fsaintjacques Apr 8, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It also lead me to find a bug with handling Dictionary ChunkedArray, https://issues.apache.org/jira/browse/ARROW-8374

switch (arrays[0]->type_id()) {
if (arrays.empty()) {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To save us future pain, should we hunt for other places that assume >0 length vectors? grepping [0] in r/src hits a few places that look similarly suspect.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked them all and found one bug, I'll add a test and the fix in the current PR.

Rcpp::stop(tfm::format("Must have at least one array to create a converter"));
}

auto type = arrays[0]->type();

switch (type->id()) {
// direct support
case Type::INT32:
return std::make_shared<arrow::r::Converter_SimpleArray<INTSXP>>(arrays);
Expand Down Expand Up @@ -742,7 +749,7 @@ std::shared_ptr<Converter> Converter::Make(const ArrayVector& arrays) {
break;
}

Rcpp::stop(tfm::format("cannot handle Array of type %s", arrays[0]->type()->name()));
Rcpp::stop(tfm::format("cannot handle Array of type %s", type->name()));
return nullptr;
}

Expand Down Expand Up @@ -813,7 +820,6 @@ SEXP Array__as_vector(const std::shared_ptr<arrow::Array>& array) {

// [[arrow::export]]
SEXP ChunkedArray__as_vector(const std::shared_ptr<arrow::ChunkedArray>& chunked_array) {
// NB: this segfaults if there are 0 chunks (presumably something tries chunks[0])
return arrow::r::ArrayVector__as_vector(chunked_array->length(),
chunked_array->chunks());
}
Expand Down Expand Up @@ -849,7 +855,6 @@ Rcpp::List Table__to_dataframe(const std::shared_ptr<arrow::Table>& table,
std::vector<std::shared_ptr<arrow::r::Converter>> converters(nc);

for (int64_t i = 0; i < nc; i++) {
// This crashes if num_chunks == 0
converters[i] = arrow::r::Converter::Make(table->column(i)->chunks());
names[i] = table->field(i)->name();
}
Expand Down
18 changes: 16 additions & 2 deletions r/src/compute.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -234,7 +234,14 @@ std::shared_ptr<arrow::Table> Table__Filter(const std::shared_ptr<arrow::Table>&
options.null_selection_behavior = arrow::compute::FilterOptions::EMIT_NULL;
}
STOP_IF_NOT_OK(arrow::compute::Filter(&context, table, filter, options, &out));
return out.table();
std::shared_ptr<arrow::Table> tab = out.table();
if (tab->num_rows() == 0) {
// Slight hack: if there are no rows in the result, instead do a 0-length
// slice so that we get chunked arrays with 1 chunk (itself length 0).
// We need that because the Arrow-to-R converter fails when there are 0 chunks.
return table->Slice(0, 0);
}
return tab;
}

// [[arrow::export]]
Expand All @@ -249,6 +256,13 @@ std::shared_ptr<arrow::Table> Table__FilterChunked(
options.null_selection_behavior = arrow::compute::FilterOptions::EMIT_NULL;
}
STOP_IF_NOT_OK(arrow::compute::Filter(&context, table, filter, options, &out));
return out.table();
std::shared_ptr<arrow::Table> tab = out.table();
if (tab->num_rows() == 0) {
// Slight hack: if there are no rows in the result, instead do a 0-length
// slice so that we get chunked arrays with 1 chunk (itself length 0).
// We need that because the Arrow-to-R converter fails when there are 0 chunks.
return table->Slice(0, 0);
}
return tab;
}
#endif
2 changes: 2 additions & 0 deletions r/tests/testthat/helper-arrow.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ if (tolower(Sys.info()[["sysname"]]) == "windows") {
options(arrow.use_threads = FALSE)
}

set.seed(1)

test_that <- function(what, code) {
testthat::test_that(what, {
skip_if(getOption("..skip.tests", TRUE), "arrow C++ library not available")
Expand Down
5 changes: 5 additions & 0 deletions r/tests/testthat/test-Array.R
Original file line number Diff line number Diff line change
Expand Up @@ -413,6 +413,11 @@ test_that("Array$create() can handle data frame with custom struct type (not inf
expect_error(Array$create(df, type = type), regexp = "Expecting a character vector")
})

test_that("Array$create() supports tibble with no columns (ARROW-8354)", {
df <- tibble::tibble()
expect_equal(Array$create(df)$as_vector(), df)
})

test_that("Array$create() handles vector -> list arrays (ARROW-7662)", {
# Should be able to create an empty list with a type hint.
expect_is(Array$create(list(), list_of(bool())), "ListArray")
Expand Down
11 changes: 11 additions & 0 deletions r/tests/testthat/test-dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ tbl <- tibble::tibble(
int = 1:10,
dbl = as.numeric(1:10),
lgl = sample(c(TRUE, FALSE, NA), 10, replace = TRUE),
false = logical(10),
chr = letters[1:10],
fct = factor(letters[1:10])
)
Expand Down Expand Up @@ -118,6 +119,16 @@ test_that("filter() with NAs in selection", {
)
})

test_that("Filter returning an empty Table should not segfault (ARROW-8354)", {
expect_dplyr_equal(
input %>%
filter(false) %>%
select(chr, int, lgl) %>%
collect(),
tbl
)
})

test_that("filtering with expression", {
char_sym <- "b"
expect_dplyr_equal(
Expand Down