diff --git a/r/R/array.R b/r/R/array.R index 109f6daaa2b..ca03d2b5b96 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -472,9 +472,13 @@ names.StructArray <- function(x, ...) StructType__field_names(x$type) #' @export dim.StructArray <- function(x, ...) c(length(x), x$type$num_fields) +as_df.StructArray <- function(x, ...) { + as.vector(x) +} + #' @export as.data.frame.StructArray <- function(x, row.names = NULL, optional = FALSE, ...) { - as.vector(x) + as.data.frame(as_df(x), row.names = row.names, optional = optional, ...) } #' @rdname array diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R index ae68cc2118f..9e5b3c4a1a4 100644 --- a/r/R/arrow-tabular.R +++ b/r/R/arrow-tabular.R @@ -93,6 +93,10 @@ ArrowTabular <- R6Class("ArrowTabular", #' @export as.data.frame.ArrowTabular <- function(x, row.names = NULL, optional = FALSE, ...) { + as.data.frame(as_df(x), row.names = row.names, optional = optional, ...) +} + +as_df.ArrowTabular <- function(x, ...) { df <- x$to_data_frame() apply_arrow_r_metadata(df, x$metadata$r) } @@ -259,3 +263,7 @@ na.omit.ArrowTabular <- function(object, ...) { #' @export na.exclude.ArrowTabular <- na.omit.ArrowTabular + +as_df <- function(x) { + UseMethod("as_df") +} diff --git a/r/R/csv.R b/r/R/csv.R index 82243238662..237a2153e5b 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -248,7 +248,7 @@ read_delim_arrow <- function(file, } if (isTRUE(as_data_frame)) { - tab <- as.data.frame(tab) + tab <- as_df(tab) } tab diff --git a/r/R/dataset.R b/r/R/dataset.R index 30d3ed5ae15..fe9cdeba4ac 100644 --- a/r/R/dataset.R +++ b/r/R/dataset.R @@ -522,9 +522,13 @@ dim.Dataset <- function(x) c(x$num_rows, x$num_cols) #' @export c.Dataset <- function(...) Dataset$create(list(...)) +as_df.Dataset <- function(x, ...) { + collect.Dataset(x) +} + #' @export as.data.frame.Dataset <- function(x, row.names = NULL, optional = FALSE, ...) { - collect.Dataset(x) + as.data.frame(as_df(x), row.names = row.names, optional = optional, ...) } #' @export diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R index 9205a31b14f..61db1d2a1bd 100644 --- a/r/R/dplyr-collect.R +++ b/r/R/dplyr-collect.R @@ -22,9 +22,10 @@ collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) { out <- compute.arrow_dplyr_query(x) collect.ArrowTabular(out, as_data_frame) } + collect.ArrowTabular <- function(x, as_data_frame = TRUE, ...) { if (as_data_frame) { - as.data.frame(x, ...) + as_df(x, ...) } else { x } diff --git a/r/R/dplyr-glimpse.R b/r/R/dplyr-glimpse.R index 8a70f4c5b7b..ff7a3ef2508 100644 --- a/r/R/dplyr-glimpse.R +++ b/r/R/dplyr-glimpse.R @@ -71,7 +71,7 @@ glimpse.ArrowTabular <- function(x, var_headings <- paste("$", center_pad(tickify(names(x)), var_types)) # Assemble the data glimpse - df <- as.data.frame(head_tab) + df <- as_df(head_tab) formatted_data <- map_chr(df, function(.) { tryCatch( paste(pillar::format_glimpse(.), collapse = ", "), diff --git a/r/R/dplyr.R b/r/R/dplyr.R index 72e74809689..a52124befbb 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -216,6 +216,10 @@ unique.RecordBatchReader <- unique.arrow_dplyr_query #' @export as.data.frame.arrow_dplyr_query <- function(x, row.names = NULL, optional = FALSE, ...) { + as.data.frame(as_df(x), row.names = row.names, optional = optional, ...) +} + +as_df.arrow_dplyr_query <- function(x, ...) { collect.arrow_dplyr_query(x, as_data_frame = TRUE, ...) } diff --git a/r/R/feather.R b/r/R/feather.R index 1488db29eb7..09b04a1f0a6 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -196,7 +196,7 @@ read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, mmap = T ) if (isTRUE(as_data_frame)) { - out <- as.data.frame(out) + out <- as_df(out) } out } diff --git a/r/R/ipc-stream.R b/r/R/ipc-stream.R index f0b4a6aae0e..59d690db5dc 100644 --- a/r/R/ipc-stream.R +++ b/r/R/ipc-stream.R @@ -106,7 +106,7 @@ read_ipc_stream <- function(file, as_data_frame = TRUE, ...) { # https://issues.apache.org/jira/browse/ARROW-6830 out <- RecordBatchStreamReader$create(file)$read_table() if (as_data_frame) { - out <- as.data.frame(out) + out <- as_df(out) } out } diff --git a/r/R/json.R b/r/R/json.R index cdbe850b32f..470b52f3f88 100644 --- a/r/R/json.R +++ b/r/R/json.R @@ -84,7 +84,7 @@ read_json_arrow <- function(file, } if (isTRUE(as_data_frame)) { - tab <- as.data.frame(tab) + tab <- as_df(tab) } tab } diff --git a/r/R/parquet.R b/r/R/parquet.R index f3d384e8c25..586f6cd3c88 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -70,7 +70,7 @@ read_parquet <- function(file, } if (as_data_frame) { - tab <- as.data.frame(tab) + tab <- as_df(tab) } tab } diff --git a/r/R/record-batch-reader.R b/r/R/record-batch-reader.R index 184a77df36b..88118a2f70a 100644 --- a/r/R/record-batch-reader.R +++ b/r/R/record-batch-reader.R @@ -129,7 +129,11 @@ dim.RecordBatchReader <- function(x) c(NA_integer_, length(x$schema)) #' @export as.data.frame.RecordBatchReader <- function(x, row.names = NULL, optional = FALSE, ...) { - as.data.frame(x$read_table(), row.names = row.names, optional = optional, ...) + as.data.frame(as_df(x), row.names = row.names, optional = optional, ...) +} + +as_df.RecordBatchReader <- function(x, ...){ + x$read_table() } #' @export diff --git a/r/R/schema.R b/r/R/schema.R index 93e826eff28..39ffddfea83 100644 --- a/r/R/schema.R +++ b/r/R/schema.R @@ -386,5 +386,5 @@ as_schema.StructType <- function(x, ...) { #' @export as.data.frame.Schema <- function(x, row.names = NULL, optional = FALSE, ...) { - as.data.frame(Table__from_schema(x)) + as.data.frame(Table__from_schema(x), row.names = row.names, optional = optional, ...) } diff --git a/r/src/table.cpp b/r/src/table.cpp index 498141cc2f2..04537000f5d 100644 --- a/r/src/table.cpp +++ b/r/src/table.cpp @@ -228,7 +228,8 @@ arrow::Status AddMetadataFromDots(SEXP lst, int num_fields, // "top level" attributes, only relevant if the first object is not named and a data // frame cpp11::strings names = Rf_getAttrib(lst, R_NamesSymbol); - if (names[0] == "" && Rf_inherits(VECTOR_ELT(lst, 0), "data.frame")) { + if (names[0] == "" && Rf_inherits(VECTOR_ELT(lst, 0), "data.frame") && + Rf_xlength(lst) == 1) { SEXP top_level = metadata[0] = arrow_attributes(VECTOR_ELT(lst, 0), true); if (!Rf_isNull(top_level) && XLENGTH(top_level) > 0) { has_top_level_metadata = true; diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index 7e7084542d3..4198628986e 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -346,7 +346,7 @@ test_that("record_batch() handles data frame columns", { b = struct(x = int32(), y = int32()) ) ) - out <- as.data.frame(batch) + out <- as_tibble(batch) expect_equal(out, tibble::tibble(a = 1:10, b = tib)) # if not named, columns from tib are auto spliced @@ -355,7 +355,7 @@ test_that("record_batch() handles data frame columns", { batch2$schema, schema(a = int32(), x = int32(), y = int32()) ) - out <- as.data.frame(batch2) + out <- as_tibble(batch2) expect_equal(out, tibble::tibble(a = 1:10, !!!tib)) }) @@ -366,7 +366,7 @@ test_that("record_batch() handles data frame columns with schema spec", { schema <- schema(a = int32(), b = struct(x = int16(), y = float64())) batch <- record_batch(a = 1:10, b = tib, schema = schema) expect_equal(batch$schema, schema) - out <- as.data.frame(batch) + out <- as_tibble(batch) expect_equal(out, tibble::tibble(a = 1:10, b = tib_float)) schema <- schema(a = int32(), b = struct(x = int16(), y = utf8())) @@ -386,7 +386,7 @@ test_that("record_batch() auto splices (ARROW-5718)", { expect_equal(batch3, batch4) expect_equal(batch3$schema, schema(x = int32(), y = utf8(), z = int32())) expect_equal( - as.data.frame(batch3), + as_tibble(batch3), tibble::as_tibble(cbind(df, data.frame(z = 1:10))) ) @@ -395,7 +395,7 @@ test_that("record_batch() auto splices (ARROW-5718)", { batch6 <- record_batch(!!!df, schema = s) expect_equal(batch5, batch6) expect_equal(batch5$schema, s) - expect_equal(as.data.frame(batch5), df) + expect_equal(as_tibble(batch5), df) s2 <- schema(x = float64(), y = utf8(), z = int16()) batch7 <- record_batch(df, z = 1:10, schema = s2) @@ -403,7 +403,7 @@ test_that("record_batch() auto splices (ARROW-5718)", { expect_equal(batch7, batch8) expect_equal(batch7$schema, s2) expect_equal( - as.data.frame(batch7), + as_tibble(batch7), tibble::as_tibble(cbind(df, data.frame(z = 1:10))) ) }) @@ -627,7 +627,7 @@ test_that("Handling string data with embedded nuls", { # altrep. Without it (i.e. 3.5.0 and below, the error would trigger immediately # on `as.vector()` where as with it, the error only happens on materialization) skip_on_r_older_than("3.6") - df <- as.data.frame(batch_with_nul) + df <- as_tibble(batch_with_nul) expect_error( df$b[], @@ -648,7 +648,7 @@ test_that("Handling string data with embedded nuls", { suppressWarnings( expect_warning( expect_equal( - as.data.frame(batch_with_nul)$b, + as_tibble(batch_with_nul)$b, c("person", "woman", "man", "camera", "tv"), ignore_attr = TRUE ), diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index 817b645fad9..1dd1c1faf15 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -265,7 +265,7 @@ test_that("table() handles ... of arrays, chunked arrays, vectors", { tab$schema, schema(a = int32(), b = int32(), c = float64(), x = int32(), y = utf8()) ) - res <- as.data.frame(tab) + res <- as_tibble(tab) expect_equal(names(res), c("a", "b", "c", "x", "y")) expect_equal( res, @@ -280,14 +280,14 @@ test_that("table() auto splices (ARROW-5718)", { tab2 <- Table$create(!!!df) expect_equal(tab1, tab2) expect_equal(tab1$schema, schema(x = int32(), y = utf8())) - expect_equal(as.data.frame(tab1), df) + expect_equal(as_tibble(tab1), df) s <- schema(x = float64(), y = utf8()) tab3 <- Table$create(df, schema = s) tab4 <- Table$create(!!!df, schema = s) expect_equal(tab3, tab4) expect_equal(tab3$schema, s) - expect_equal(as.data.frame(tab3), df) + expect_equal(as_tibble(tab3), df) }) test_that("Validation when creating table with schema (ARROW-10953)", { @@ -366,7 +366,7 @@ test_that("Can create table with specific dictionary types", { expect_equal(sch, tab$schema) if (i != int64()) { # TODO: same downcast to int32 as we do for int64() type elsewhere - expect_identical(as.data.frame(tab), fact) + expect_identical(as_tibble(tab), fact) } } }) @@ -380,7 +380,7 @@ test_that("Table unifies dictionary on conversion back to R (ARROW-8374)", { res <- tibble::tibble(f = factor(c("a", "c", NA), levels = c("a", "b", "c", "d"))) tab <- Table$create(b1, b2, b3, b4) - expect_identical(as.data.frame(tab), res) + expect_identical(as_tibble(tab), res) }) test_that("Table$SelectColumns()", { @@ -711,3 +711,24 @@ test_that("as_arrow_table() errors on data.frame with NULL names", { names(df) <- NULL expect_error(as_arrow_table(df), "Input data frame columns must be named") }) + +test_that("as.data.frame() on an ArrowTabular object returns a vanilla data.frame and not a tibble", { + df <- data.frame(x = 1) + out1 <- as.data.frame(arrow::arrow_table(df, name = "1")) + out2 <- as.data.frame(arrow::arrow_table(name = "1", df)) + out3 <- as.data.frame(arrow::arrow_table(df)) + + expect_s3_class(out1, "data.frame", exact = TRUE) + expect_s3_class(out2, "data.frame", exact = TRUE) + expect_s3_class(out3, "data.frame", exact = TRUE) +}) + +test_that("as_tibble.ArrowTabular retains groups", { + # calling as_tibble.default on ArrowTabular objects results in any grouping being dropped, which is why + # we need as_tibble.ArrowTabular + df <- data.frame(x = 1:4, y = c("a", "b")) + df_grouped <- dplyr::group_by(df, y) + arrow_grouped <- arrow_table(df_grouped) + expect_data_frame(arrow_grouped, df_grouped) + +}) diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R index 98face44ff9..a8de6bfc36c 100644 --- a/r/tests/testthat/test-compute-aggregate.R +++ b/r/tests/testthat/test-compute-aggregate.R @@ -368,7 +368,7 @@ test_that("is_in", { test_that("value_counts", { a <- Array$create(c(1, 4, 3, 1, 1, 3, 4)) - result_df <- tibble::tibble( + result_df <- data.frame( values = c(1, 4, 3), counts = c(3L, 2L, 2L) ) diff --git a/r/tests/testthat/test-compute-sort.R b/r/tests/testthat/test-compute-sort.R index f521efeddc5..0b2f6c2e6da 100644 --- a/r/tests/testthat/test-compute-sort.R +++ b/r/tests/testthat/test-compute-sort.R @@ -141,7 +141,7 @@ test_that("Table$SortIndices()", { sort(tbl$chr, na.last = TRUE) ) expect_identical( - as.data.frame(x$Take(x$SortIndices(c("int", "dbl"), c(FALSE, FALSE)))), + as_tibble(x$Take(x$SortIndices(c("int", "dbl"), c(FALSE, FALSE)))), tbl %>% arrange(int, dbl) ) }) @@ -149,7 +149,7 @@ test_that("Table$SortIndices()", { test_that("RecordBatch$SortIndices()", { x <- record_batch(tbl) expect_identical( - as.data.frame(x$Take(x$SortIndices(c("chr", "int", "dbl"), TRUE))), + as_tibble(x$Take(x$SortIndices(c("chr", "int", "dbl"), TRUE))), tbl %>% arrange(desc(chr), desc(int), desc(dbl)) ) }) diff --git a/r/tests/testthat/test-dataset-csv.R b/r/tests/testthat/test-dataset-csv.R index df58f853a1e..a8cd5bfe33b 100644 --- a/r/tests/testthat/test-dataset-csv.R +++ b/r/tests/testthat/test-dataset-csv.R @@ -91,7 +91,7 @@ test_that("CSV scan options", { sb$FragmentScanOptions(options) tab <- sb$Finish()$ToTable() - expect_equal(as.data.frame(tab), tibble(chr = c("foo", NA))) + expect_equal(as_tibble(tab), tibble(chr = c("foo", NA))) # Set default convert options in CsvFileFormat csv_format <- CsvFileFormat$create( diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index 91b405fc01c..06ecc3db43b 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -86,7 +86,7 @@ expect_scan_result <- function(ds, schm) { expect_r6_class(tab, "Table") expect_equal( - as.data.frame(tab), + as_tibble(tab), df1[8, c("chr", "lgl")] ) } @@ -706,7 +706,7 @@ test_that("streaming map_batches into an ExecPlan", { select(int) %>% map_batches( # as_mapper() can't handle %>%? - ~ mutate(as.data.frame(.), lets = letters[int]) + ~ mutate(as_tibble(.), lets = letters[int]) ) %>% arrange(int) %>% collect(), @@ -806,19 +806,19 @@ test_that("head/tail", { big_df <- rbind(df1, df2) # No n provided (default is 6, all from one batch) - expect_equal(as.data.frame(head(ds)), head(df1)) - expect_equal(as.data.frame(tail(ds)), tail(df2)) + expect_equal(as_tibble(head(ds)), head(df1)) + expect_equal(as_tibble(tail(ds)), tail(df2)) # n = 0: have to drop `fct` because factor levels don't come through from # arrow when there are 0 rows zero_df <- big_df[FALSE, names(big_df) != "fct"] - expect_equal(as.data.frame(head(ds, 0))[, names(ds) != "fct"], zero_df) - expect_equal(as.data.frame(tail(ds, 0))[, names(ds) != "fct"], zero_df) + expect_equal(as_tibble(head(ds, 0))[, names(ds) != "fct"], zero_df) + expect_equal(as_tibble(tail(ds, 0))[, names(ds) != "fct"], zero_df) # Two more cases: more than 1 batch, and more than nrow for (n in c(12, 1000)) { - expect_equal(as.data.frame(head(ds, n)), head(big_df, n)) - expect_equal(as.data.frame(tail(ds, n)), tail(big_df, n)) + expect_equal(as_tibble(head(ds, n)), head(big_df, n)) + expect_equal(as_tibble(tail(ds, n)), tail(big_df, n)) } expect_error(head(ds, -1)) # Not yet implemented expect_error(tail(ds, -1)) # Not yet implemented @@ -865,17 +865,17 @@ test_that("Dataset [ (take by index)", { ds <- open_dataset(dataset_dir) # Taking only from one file expect_equal( - as.data.frame(ds[c(4, 5, 9), 3:4]), + as_tibble(ds[c(4, 5, 9), 3:4]), df1[c(4, 5, 9), 3:4] ) # Taking from more than one expect_equal( - as.data.frame(ds[c(4, 5, 9, 12, 13), 3:4]), + as_tibble(ds[c(4, 5, 9, 12, 13), 3:4]), rbind(df1[c(4, 5, 9), 3:4], df2[2:3, 3:4]) ) # Taking out of order expect_equal( - as.data.frame(ds[c(4, 13, 9, 12, 5), ]), + as_tibble(ds[c(4, 13, 9, 12, 5), ]), rbind( df1[4, ], df2[3, ], @@ -890,7 +890,7 @@ test_that("Dataset [ (take by index)", { filter(int > 6) %>% select(int, lgl) expect_equal( - as.data.frame(ds2[c(2, 5), ]), + as_tibble(ds2[c(2, 5), ]), rbind( df1[8, c("int", "lgl")], df2[1, c("int", "lgl")] @@ -985,11 +985,11 @@ test_that("Scanner$ScanBatches", { ds <- open_dataset(ipc_dir, format = "feather") batches <- ds$NewScan()$Finish()$ScanBatches() table <- Table$create(!!!batches) - expect_equal(as.data.frame(table), rbind(df1, df2)) + expect_equal(as_tibble(table), rbind(df1, df2)) batches <- ds$NewScan()$Finish()$ScanBatches() table <- Table$create(!!!batches) - expect_equal(as.data.frame(table), rbind(df1, df2)) + expect_equal(as_tibble(table), rbind(df1, df2)) }) test_that("Scanner$ToRecordBatchReader()", { @@ -1002,7 +1002,7 @@ test_that("Scanner$ToRecordBatchReader()", { reader <- scan$ToRecordBatchReader() expect_r6_class(reader, "RecordBatchReader") expect_identical( - as.data.frame(reader$read_table()), + as_tibble(reader$read_table()), df1[df1$int > 6, c("int", "lgl")] ) }) @@ -1298,7 +1298,6 @@ test_that("FileSystemFactoryOptions with DirectoryPartitioning", { expect_equal( ds %>% summarize(sum(gear)) %>% - collect() %>% as.data.frame(), mtcars %>% summarize(sum(gear)) diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R index 0b2b23ec860..d3c08d6856b 100644 --- a/r/tests/testthat/test-dplyr-query.R +++ b/r/tests/testthat/test-dplyr-query.R @@ -117,14 +117,14 @@ test_that("collect(as_data_frame=FALSE)", { # collect(as_data_frame = FALSE) always returns Table now expect_r6_class(b2, "Table") expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")] - expect_equal(as.data.frame(b2), expected) + expect_equal(as_tibble(b2), expected) b3 <- batch %>% select(int, strng = chr) %>% filter(int > 5) %>% collect(as_data_frame = FALSE) expect_r6_class(b3, "Table") - expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng"))) + expect_equal(as_tibble(b3), set_names(expected, c("int", "strng"))) b4 <- batch %>% select(int, strng = chr) %>% @@ -133,7 +133,7 @@ test_that("collect(as_data_frame=FALSE)", { collect(as_data_frame = FALSE) expect_r6_class(b4, "Table") expect_equal( - as.data.frame(b4), + as_tibble(b4), expected %>% rename(strng = chr) %>% group_by(int) @@ -154,14 +154,14 @@ test_that("compute()", { expect_r6_class(b2, "Table") expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")] - expect_equal(as.data.frame(b2), expected) + expect_equal(as_tibble(b2), expected) b3 <- batch %>% select(int, strng = chr) %>% filter(int > 5) %>% compute() expect_r6_class(b3, "Table") - expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng"))) + expect_equal(as_tibble(b3), set_names(expected, c("int", "strng"))) b4 <- batch %>% select(int, strng = chr) %>% @@ -170,7 +170,7 @@ test_that("compute()", { compute() expect_r6_class(b4, "Table") expect_equal( - as.data.frame(b4), + as_tibble(b4), expected %>% rename(strng = chr) %>% group_by(int) @@ -193,7 +193,7 @@ test_that("head", { filter(int > 5) %>% head(2) expect_s3_class(b3, "arrow_dplyr_query") - expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng"))) + expect_equal(as_tibble(b3), set_names(expected, c("int", "strng"))) b4 <- batch %>% select(int, strng = chr) %>% @@ -202,7 +202,7 @@ test_that("head", { head(2) expect_s3_class(b4, "arrow_dplyr_query") expect_equal( - as.data.frame(b4), + as_tibble(b4), expected %>% rename(strng = chr) %>% group_by(int) @@ -268,7 +268,7 @@ test_that("tail", { expect_s3_class(b2, "arrow_dplyr_query") expected <- tail(tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")], 2) - expect_equal(as.data.frame(b2), expected) + expect_equal(as_tibble(b2), expected) b3 <- batch %>% select(int, strng = chr) %>% @@ -276,7 +276,7 @@ test_that("tail", { arrange(int) %>% tail(2) expect_s3_class(b3, "arrow_dplyr_query") - expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng"))) + expect_equal(as_tibble(b3), set_names(expected, c("int", "strng"))) b4 <- batch %>% select(int, strng = chr) %>% @@ -286,7 +286,7 @@ test_that("tail", { tail(2) expect_s3_class(b4, "arrow_dplyr_query") expect_equal( - as.data.frame(b4), + as_tibble(b4), expected %>% rename(strng = chr) %>% group_by(int) @@ -598,7 +598,7 @@ test_that("compute() on a grouped query returns a Table with groups in metadata" compute() expect_r6_class(tab1, "Table") expect_equal( - as.data.frame(tab1), + as_tibble(tab1), tbl %>% group_by(int) ) diff --git a/r/tests/testthat/test-feather.R b/r/tests/testthat/test-feather.R index 4caadc27c4b..88e890d408d 100644 --- a/r/tests/testthat/test-feather.R +++ b/r/tests/testthat/test-feather.R @@ -177,7 +177,7 @@ test_that("feather read/write round trip", { tab1 <- read_feather(feather_file, as_data_frame = FALSE) expect_r6_class(tab1, "Table") - expect_equal(tib, as.data.frame(tab1)) + expect_equal(tib, as_tibble(tab1)) }) test_that("Read feather from raw vector", { diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R index 4cf8e49af1b..8c8c72b10c0 100644 --- a/r/tests/testthat/test-metadata.R +++ b/r/tests/testthat/test-metadata.R @@ -57,7 +57,7 @@ test_that("Table R metadata", { "$r$columns$c$columns$c1$attributes$extra_attr", fixed = TRUE ) - expect_identical(as.data.frame(tab), example_with_metadata) + expect_identical(as_tibble(tab), example_with_metadata) }) test_that("R metadata is not stored for types that map to Arrow types (factor, Date, etc.)", { @@ -94,7 +94,7 @@ test_that("Garbage R metadata doesn't break things", { tab <- Table$create(example_data[1:6]) tab$metadata$r <- "garbage" expect_warning( - expect_identical(as.data.frame(tab), example_data[1:6]), + expect_identical(as_tibble(tab), example_data[1:6]), "Invalid metadata$r", fixed = TRUE ) @@ -103,7 +103,7 @@ test_that("Garbage R metadata doesn't break things", { tab <- Table$create(example_data[1:6]) tab$metadata$r <- rawToChar(serialize("garbage", NULL, ascii = TRUE)) expect_warning( - expect_identical(as.data.frame(tab), example_data[1:6]), + expect_identical(as_tibble(tab), example_data[1:6]), "Invalid metadata$r", fixed = TRUE ) @@ -164,7 +164,7 @@ test_that("RecordBatch metadata", { }) test_that("RecordBatch R metadata", { - expect_identical(as.data.frame(record_batch(example_with_metadata)), example_with_metadata) + expect_identical(as_tibble(record_batch(example_with_metadata)), example_with_metadata) }) test_that("R metadata roundtrip via parquet", { @@ -195,7 +195,7 @@ test_that("haven types roundtrip via feather", { test_that("Date/time type roundtrip", { rb <- record_batch(example_with_times) expect_r6_class(rb$schema$posixlt$type, "VctrsExtensionType") - expect_identical(as.data.frame(rb), example_with_times) + expect_identical(as_tibble(rb), example_with_times) }) test_that("metadata keeps attribute of top level data frame", { @@ -223,7 +223,7 @@ test_that("metadata drops readr's problems attribute", { ) tab <- Table$create(readr_like) - expect_null(attr(as.data.frame(tab), "problems")) + expect_null(attr(as_tibble(tab), "problems")) }) test_that("Row-level metadata (does not by default) roundtrip", { @@ -241,8 +241,8 @@ test_that("Row-level metadata (does not by default) roundtrip", { list("arrow.preserve_row_level_metadata" = TRUE), { tab <- Table$create(df) - expect_identical(attr(as.data.frame(tab)$x[[1]], "foo"), "bar") - expect_identical(attr(as.data.frame(tab)$x[[2]], "baz"), "qux") + expect_identical(attr(as_tibble(tab)$x[[1]], "foo"), "bar") + expect_identical(attr(as_tibble(tab)$x[[2]], "baz"), "qux") } ) }) @@ -379,15 +379,3 @@ test_that("grouped_df metadata is recorded (efficiently)", { expect_r6_class(grouped_tab, "Table") expect_equal(grouped_tab$metadata$r$attributes$.group_vars, "a") }) - -test_that("grouped_df non-arrow metadata is preserved", { - simple_tbl <- tibble(a = 1:2, b = 3:4) - attr(simple_tbl, "other_metadata") <- "look I'm still here!" - grouped <- group_by(simple_tbl, a) - grouped_tab <- arrow_table(grouped) - - expect_equal( - attributes(as.data.frame(grouped_tab))$other_metadata, - "look I'm still here!" - ) -}) diff --git a/r/tests/testthat/test-na-omit.R b/r/tests/testthat/test-na-omit.R index c2d0fd1b71a..4c26d4275b6 100644 --- a/r/tests/testthat/test-na-omit.R +++ b/r/tests/testthat/test-na-omit.R @@ -43,7 +43,7 @@ test_that("na.fail on Array and ChunkedArray", { test_that("na.omit on Table", { tbl <- Table$create(example_data) expect_equal( - as.data.frame(na.omit(tbl)), + as_tibble(na.omit(tbl)), na.omit(example_data), # We don't include an attribute with the rows omitted ignore_attr = "na.action" @@ -53,7 +53,7 @@ test_that("na.omit on Table", { test_that("na.exclude on Table", { tbl <- Table$create(example_data) expect_equal( - as.data.frame(na.exclude(tbl)), + as_tibble(na.exclude(tbl)), na.exclude(example_data), ignore_attr = "na.action" ) @@ -67,7 +67,7 @@ test_that("na.fail on Table", { test_that("na.omit on RecordBatch", { batch <- record_batch(example_data) expect_equal( - as.data.frame(na.omit(batch)), + as_tibble(na.omit(batch)), na.omit(example_data), ignore_attr = "na.action" ) @@ -76,7 +76,7 @@ test_that("na.omit on RecordBatch", { test_that("na.exclude on RecordBatch", { batch <- record_batch(example_data) expect_equal( - as.data.frame(na.exclude(batch)), + as_tibble(na.exclude(batch)), na.omit(example_data), ignore_attr = "na.action" ) diff --git a/r/tests/testthat/test-python-flight.R b/r/tests/testthat/test-python-flight.R index d2b6fd491e1..f8519fcabd3 100644 --- a/r/tests/testthat/test-python-flight.R +++ b/r/tests/testthat/test-python-flight.R @@ -53,13 +53,13 @@ if (process_is_running("demo_flight_server")) { }) test_that("flight_get", { - expect_identical(as.data.frame(flight_get(client, flight_obj)), example_data) + expect_identical(as_tibble(flight_get(client, flight_obj)), example_data) }) test_that("flight_put with RecordBatch", { flight_obj2 <- tempfile() flight_put(client, RecordBatch$create(example_data), path = flight_obj2) - expect_identical(as.data.frame(flight_get(client, flight_obj2)), example_data) + expect_identical(as_tibble(flight_get(client, flight_obj2)), example_data) }) test_that("flight_put with overwrite = FALSE", { @@ -69,7 +69,7 @@ if (process_is_running("demo_flight_server")) { ) # Default is TRUE so this will overwrite flight_put(client, example_with_times, path = flight_obj) - expect_identical(as.data.frame(flight_get(client, flight_obj)), example_with_times) + expect_identical(as_tibble(flight_get(client, flight_obj)), example_with_times) }) test_that("flight_disconnect", { diff --git a/r/tests/testthat/test-python.R b/r/tests/testthat/test-python.R index 968d72119c5..87f364d6e2b 100644 --- a/r/tests/testthat/test-python.R +++ b/r/tests/testthat/test-python.R @@ -108,7 +108,7 @@ test_that("RecordBatch with metadata roundtrip", { expect_identical(rbatch$metadata, batch$metadata) expect_equal(rbatch$a, batch$a) expect_equal(rbatch[c("b", "c", "d")], batch[c("b", "c", "d")]) - expect_identical(as.data.frame(rbatch), example_with_metadata) + expect_identical(as_tibble(rbatch), example_with_metadata) }) test_that("Table with metadata roundtrip", { @@ -123,7 +123,7 @@ test_that("Table with metadata roundtrip", { expect_identical(rtab$metadata, tab$metadata) expect_equal(rtab$a, tab$a) expect_equal(rtab[c("b", "c", "d")], tab[c("b", "c", "d")]) - expect_identical(as.data.frame(rtab), example_with_metadata) + expect_identical(as_tibble(rtab), example_with_metadata) }) test_that("DataType roundtrip", { @@ -161,7 +161,7 @@ test_that("RecordBatchReader to python", { back_to_r <- reticulate::py_to_r(pytab) expect_r6_class(back_to_r, "Table") expect_identical( - as.data.frame(back_to_r), + as_tibble(back_to_r), example_data %>% select(int, lgl) %>% filter(int > 6) @@ -178,7 +178,7 @@ test_that("RecordBatchReader from python", { back_to_r <- reticulate::py_to_r(pyreader) rt_table <- back_to_r$read_table() expect_r6_class(rt_table, "Table") - expect_identical(as.data.frame(rt_table), example_data) + expect_identical(as_tibble(rt_table), example_data) scan <- Scanner$create(tab) reader <- scan$ToRecordBatchReader() diff --git a/r/tests/testthat/test-read-write.R b/r/tests/testthat/test-read-write.R index 66f6db56d90..1bd5826090a 100644 --- a/r/tests/testthat/test-read-write.R +++ b/r/tests/testthat/test-read-write.R @@ -115,7 +115,7 @@ test_that("table round trip handles NA in integer and numeric", { test_that("reading/writing a raw vector (sparklyr integration)", { # These are effectively what sparklyr calls to get data to/from Spark read_from_raw_test <- function(x) { - as.data.frame(RecordBatchStreamReader$create(x)$read_next_batch()) + as_tibble(RecordBatchStreamReader$create(x)$read_next_batch()) } bytes <- write_to_raw(example_data) expect_type(bytes, "raw") diff --git a/r/tests/testthat/test-utf.R b/r/tests/testthat/test-utf.R index f7553da5b4a..38de11b7c37 100644 --- a/r/tests/testthat/test-utf.R +++ b/r/tests/testthat/test-utf.R @@ -45,12 +45,12 @@ test_that("We handle non-UTF strings", { expect_identical(as.vector(ChunkedArray$create(df)), df) # Table (including field name) - expect_identical(as.data.frame(Table$create(df)), df) - expect_identical(as.data.frame(Table$create(df_struct)), df_struct) + expect_identical(as_tibble(Table$create(df)), df) + expect_identical(as_tibble(Table$create(df_struct)), df_struct) # RecordBatch - expect_identical(as.data.frame(record_batch(df)), df) - expect_identical(as.data.frame(record_batch(df_struct)), df_struct) + expect_identical(as_tibble(record_batch(df)), df) + expect_identical(as_tibble(record_batch(df_struct)), df_struct) # Schema field name df_schema <- do.call(schema, raw_schema) @@ -59,10 +59,10 @@ test_that("We handle non-UTF strings", { df_struct_schema <- schema(a = do.call(struct, raw_schema)) # Create table/batch with schema - expect_identical(as.data.frame(Table$create(df, schema = df_schema)), df) - expect_identical(as.data.frame(Table$create(df_struct, schema = df_struct_schema)), df_struct) - expect_identical(as.data.frame(record_batch(df, schema = df_schema)), df) - expect_identical(as.data.frame(record_batch(df_struct, schema = df_struct_schema)), df_struct) + expect_identical(as_tibble(Table$create(df, schema = df_schema)), df) + expect_identical(as_tibble(Table$create(df_struct, schema = df_struct_schema)), df_struct) + expect_identical(as_tibble(record_batch(df, schema = df_schema)), df) + expect_identical(as_tibble(record_batch(df_struct, schema = df_struct_schema)), df_struct) # Serialization feather_file <- tempfile()