diff --git a/r/R/array.R b/r/R/array.R index 109f6daaa2b..3e9e0eae1ab 100644 --- a/r/R/array.R +++ b/r/R/array.R @@ -474,7 +474,7 @@ dim.StructArray <- function(x, ...) c(length(x), x$type$num_fields) #' @export as.data.frame.StructArray <- function(x, row.names = NULL, optional = FALSE, ...) { - as.vector(x) + as.data.frame(collect.StructArray(x), row.names = row.names, optional = optional, ...) } #' @rdname array diff --git a/r/R/arrow-tabular.R b/r/R/arrow-tabular.R index ae68cc2118f..e62547d291b 100644 --- a/r/R/arrow-tabular.R +++ b/r/R/arrow-tabular.R @@ -94,7 +94,8 @@ ArrowTabular <- R6Class("ArrowTabular", #' @export as.data.frame.ArrowTabular <- function(x, row.names = NULL, optional = FALSE, ...) { df <- x$to_data_frame() - apply_arrow_r_metadata(df, x$metadata$r) + out <- apply_arrow_r_metadata(df, x$metadata$r) + as.data.frame(out, row.names = row.names, optional = optional, ...) } #' @export diff --git a/r/R/csv.R b/r/R/csv.R index 82243238662..88008bd6822 100644 --- a/r/R/csv.R +++ b/r/R/csv.R @@ -248,7 +248,7 @@ read_delim_arrow <- function(file, } if (isTRUE(as_data_frame)) { - tab <- as.data.frame(tab) + tab <- collect.ArrowTabular(tab) } tab diff --git a/r/R/dplyr-collect.R b/r/R/dplyr-collect.R index 9205a31b14f..970722e86a5 100644 --- a/r/R/dplyr-collect.R +++ b/r/R/dplyr-collect.R @@ -24,7 +24,8 @@ collect.arrow_dplyr_query <- function(x, as_data_frame = TRUE, ...) { } collect.ArrowTabular <- function(x, as_data_frame = TRUE, ...) { if (as_data_frame) { - as.data.frame(x, ...) + df <- x$to_data_frame() + apply_arrow_r_metadata(df, x$metadata$r) } else { x } @@ -34,6 +35,10 @@ collect.Dataset <- function(x, as_data_frame = TRUE, ...) { } collect.RecordBatchReader <- collect.Dataset +collect.StructArray <- function(x, row.names = NULL, optional = FALSE, ...) { + as.vector(x) +} + compute.ArrowTabular <- function(x, ...) x compute.arrow_dplyr_query <- function(x, ...) { # TODO: should this tryCatch move down into as_arrow_table()? diff --git a/r/R/dplyr.R b/r/R/dplyr.R index 54ecc80aad1..62b345e1cea 100644 --- a/r/R/dplyr.R +++ b/r/R/dplyr.R @@ -216,7 +216,8 @@ unique.RecordBatchReader <- unique.arrow_dplyr_query #' @export as.data.frame.arrow_dplyr_query <- function(x, row.names = NULL, optional = FALSE, ...) { - collect.arrow_dplyr_query(x, as_data_frame = TRUE, ...) + out <- collect.arrow_dplyr_query(x, as_data_frame = TRUE, ...) + as.data.frame(out) } #' @export diff --git a/r/R/feather.R b/r/R/feather.R index 1488db29eb7..24971669fc5 100644 --- a/r/R/feather.R +++ b/r/R/feather.R @@ -196,7 +196,8 @@ read_feather <- function(file, col_select = NULL, as_data_frame = TRUE, mmap = T ) if (isTRUE(as_data_frame)) { - out <- as.data.frame(out) + df <- out$to_data_frame() + out <- apply_arrow_r_metadata(df, out$metadata$r) } out } diff --git a/r/R/ipc-stream.R b/r/R/ipc-stream.R index f0b4a6aae0e..71441323934 100644 --- a/r/R/ipc-stream.R +++ b/r/R/ipc-stream.R @@ -106,7 +106,7 @@ read_ipc_stream <- function(file, as_data_frame = TRUE, ...) { # https://issues.apache.org/jira/browse/ARROW-6830 out <- RecordBatchStreamReader$create(file)$read_table() if (as_data_frame) { - out <- as.data.frame(out) + out <- collect.ArrowTabular(out) } out } diff --git a/r/R/json.R b/r/R/json.R index cdbe850b32f..e8131b37f25 100644 --- a/r/R/json.R +++ b/r/R/json.R @@ -84,7 +84,7 @@ read_json_arrow <- function(file, } if (isTRUE(as_data_frame)) { - tab <- as.data.frame(tab) + tab <- collect.ArrowTabular(tab) } tab } diff --git a/r/R/metadata.R b/r/R/metadata.R index 6a54b3e3842..3ae2db4eaa7 100644 --- a/r/R/metadata.R +++ b/r/R/metadata.R @@ -22,6 +22,14 @@ # drop problems attributes (most likely from readr) x[["attributes"]][["problems"]] <- NULL + # remove the class if it's just data.frame + if (identical(x$attributes$class, "data.frame")) { + x$attributes <- x$attributes[names(x$attributes) != "class"] + if (is_empty(x$attributes)) { + x <- x[names(x) != "attributes"] + } + } + out <- serialize(x, NULL, ascii = TRUE) # if the metadata is over 100 kB, compress @@ -62,6 +70,7 @@ apply_arrow_r_metadata <- function(x, r_metadata) { expr = { columns_metadata <- r_metadata$columns if (is.data.frame(x)) { + # if columns metadata exists, apply it here if (length(names(x)) && !is.null(columns_metadata)) { for (name in intersect(names(columns_metadata), names(x))) { x[[name]] <- apply_arrow_r_metadata(x[[name]], columns_metadata[[name]]) diff --git a/r/R/parquet.R b/r/R/parquet.R index f3d384e8c25..1335e852192 100644 --- a/r/R/parquet.R +++ b/r/R/parquet.R @@ -70,7 +70,7 @@ read_parquet <- function(file, } if (as_data_frame) { - tab <- as.data.frame(tab) + tab <- collect.ArrowTabular(tab) } tab } diff --git a/r/tests/testthat/helper-expectation.R b/r/tests/testthat/helper-expectation.R index 303a96ead7d..090ed36aa7f 100644 --- a/r/tests/testthat/helper-expectation.R +++ b/r/tests/testthat/helper-expectation.R @@ -19,8 +19,9 @@ expect_as_vector <- function(x, y, ...) { expect_equal(as.vector(x), y, ...) } -expect_data_frame <- function(x, y, ...) { - expect_equal(as.data.frame(x), y, ...) +# expect both objects to contain equal values when converted to data.frame objects +expect_equal_data_frame <- function(x, y, ...) { + expect_equal(as.data.frame(x), as.data.frame(y), ...) } expect_r6_class <- function(object, class) { diff --git a/r/tests/testthat/test-RecordBatch.R b/r/tests/testthat/test-RecordBatch.R index 7e7084542d3..f29b75dbf40 100644 --- a/r/tests/testthat/test-RecordBatch.R +++ b/r/tests/testthat/test-RecordBatch.R @@ -89,7 +89,7 @@ test_that("RecordBatch", { schema(dbl = float64(), lgl = boolean(), chr = utf8(), fct = dictionary(int8(), utf8())) ) expect_equal(batch2$column(0), batch$column(1)) - expect_data_frame(batch2, tbl[, -1]) + expect_equal_data_frame(batch2, tbl[, -1]) # input validation expect_error(batch$RemoveColumn(NA), "'i' cannot be NA") @@ -109,10 +109,10 @@ test_that("RecordBatch S3 methods", { test_that("RecordBatch$Slice", { batch3 <- batch$Slice(5) - expect_data_frame(batch3, tbl[6:10, ]) + expect_equal_data_frame(batch3, tbl[6:10, ]) batch4 <- batch$Slice(5, 2) - expect_data_frame(batch4, tbl[6:7, ]) + expect_equal_data_frame(batch4, tbl[6:7, ]) # Input validation expect_error(batch$Slice("ten")) @@ -131,20 +131,20 @@ test_that("RecordBatch$Slice", { }) test_that("[ on RecordBatch", { - expect_data_frame(batch[6:7, ], tbl[6:7, ]) - expect_data_frame(batch[c(6, 7), ], tbl[6:7, ]) - expect_data_frame(batch[6:7, 2:4], tbl[6:7, 2:4]) - expect_data_frame(batch[, c("dbl", "fct")], tbl[, c(2, 5)]) + expect_equal_data_frame(batch[6:7, ], tbl[6:7, ]) + expect_equal_data_frame(batch[c(6, 7), ], tbl[6:7, ]) + expect_equal_data_frame(batch[6:7, 2:4], tbl[6:7, 2:4]) + expect_equal_data_frame(batch[, c("dbl", "fct")], tbl[, c(2, 5)]) expect_identical(as.vector(batch[, "chr", drop = TRUE]), tbl$chr) - expect_data_frame(batch[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4]) - expect_data_frame( + expect_equal_data_frame(batch[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4]) + expect_equal_data_frame( batch[rep(c(FALSE, TRUE), 5), ], tbl[c(2, 4, 6, 8, 10), ] ) # bool Array - expect_data_frame(batch[batch$lgl, ], tbl[tbl$lgl, ]) + expect_equal_data_frame(batch[batch$lgl, ], tbl[tbl$lgl, ]) # int Array - expect_data_frame(batch[Array$create(5:6), 2:4], tbl[6:7, 2:4]) + expect_equal_data_frame(batch[Array$create(5:6), 2:4], tbl[6:7, 2:4]) # input validation expect_error(batch[, c("dbl", "NOTACOLUMN")], 'Column not found: "NOTACOLUMN"') @@ -176,15 +176,15 @@ test_that("[[<- assignment", { # can remove a column batch[["chr"]] <- NULL - expect_data_frame(batch, tbl[-4]) + expect_equal_data_frame(batch, tbl[-4]) # can remove a column by index batch[[4]] <- NULL - expect_data_frame(batch, tbl[1:3]) + expect_equal_data_frame(batch, tbl[1:3]) # can add a named column batch[["new"]] <- letters[10:1] - expect_data_frame(batch, dplyr::bind_cols(tbl[1:3], new = letters[10:1])) + expect_equal_data_frame(batch, dplyr::bind_cols(tbl[1:3], new = letters[10:1])) # can replace a column by index batch[[2]] <- as.numeric(10:1) @@ -239,16 +239,16 @@ test_that("head and tail on RecordBatch", { fct = factor(letters[1:10]) ) batch <- RecordBatch$create(tbl) - expect_data_frame(head(batch), head(tbl)) - expect_data_frame(head(batch, 4), head(tbl, 4)) - expect_data_frame(head(batch, 40), head(tbl, 40)) - expect_data_frame(head(batch, -4), head(tbl, -4)) - expect_data_frame(head(batch, -40), head(tbl, -40)) - expect_data_frame(tail(batch), tail(tbl)) - expect_data_frame(tail(batch, 4), tail(tbl, 4)) - expect_data_frame(tail(batch, 40), tail(tbl, 40)) - expect_data_frame(tail(batch, -4), tail(tbl, -4)) - expect_data_frame(tail(batch, -40), tail(tbl, -40)) + expect_equal_data_frame(head(batch), head(tbl)) + expect_equal_data_frame(head(batch, 4), head(tbl, 4)) + expect_equal_data_frame(head(batch, 40), head(tbl, 40)) + expect_equal_data_frame(head(batch, -4), head(tbl, -4)) + expect_equal_data_frame(head(batch, -40), head(tbl, -40)) + expect_equal_data_frame(tail(batch), tail(tbl)) + expect_equal_data_frame(tail(batch, 4), tail(tbl, 4)) + expect_equal_data_frame(tail(batch, 40), tail(tbl, 40)) + expect_equal_data_frame(tail(batch, -4), tail(tbl, -4)) + expect_equal_data_frame(tail(batch, -40), tail(tbl, -40)) }) test_that("RecordBatch print method", { @@ -346,8 +346,8 @@ test_that("record_batch() handles data frame columns", { b = struct(x = int32(), y = int32()) ) ) - out <- as.data.frame(batch) - expect_equal(out, tibble::tibble(a = 1:10, b = tib)) + + expect_equal_data_frame(batch, tibble::tibble(a = 1:10, b = tib)) # if not named, columns from tib are auto spliced batch2 <- record_batch(a = 1:10, tib) @@ -355,8 +355,8 @@ test_that("record_batch() handles data frame columns", { batch2$schema, schema(a = int32(), x = int32(), y = int32()) ) - out <- as.data.frame(batch2) - expect_equal(out, tibble::tibble(a = 1:10, !!!tib)) + + expect_equal_data_frame(batch2, tibble::tibble(a = 1:10, !!!tib)) }) test_that("record_batch() handles data frame columns with schema spec", { @@ -366,8 +366,7 @@ test_that("record_batch() handles data frame columns with schema spec", { schema <- schema(a = int32(), b = struct(x = int16(), y = float64())) batch <- record_batch(a = 1:10, b = tib, schema = schema) expect_equal(batch$schema, schema) - out <- as.data.frame(batch) - expect_equal(out, tibble::tibble(a = 1:10, b = tib_float)) + expect_equal_data_frame(batch, tibble::tibble(a = 1:10, b = tib_float)) schema <- schema(a = int32(), b = struct(x = int16(), y = utf8())) expect_error(record_batch(a = 1:10, b = tib, schema = schema)) @@ -379,15 +378,15 @@ test_that("record_batch() auto splices (ARROW-5718)", { batch2 <- record_batch(!!!df) expect_equal(batch1, batch2) expect_equal(batch1$schema, schema(x = int32(), y = utf8())) - expect_data_frame(batch1, df) + expect_equal_data_frame(batch1, df) batch3 <- record_batch(df, z = 1:10) batch4 <- record_batch(!!!df, z = 1:10) expect_equal(batch3, batch4) expect_equal(batch3$schema, schema(x = int32(), y = utf8(), z = int32())) - expect_equal( - as.data.frame(batch3), - tibble::as_tibble(cbind(df, data.frame(z = 1:10))) + expect_equal_data_frame( + batch3, + cbind(df, data.frame(z = 1:10)) ) s <- schema(x = float64(), y = utf8()) @@ -395,16 +394,16 @@ test_that("record_batch() auto splices (ARROW-5718)", { batch6 <- record_batch(!!!df, schema = s) expect_equal(batch5, batch6) expect_equal(batch5$schema, s) - expect_equal(as.data.frame(batch5), df) + expect_equal_data_frame(batch5, df) s2 <- schema(x = float64(), y = utf8(), z = int16()) batch7 <- record_batch(df, z = 1:10, schema = s2) batch8 <- record_batch(!!!df, z = 1:10, schema = s2) expect_equal(batch7, batch8) expect_equal(batch7$schema, s2) - expect_equal( - as.data.frame(batch7), - tibble::as_tibble(cbind(df, data.frame(z = 1:10))) + expect_equal_data_frame( + batch7, + cbind(df, data.frame(z = 1:10)) ) }) @@ -425,24 +424,24 @@ test_that("record_batch() handles null type (ARROW-7064)", { }) test_that("record_batch() scalar recycling with vectors", { - expect_data_frame( + expect_equal_data_frame( record_batch(a = 1:10, b = 5), tibble::tibble(a = 1:10, b = 5) ) }) test_that("record_batch() scalar recycling with Scalars, Arrays, and ChunkedArrays", { - expect_data_frame( + expect_equal_data_frame( record_batch(a = Array$create(1:10), b = Scalar$create(5)), tibble::tibble(a = 1:10, b = 5) ) - expect_data_frame( + expect_equal_data_frame( record_batch(a = Array$create(1:10), b = Array$create(5)), tibble::tibble(a = 1:10, b = 5) ) - expect_data_frame( + expect_equal_data_frame( record_batch(a = Array$create(1:10), b = ChunkedArray$create(5)), tibble::tibble(a = 1:10, b = 5) ) diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R index 233705323e7..ce3254a158e 100644 --- a/r/tests/testthat/test-Table.R +++ b/r/tests/testthat/test-Table.R @@ -68,26 +68,26 @@ tab <- Table$create(tbl) test_that("[, [[, $ for Table", { expect_identical(names(tab), names(tbl)) - expect_data_frame(tab[6:7, ], tbl[6:7, ]) - expect_data_frame(tab[6:7, 2:4], tbl[6:7, 2:4]) - expect_data_frame(tab[, c("dbl", "fct")], tbl[, c(2, 5)]) + expect_equal_data_frame(tab[6:7, ], tbl[6:7, ]) + expect_equal_data_frame(tab[6:7, 2:4], tbl[6:7, 2:4]) + expect_equal_data_frame(tab[, c("dbl", "fct")], tbl[, c(2, 5)]) expect_as_vector(tab[, "chr", drop = TRUE], tbl$chr) # Take within a single chunk - expect_data_frame(tab[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4]) - expect_data_frame(tab[rep(c(FALSE, TRUE), 5), ], tbl[c(2, 4, 6, 8, 10), ]) + expect_equal_data_frame(tab[c(7, 3, 5), 2:4], tbl[c(7, 3, 5), 2:4]) + expect_equal_data_frame(tab[rep(c(FALSE, TRUE), 5), ], tbl[c(2, 4, 6, 8, 10), ]) # bool ChunkedArray (with one chunk) - expect_data_frame(tab[tab$lgl, ], tbl[tbl$lgl, ]) + expect_equal_data_frame(tab[tab$lgl, ], tbl[tbl$lgl, ]) # ChunkedArray with multiple chunks c1 <- c(TRUE, FALSE, TRUE, TRUE, FALSE) c2 <- c(FALSE, FALSE, TRUE, TRUE, FALSE) ca <- ChunkedArray$create(c1, c2) - expect_data_frame(tab[ca, ], tbl[c(1, 3, 4, 8, 9), ]) + expect_equal_data_frame(tab[ca, ], tbl[c(1, 3, 4, 8, 9), ]) # int Array - expect_data_frame(tab[Array$create(5:6), 2:4], tbl[6:7, 2:4]) + expect_equal_data_frame(tab[Array$create(5:6), 2:4], tbl[6:7, 2:4]) # ChunkedArray - expect_data_frame(tab[ChunkedArray$create(5L, 6L), 2:4], tbl[6:7, 2:4]) + expect_equal_data_frame(tab[ChunkedArray$create(5L, 6L), 2:4], tbl[6:7, 2:4]) # Expression - expect_data_frame(tab[tab$int > 6, ], tbl[tbl$int > 6, ]) + expect_equal_data_frame(tab[tab$int > 6, ], tbl[tbl$int > 6, ]) expect_as_vector(tab[["int"]], tbl$int) expect_as_vector(tab$int, tbl$int) @@ -95,9 +95,9 @@ test_that("[, [[, $ for Table", { expect_null(tab$qwerty) expect_null(tab[["asdf"]]) # List-like column slicing - expect_data_frame(tab[2:4], tbl[2:4]) - expect_data_frame(tab[c(2, 1)], tbl[c(2, 1)]) - expect_data_frame(tab[-3], tbl[-3]) + expect_equal_data_frame(tab[2:4], tbl[2:4]) + expect_equal_data_frame(tab[c(2, 1)], tbl[c(2, 1)]) + expect_equal_data_frame(tab[-3], tbl[-3]) expect_error(tab[[c(4, 3)]]) expect_error(tab[[NA]], "'i' must be character or numeric, not logical") @@ -112,21 +112,21 @@ test_that("[, [[, $ for Table", { expect_error(tab[, c(6, NA)], "Column indices cannot be NA") skip("Table with 0 cols doesn't know how many rows it should have") - expect_data_frame(tab[0], tbl[0]) + expect_equal_data_frame(tab[0], tbl[0]) }) test_that("[[<- assignment", { # can remove a column tab[["chr"]] <- NULL - expect_data_frame(tab, tbl[-4]) + expect_equal_data_frame(tab, tbl[-4]) # can remove a column by index tab[[4]] <- NULL - expect_data_frame(tab, tbl[1:3]) + expect_equal_data_frame(tab, tbl[1:3]) # can add a named column tab[["new"]] <- letters[10:1] - expect_data_frame(tab, dplyr::bind_cols(tbl[1:3], new = letters[10:1])) + expect_equal_data_frame(tab, dplyr::bind_cols(tbl[1:3], new = letters[10:1])) # can replace a column by index tab[[2]] <- as.numeric(10:1) @@ -177,10 +177,10 @@ test_that("[[<- assignment", { test_that("Table$Slice", { tab2 <- tab$Slice(5) - expect_data_frame(tab2, tbl[6:10, ]) + expect_equal_data_frame(tab2, tbl[6:10, ]) tab3 <- tab$Slice(5, 2) - expect_data_frame(tab3, tbl[6:7, ]) + expect_equal_data_frame(tab3, tbl[6:7, ]) # Input validation expect_error(tab$Slice("ten")) @@ -199,16 +199,16 @@ test_that("Table$Slice", { }) test_that("head and tail on Table", { - expect_data_frame(head(tab), head(tbl)) - expect_data_frame(head(tab, 4), head(tbl, 4)) - expect_data_frame(head(tab, 40), head(tbl, 40)) - expect_data_frame(head(tab, -4), head(tbl, -4)) - expect_data_frame(head(tab, -40), head(tbl, -40)) - expect_data_frame(tail(tab), tail(tbl)) - expect_data_frame(tail(tab, 4), tail(tbl, 4)) - expect_data_frame(tail(tab, 40), tail(tbl, 40)) - expect_data_frame(tail(tab, -4), tail(tbl, -4)) - expect_data_frame(tail(tab, -40), tail(tbl, -40)) + expect_equal_data_frame(head(tab), head(tbl)) + expect_equal_data_frame(head(tab, 4), head(tbl, 4)) + expect_equal_data_frame(head(tab, 40), head(tbl, 40)) + expect_equal_data_frame(head(tab, -4), head(tbl, -4)) + expect_equal_data_frame(head(tab, -40), head(tbl, -40)) + expect_equal_data_frame(tail(tab), tail(tbl)) + expect_equal_data_frame(tail(tab, 4), tail(tbl, 4)) + expect_equal_data_frame(tail(tab, 40), tail(tbl, 40)) + expect_equal_data_frame(tail(tab, -4), tail(tbl, -4)) + expect_equal_data_frame(tail(tab, -40), tail(tbl, -40)) }) test_that("Table print method", { @@ -265,10 +265,9 @@ test_that("table() handles ... of arrays, chunked arrays, vectors", { tab$schema, schema(a = int32(), b = int32(), c = float64(), x = int32(), y = utf8()) ) - res <- as.data.frame(tab) - expect_equal(names(res), c("a", "b", "c", "x", "y")) - expect_equal( - res, + + expect_equal_data_frame( + tab, tibble::tibble(a = 1:10, b = 1:10, c = v, x = 1:10, y = letters[1:10]) ) }) @@ -280,14 +279,14 @@ test_that("table() auto splices (ARROW-5718)", { tab2 <- Table$create(!!!df) expect_equal(tab1, tab2) expect_equal(tab1$schema, schema(x = int32(), y = utf8())) - expect_equal(as.data.frame(tab1), df) + expect_equal_data_frame(tab1, df) s <- schema(x = float64(), y = utf8()) tab3 <- Table$create(df, schema = s) tab4 <- Table$create(!!!df, schema = s) expect_equal(tab3, tab4) expect_equal(tab3$schema, s) - expect_equal(as.data.frame(tab3), df) + expect_equal_data_frame(tab3, df) }) test_that("Validation when creating table with schema (ARROW-10953)", { @@ -366,7 +365,7 @@ test_that("Can create table with specific dictionary types", { expect_equal(sch, tab$schema) if (i != int64()) { # TODO: same downcast to int32 as we do for int64() type elsewhere - expect_identical(as.data.frame(tab), fact) + expect_equal_data_frame(tab, fact) } } }) @@ -380,7 +379,7 @@ test_that("Table unifies dictionary on conversion back to R (ARROW-8374)", { res <- tibble::tibble(f = factor(c("a", "c", NA), levels = c("a", "b", "c", "d"))) tab <- Table$create(b1, b2, b3, b4) - expect_identical(as.data.frame(tab), res) + expect_equal_data_frame(tab, res) }) test_that("Table$SelectColumns()", { @@ -410,24 +409,24 @@ test_that("Table$create() with different length columns", { }) test_that("Table$create() scalar recycling with vectors", { - expect_data_frame( + expect_equal_data_frame( Table$create(a = 1:10, b = 5), tibble::tibble(a = 1:10, b = 5) ) }) test_that("Table$create() scalar recycling with Scalars, Arrays, and ChunkedArrays", { - expect_data_frame( + expect_equal_data_frame( Table$create(a = Array$create(1:10), b = Scalar$create(5)), tibble::tibble(a = 1:10, b = 5) ) - expect_data_frame( + expect_equal_data_frame( Table$create(a = Array$create(1:10), b = Array$create(5)), tibble::tibble(a = 1:10, b = 5) ) - expect_data_frame( + expect_equal_data_frame( Table$create(a = Array$create(1:10), b = ChunkedArray$create(5)), tibble::tibble(a = 1:10, b = 5) ) @@ -712,21 +711,32 @@ test_that("as_arrow_table() errors on data.frame with NULL names", { expect_error(as_arrow_table(df), "Input data frame columns must be named") }) -test_that("we only preserve metadata of input to arrow_table when passed a single data.frame", { - # data.frame in, data.frame out +test_that("# GH-35038 - passing in multiple arguments doesn't affect return type", { + + df <- data.frame(x = 1) + out1 <- as.data.frame(arrow_table(df, name = "1")) + out2 <- as.data.frame(arrow_table(name = "1", df)) + + expect_s3_class(out1, c("data.frame"), exact = TRUE) + expect_s3_class(out2, c("data.frame"), exact = TRUE) +}) + +test_that("as.data.frame() on ArrowTabular objects returns a base R data.frame regardless of input type", { df <- data.frame(x = 1) out1 <- as.data.frame(arrow_table(df)) expect_s3_class(out1, "data.frame", exact = TRUE) - # tibble in, tibble out tib <- tibble::tibble(x = 1) out2 <- as.data.frame(arrow_table(tib)) - expect_s3_class(out2, c("tbl_df", "tbl", "data.frame"), exact = TRUE) + expect_s3_class(out2, "data.frame", exact = TRUE) +}) - # GH-35038 - passing in multiple arguments doesn't affect return type - out3 <- as.data.frame(arrow_table(df, name = "1")) - out4 <- as.data.frame(arrow_table(name = "1", df)) +test_that("collect() on ArrowTabular objects returns a tibble regardless of input type", { + df <- data.frame(x = 1) + out1 <- dplyr::collect(arrow_table(df)) + expect_s3_class(out1, c("tbl_df", "tbl", "data.frame"), exact = TRUE) - expect_s3_class(out3, c("tbl_df", "tbl", "data.frame"), exact = TRUE) - expect_s3_class(out4, c("tbl_df", "tbl", "data.frame"), exact = TRUE) + tib <- tibble::tibble(x = 1) + out2 <- dplyr::collect(arrow_table(tib)) + expect_s3_class(out2, c("tbl_df", "tbl", "data.frame"), exact = TRUE) }) diff --git a/r/tests/testthat/test-compute-aggregate.R b/r/tests/testthat/test-compute-aggregate.R index 98face44ff9..2732cdef3ed 100644 --- a/r/tests/testthat/test-compute-aggregate.R +++ b/r/tests/testthat/test-compute-aggregate.R @@ -377,7 +377,7 @@ test_that("value_counts", { type = struct(values = float64(), counts = int64()) ) expect_equal(value_counts(a), result) - expect_identical(as.data.frame(value_counts(a)), result_df) + expect_equal_data_frame(value_counts(a), result_df) expect_identical(as.vector(value_counts(a)$counts), result_df$counts) }) diff --git a/r/tests/testthat/test-compute-sort.R b/r/tests/testthat/test-compute-sort.R index ba3039c3313..b8c8482d642 100644 --- a/r/tests/testthat/test-compute-sort.R +++ b/r/tests/testthat/test-compute-sort.R @@ -145,16 +145,16 @@ test_that("Table$SortIndices()", { as.vector(x$Take(x$SortIndices("chr"))$chr), sort(tbl$chr, na.last = TRUE) ) - expect_identical( - as.data.frame(x$Take(x$SortIndices(c("int", "dbl"), c(FALSE, FALSE)))), + expect_equal_data_frame( + x$Take(x$SortIndices(c("int", "dbl"), c(FALSE, FALSE))), tbl %>% arrange(int, dbl) ) }) test_that("RecordBatch$SortIndices()", { x <- record_batch(tbl) - expect_identical( - as.data.frame(x$Take(x$SortIndices(c("chr", "int", "dbl"), TRUE))), + expect_equal_data_frame( + x$Take(x$SortIndices(c("chr", "int", "dbl"), TRUE)), tbl %>% arrange(desc(chr), desc(int), desc(dbl)) ) }) diff --git a/r/tests/testthat/test-dataset-csv.R b/r/tests/testthat/test-dataset-csv.R index df58f853a1e..98858a7d166 100644 --- a/r/tests/testthat/test-dataset-csv.R +++ b/r/tests/testthat/test-dataset-csv.R @@ -91,7 +91,7 @@ test_that("CSV scan options", { sb$FragmentScanOptions(options) tab <- sb$Finish()$ToTable() - expect_equal(as.data.frame(tab), tibble(chr = c("foo", NA))) + expect_equal_data_frame(tab, data.frame(chr = c("foo", NA))) # Set default convert options in CsvFileFormat csv_format <- CsvFileFormat$create( diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R index 91b405fc01c..b9972901a70 100644 --- a/r/tests/testthat/test-dataset.R +++ b/r/tests/testthat/test-dataset.R @@ -85,8 +85,8 @@ expect_scan_result <- function(ds, schm) { tab <- scn$ToTable() expect_r6_class(tab, "Table") - expect_equal( - as.data.frame(tab), + expect_equal_data_frame( + tab, df1[8, c("chr", "lgl")] ) } @@ -806,19 +806,19 @@ test_that("head/tail", { big_df <- rbind(df1, df2) # No n provided (default is 6, all from one batch) - expect_equal(as.data.frame(head(ds)), head(df1)) - expect_equal(as.data.frame(tail(ds)), tail(df2)) + expect_equal_data_frame(head(ds), head(df1)) + expect_equal_data_frame(tail(ds), tail(df2)) # n = 0: have to drop `fct` because factor levels don't come through from # arrow when there are 0 rows zero_df <- big_df[FALSE, names(big_df) != "fct"] - expect_equal(as.data.frame(head(ds, 0))[, names(ds) != "fct"], zero_df) - expect_equal(as.data.frame(tail(ds, 0))[, names(ds) != "fct"], zero_df) + expect_equal_data_frame(as.data.frame(head(ds, 0))[, names(ds) != "fct"], zero_df) + expect_equal_data_frame(as.data.frame(tail(ds, 0))[, names(ds) != "fct"], zero_df) # Two more cases: more than 1 batch, and more than nrow for (n in c(12, 1000)) { - expect_equal(as.data.frame(head(ds, n)), head(big_df, n)) - expect_equal(as.data.frame(tail(ds, n)), tail(big_df, n)) + expect_equal_data_frame(head(ds, n), head(big_df, n)) + expect_equal_data_frame(tail(ds, n), tail(big_df, n)) } expect_error(head(ds, -1)) # Not yet implemented expect_error(tail(ds, -1)) # Not yet implemented @@ -864,18 +864,18 @@ test_that("unique()", { test_that("Dataset [ (take by index)", { ds <- open_dataset(dataset_dir) # Taking only from one file - expect_equal( - as.data.frame(ds[c(4, 5, 9), 3:4]), + expect_equal_data_frame( + ds[c(4, 5, 9), 3:4], df1[c(4, 5, 9), 3:4] ) # Taking from more than one - expect_equal( - as.data.frame(ds[c(4, 5, 9, 12, 13), 3:4]), + expect_equal_data_frame( + ds[c(4, 5, 9, 12, 13), 3:4], rbind(df1[c(4, 5, 9), 3:4], df2[2:3, 3:4]) ) # Taking out of order - expect_equal( - as.data.frame(ds[c(4, 13, 9, 12, 5), ]), + expect_equal_data_frame( + ds[c(4, 13, 9, 12, 5), ], rbind( df1[4, ], df2[3, ], @@ -889,8 +889,8 @@ test_that("Dataset [ (take by index)", { ds2 <- ds %>% filter(int > 6) %>% select(int, lgl) - expect_equal( - as.data.frame(ds2[c(2, 5), ]), + expect_equal_data_frame( + ds2[c(2, 5), ], rbind( df1[8, c("int", "lgl")], df2[1, c("int", "lgl")] @@ -957,7 +957,9 @@ test_that("Can delete filesystem dataset files after collection", { write_dataset(ds0, dataset_dir2) ds <- open_dataset(dataset_dir2) - collected <- ds %>% arrange(int) %>% collect() + collected <- ds %>% + arrange(int) %>% + collect() unlink(dataset_dir2, recursive = TRUE) expect_false(dir.exists(dataset_dir2)) @@ -971,7 +973,11 @@ test_that("Can delete filesystem dataset files after collection", { # dataset write_dataset(ds0, dataset_dir2) ds <- open_dataset(dataset_dir2) - collected <- ds %>% arrange(int) %>% head() %>% arrange(int) %>% collect() + collected <- ds %>% + arrange(int) %>% + head() %>% + arrange(int) %>% + collect() unlink(dataset_dir2, recursive = TRUE) expect_false(dir.exists(dataset_dir2)) @@ -985,11 +991,11 @@ test_that("Scanner$ScanBatches", { ds <- open_dataset(ipc_dir, format = "feather") batches <- ds$NewScan()$Finish()$ScanBatches() table <- Table$create(!!!batches) - expect_equal(as.data.frame(table), rbind(df1, df2)) + expect_equal_data_frame(table, rbind(df1, df2)) batches <- ds$NewScan()$Finish()$ScanBatches() table <- Table$create(!!!batches) - expect_equal(as.data.frame(table), rbind(df1, df2)) + expect_equal_data_frame(table, rbind(df1, df2)) }) test_that("Scanner$ToRecordBatchReader()", { @@ -1001,8 +1007,8 @@ test_that("Scanner$ToRecordBatchReader()", { Scanner$create() reader <- scan$ToRecordBatchReader() expect_r6_class(reader, "RecordBatchReader") - expect_identical( - as.data.frame(reader$read_table()), + expect_equal_data_frame( + reader$read_table(), df1[df1$int > 6, c("int", "lgl")] ) }) diff --git a/r/tests/testthat/test-dplyr-funcs-datetime.R b/r/tests/testthat/test-dplyr-funcs-datetime.R index d47f9232111..d59356ad659 100644 --- a/r/tests/testthat/test-dplyr-funcs-datetime.R +++ b/r/tests/testthat/test-dplyr-funcs-datetime.R @@ -310,7 +310,7 @@ test_that("timestamp round trip correctly via strftime and strptime", { fmt2 <- paste(base_format2, fmt) fmt <- paste(base_format, paste0("%", fmt)) test_df <- tibble::tibble(x = strftime(times, format = fmt)) - expect_equal( + expect_equal_data_frame( test_df %>% arrow_table() %>% mutate(!!fmt := strptime(x, format = fmt2)) %>% @@ -1028,7 +1028,7 @@ test_that("leap_year mirror lubridate", { .input %>% mutate(x = leap_year(test_year)) %>% collect(), - data.frame( + tibble::tibble( test_year = as.Date(c( "1998-01-01", # not leap year "1996-01-01", # leap year (divide by 4 rule) @@ -1048,7 +1048,9 @@ test_that("am/pm mirror lubridate", { am2 = lubridate::am(test_time), pm2 = lubridate::pm(test_time) ) %>% - collect(), + # can't use collect() here due to how tibbles store datetimes + # TODO: add better explanation above + as.data.frame(), data.frame( test_time = strptime( x = c( diff --git a/r/tests/testthat/test-dplyr-query.R b/r/tests/testthat/test-dplyr-query.R index e478d0e4c40..bab81a463e9 100644 --- a/r/tests/testthat/test-dplyr-query.R +++ b/r/tests/testthat/test-dplyr-query.R @@ -119,14 +119,14 @@ test_that("collect(as_data_frame=FALSE)", { # collect(as_data_frame = FALSE) always returns Table now expect_r6_class(b2, "Table") expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")] - expect_equal(as.data.frame(b2), expected) + expect_equal_data_frame(b2, expected) b3 <- batch %>% select(int, strng = chr) %>% filter(int > 5) %>% collect(as_data_frame = FALSE) expect_r6_class(b3, "Table") - expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng"))) + expect_equal_data_frame(b3, set_names(expected, c("int", "strng"))) b4 <- batch %>% select(int, strng = chr) %>% @@ -134,8 +134,8 @@ test_that("collect(as_data_frame=FALSE)", { group_by(int) %>% collect(as_data_frame = FALSE) expect_r6_class(b4, "Table") - expect_equal( - as.data.frame(b4), + expect_equal_data_frame( + b4, expected %>% rename(strng = chr) %>% group_by(int) @@ -156,14 +156,14 @@ test_that("compute()", { expect_r6_class(b2, "Table") expected <- tbl[tbl$int > 5 & !is.na(tbl$int), c("int", "chr")] - expect_equal(as.data.frame(b2), expected) + expect_equal_data_frame(b2, expected) b3 <- batch %>% select(int, strng = chr) %>% filter(int > 5) %>% compute() expect_r6_class(b3, "Table") - expect_equal(as.data.frame(b3), set_names(expected, c("int", "strng"))) + expect_equal_data_frame(b3, set_names(expected, c("int", "strng"))) b4 <- batch %>% select(int, strng = chr) %>% @@ -171,8 +171,8 @@ test_that("compute()", { group_by(int) %>% compute() expect_r6_class(b4, "Table") - expect_equal( - as.data.frame(b4), + expect_equal_data_frame( + b4, expected %>% rename(strng = chr) %>% group_by(int) @@ -210,8 +210,7 @@ test_that("arrange then head returns the right data (ARROW-14162)", { arrange(mpg, disp) %>% head(4) %>% collect(), - mtcars, - ignore_attr = "row.names" + tibble::as_tibble(mtcars) ) }) @@ -222,8 +221,7 @@ test_that("arrange then tail returns the right data", { arrange(mpg, disp) %>% tail(4) %>% collect(), - mtcars, - ignore_attr = "row.names" + tibble::as_tibble(mtcars) ) }) @@ -559,8 +557,8 @@ test_that("compute() on a grouped query returns a Table with groups in metadata" group_by(int) %>% compute() expect_r6_class(tab1, "Table") - expect_equal( - as.data.frame(tab1), + expect_equal_data_frame( + tab1, tbl %>% group_by(int) ) diff --git a/r/tests/testthat/test-duckdb.R b/r/tests/testthat/test-duckdb.R index 24e8cadf2e3..409e99b70fb 100644 --- a/r/tests/testthat/test-duckdb.R +++ b/r/tests/testthat/test-duckdb.R @@ -168,7 +168,8 @@ test_that("to_arrow roundtrip, with dataset", { filter(int > 5 & part > 1) %>% mutate(dbl_plus = dbl + 1) %>% collect() %>% - arrange(part, int) + arrange(part, int) %>% + as.data.frame() ) }) diff --git a/r/tests/testthat/test-feather.R b/r/tests/testthat/test-feather.R index 4caadc27c4b..188a562fe81 100644 --- a/r/tests/testthat/test-feather.R +++ b/r/tests/testthat/test-feather.R @@ -177,7 +177,7 @@ test_that("feather read/write round trip", { tab1 <- read_feather(feather_file, as_data_frame = FALSE) expect_r6_class(tab1, "Table") - expect_equal(tib, as.data.frame(tab1)) + expect_equal_data_frame(tib, tab1) }) test_that("Read feather from raw vector", { diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R index 4cf8e49af1b..e44cd710380 100644 --- a/r/tests/testthat/test-metadata.R +++ b/r/tests/testthat/test-metadata.R @@ -57,7 +57,7 @@ test_that("Table R metadata", { "$r$columns$c$columns$c1$attributes$extra_attr", fixed = TRUE ) - expect_identical(as.data.frame(tab), example_with_metadata) + expect_equal_data_frame(tab, example_with_metadata) }) test_that("R metadata is not stored for types that map to Arrow types (factor, Date, etc.)", { @@ -94,7 +94,7 @@ test_that("Garbage R metadata doesn't break things", { tab <- Table$create(example_data[1:6]) tab$metadata$r <- "garbage" expect_warning( - expect_identical(as.data.frame(tab), example_data[1:6]), + as.data.frame(tab), "Invalid metadata$r", fixed = TRUE ) @@ -103,7 +103,7 @@ test_that("Garbage R metadata doesn't break things", { tab <- Table$create(example_data[1:6]) tab$metadata$r <- rawToChar(serialize("garbage", NULL, ascii = TRUE)) expect_warning( - expect_identical(as.data.frame(tab), example_data[1:6]), + as.data.frame(tab), "Invalid metadata$r", fixed = TRUE ) @@ -164,7 +164,7 @@ test_that("RecordBatch metadata", { }) test_that("RecordBatch R metadata", { - expect_identical(as.data.frame(record_batch(example_with_metadata)), example_with_metadata) + expect_equal_data_frame(record_batch(example_with_metadata), example_with_metadata) }) test_that("R metadata roundtrip via parquet", { @@ -195,14 +195,14 @@ test_that("haven types roundtrip via feather", { test_that("Date/time type roundtrip", { rb <- record_batch(example_with_times) expect_r6_class(rb$schema$posixlt$type, "VctrsExtensionType") - expect_identical(as.data.frame(rb), example_with_times) + expect_equal_data_frame(rb, example_with_times) }) test_that("metadata keeps attribute of top level data frame", { df <- structure(data.frame(x = 1, y = 2), foo = "bar") tab <- Table$create(df) expect_identical(attr(as.data.frame(tab), "foo"), "bar") - expect_identical(as.data.frame(tab), df) + expect_equal_data_frame(tab, df) }) @@ -387,7 +387,18 @@ test_that("grouped_df non-arrow metadata is preserved", { grouped_tab <- arrow_table(grouped) expect_equal( - attributes(as.data.frame(grouped_tab))$other_metadata, + attributes(collect.ArrowTabular(grouped_tab))$other_metadata, "look I'm still here!" ) }) + +test_that("data.frame class attribute is not saved", { + df <- data.frame(x = 1:5) + df_arrow <- arrow_table(df) + expect_null(df_arrow$r_metadata$attributes) + + df <- data.frame(x = 1:5) + attributes(df)$foo <- "bar" + df_arrow <- arrow_table(df) + expect_identical(df_arrow$r_metadata, list(attributes = list(foo = "bar"), columns = list(x = NULL))) +}) diff --git a/r/tests/testthat/test-na-omit.R b/r/tests/testthat/test-na-omit.R index c2d0fd1b71a..cfc71445d44 100644 --- a/r/tests/testthat/test-na-omit.R +++ b/r/tests/testthat/test-na-omit.R @@ -42,8 +42,8 @@ test_that("na.fail on Array and ChunkedArray", { test_that("na.omit on Table", { tbl <- Table$create(example_data) - expect_equal( - as.data.frame(na.omit(tbl)), + expect_equal_data_frame( + na.omit(tbl), na.omit(example_data), # We don't include an attribute with the rows omitted ignore_attr = "na.action" @@ -52,8 +52,8 @@ test_that("na.omit on Table", { test_that("na.exclude on Table", { tbl <- Table$create(example_data) - expect_equal( - as.data.frame(na.exclude(tbl)), + expect_equal_data_frame( + na.exclude(tbl), na.exclude(example_data), ignore_attr = "na.action" ) @@ -66,8 +66,8 @@ test_that("na.fail on Table", { test_that("na.omit on RecordBatch", { batch <- record_batch(example_data) - expect_equal( - as.data.frame(na.omit(batch)), + expect_equal_data_frame( + na.omit(batch), na.omit(example_data), ignore_attr = "na.action" ) @@ -75,8 +75,8 @@ test_that("na.omit on RecordBatch", { test_that("na.exclude on RecordBatch", { batch <- record_batch(example_data) - expect_equal( - as.data.frame(na.exclude(batch)), + expect_equal_data_frame( + na.exclude(batch), na.omit(example_data), ignore_attr = "na.action" ) diff --git a/r/tests/testthat/test-python-flight.R b/r/tests/testthat/test-python-flight.R index d2b6fd491e1..2e60957d2ff 100644 --- a/r/tests/testthat/test-python-flight.R +++ b/r/tests/testthat/test-python-flight.R @@ -53,13 +53,13 @@ if (process_is_running("demo_flight_server")) { }) test_that("flight_get", { - expect_identical(as.data.frame(flight_get(client, flight_obj)), example_data) + expect_equal_data_frame(flight_get(client, flight_obj), example_data) }) test_that("flight_put with RecordBatch", { flight_obj2 <- tempfile() flight_put(client, RecordBatch$create(example_data), path = flight_obj2) - expect_identical(as.data.frame(flight_get(client, flight_obj2)), example_data) + expect_equal_data_frame(flight_get(client, flight_obj2), example_data) }) test_that("flight_put with overwrite = FALSE", { @@ -69,7 +69,7 @@ if (process_is_running("demo_flight_server")) { ) # Default is TRUE so this will overwrite flight_put(client, example_with_times, path = flight_obj) - expect_identical(as.data.frame(flight_get(client, flight_obj)), example_with_times) + expect_equal_data_frame(flight_get(client, flight_obj), example_with_times) }) test_that("flight_disconnect", { diff --git a/r/tests/testthat/test-python.R b/r/tests/testthat/test-python.R index 968d72119c5..58eb84f5d65 100644 --- a/r/tests/testthat/test-python.R +++ b/r/tests/testthat/test-python.R @@ -108,7 +108,7 @@ test_that("RecordBatch with metadata roundtrip", { expect_identical(rbatch$metadata, batch$metadata) expect_equal(rbatch$a, batch$a) expect_equal(rbatch[c("b", "c", "d")], batch[c("b", "c", "d")]) - expect_identical(as.data.frame(rbatch), example_with_metadata) + expect_equal_data_frame(rbatch, example_with_metadata) }) test_that("Table with metadata roundtrip", { @@ -123,7 +123,7 @@ test_that("Table with metadata roundtrip", { expect_identical(rtab$metadata, tab$metadata) expect_equal(rtab$a, tab$a) expect_equal(rtab[c("b", "c", "d")], tab[c("b", "c", "d")]) - expect_identical(as.data.frame(rtab), example_with_metadata) + expect_equal_data_frame(rtab, example_with_metadata) }) test_that("DataType roundtrip", { @@ -160,8 +160,8 @@ test_that("RecordBatchReader to python", { expect_s3_class(pytab, "pyarrow.lib.Table") back_to_r <- reticulate::py_to_r(pytab) expect_r6_class(back_to_r, "Table") - expect_identical( - as.data.frame(back_to_r), + expect_equal_data_frame( + back_to_r, example_data %>% select(int, lgl) %>% filter(int > 6) @@ -178,7 +178,7 @@ test_that("RecordBatchReader from python", { back_to_r <- reticulate::py_to_r(pyreader) rt_table <- back_to_r$read_table() expect_r6_class(rt_table, "Table") - expect_identical(as.data.frame(rt_table), example_data) + expect_equal_data_frame(rt_table, example_data) scan <- Scanner$create(tab) reader <- scan$ToRecordBatchReader() diff --git a/r/tests/testthat/test-read-write.R b/r/tests/testthat/test-read-write.R index 66f6db56d90..9475788ee5a 100644 --- a/r/tests/testthat/test-read-write.R +++ b/r/tests/testthat/test-read-write.R @@ -119,7 +119,7 @@ test_that("reading/writing a raw vector (sparklyr integration)", { } bytes <- write_to_raw(example_data) expect_type(bytes, "raw") - expect_identical(read_from_raw_test(bytes), example_data) + expect_equal_data_frame(read_from_raw_test(bytes), example_data) # this could just be `read_ipc_stream(x)`; propose that - expect_identical(read_ipc_stream(bytes), example_data) + expect_equal_data_frame(read_ipc_stream(bytes), example_data) }) diff --git a/r/tests/testthat/test-utf.R b/r/tests/testthat/test-utf.R index f7553da5b4a..660b2a4784f 100644 --- a/r/tests/testthat/test-utf.R +++ b/r/tests/testthat/test-utf.R @@ -45,24 +45,24 @@ test_that("We handle non-UTF strings", { expect_identical(as.vector(ChunkedArray$create(df)), df) # Table (including field name) - expect_identical(as.data.frame(Table$create(df)), df) - expect_identical(as.data.frame(Table$create(df_struct)), df_struct) + expect_equal_data_frame(Table$create(df), df) + expect_equal_data_frame(Table$create(df_struct), df_struct) # RecordBatch - expect_identical(as.data.frame(record_batch(df)), df) - expect_identical(as.data.frame(record_batch(df_struct)), df_struct) + expect_equal_data_frame(record_batch(df), df) + expect_equal_data_frame(record_batch(df_struct), df_struct) # Schema field name - df_schema <- do.call(schema, raw_schema) + df_schema <- schema(raw_schema) expect_identical(names(df_schema), names(df)) df_struct_schema <- schema(a = do.call(struct, raw_schema)) # Create table/batch with schema - expect_identical(as.data.frame(Table$create(df, schema = df_schema)), df) - expect_identical(as.data.frame(Table$create(df_struct, schema = df_struct_schema)), df_struct) - expect_identical(as.data.frame(record_batch(df, schema = df_schema)), df) - expect_identical(as.data.frame(record_batch(df_struct, schema = df_struct_schema)), df_struct) + expect_equal_data_frame(Table$create(df, schema = df_schema), df) + expect_equal_data_frame(Table$create(df_struct, schema = df_struct_schema), df_struct) + expect_equal_data_frame(record_batch(df, schema = df_schema), df) + expect_equal_data_frame(record_batch(df_struct, schema = df_struct_schema), df_struct) # Serialization feather_file <- tempfile()