From 721a9dc29a63c63882a5d9fd1787e570ef9079a4 Mon Sep 17 00:00:00 2001
From: Jonathan Keane
Date: Mon, 26 Apr 2021 14:37:17 -0500
Subject: [PATCH 1/2] Failing test

---
Review notes (this section sits after the "---" separator and is not part
of the commit message):

- Adds a test that writes a tibble with a list column whose elements carry
  attributes to a parquet dataset partitioned by `part`, reopens it, and
  checks both the data and the per-element attributes after collect().
- The first expect_equal() previously compared `arrange(df, part, a)` with
  `collect(arrange(df, part, a))`, i.e. `df` against itself, so the
  dataset was never part of the comparison. It now compares against
  `arrange(collect(ds), part, a)`, the same form the filtered comparison
  further down already uses.
- NOTE(review): the blob hashes on the "index" lines were not recomputed
  after this one-line change; regenerate the series if they matter.

 r/tests/testthat/test-dataset.R | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 4570c1f5762..961ab1a263c 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -1784,3 +1784,31 @@ test_that("Collecting zero columns from a dataset doesn't return entire dataset"
     c(32, 0)
   )
 })
+
+test_that("metadata of list elements (ARROW-12542)", {
+  skip_if_not_available("parquet")
+  df <- tibble::tibble(
+    part = 1:10,
+    a = rep(1:2, 5),
+    x = rep(list(structure(1, foo = "bar"), structure(2, baz = "qux")), 5)
+  )
+
+  # write the dataset, and open it
+  tmp <- tempfile()
+  write_dataset(df, tmp, partitioning = "part", format = "parquet")
+  ds <- open_dataset(tmp)
+
+  # arrange is necesary here because of the collation order for
+  expect_equal(
+    arrange(df, part, a),
+    arrange(collect(ds), part, a)
+  )
+  expect_identical(attr(collect(ds)$x[[1]], "foo"), "bar")
+  expect_identical(attr(collect(ds)$x[[2]], "baz"), "qux")
+
+  expect_equal(
+    arrange(filter(df, a == 2), part, a),
+    arrange(collect(filter(ds, a == 2)), part, a)
+  )
+  expect_identical(attr(collect(filter(ds, a == 2))$x[[1]], "baz"), "qux")
+})

From ed090a29ae7f25211791bde4f12c1a1f0fe57e3d Mon Sep 17 00:00:00 2001
From: Jonathan Keane
Date: Tue, 27 Apr 2021 11:51:56 -0500
Subject: [PATCH 2/2] spelling

---
Review notes (this section sits after the "---" separator and is not part
of the commit message):

- Spelling-only change ("necesary" -> "necessary") in a code comment.
- The last context line of the hunk reads `arrange(collect(ds), part, a)`
  so that it matches the line added by patch 1/2 of this series.
- NOTE(review): the comment still ends at "collation order for"; it
  presumably refers to the ordering of the partition directories
  (e.g. part=10 sorting before part=2) -- confirm and complete it.

 r/tests/testthat/test-dataset.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/tests/testthat/test-dataset.R b/r/tests/testthat/test-dataset.R
index 961ab1a263c..88d5bd2858a 100644
--- a/r/tests/testthat/test-dataset.R
+++ b/r/tests/testthat/test-dataset.R
@@ -1798,7 +1798,7 @@ test_that("metadata of list elements (ARROW-12542)", {
   write_dataset(df, tmp, partitioning = "part", format = "parquet")
   ds <- open_dataset(tmp)
 
-  # arrange is necesary here because of the collation order for
+  # arrange is necessary here because of the collation order for
   expect_equal(
     arrange(df, part, a),
     arrange(collect(ds), part, a)