From 4c413c39dad6fe703bbc519f6332e3496dff091c Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Thu, 29 Oct 2020 12:17:28 +0100 Subject: [PATCH 1/2] store metadata for each element of a list column too, not just the list itself. ARROW-10386. --- r/R/record-batch.R | 20 ++++++++++++++------ r/R/table.R | 19 ++++++++++++++++--- r/tests/testthat/test-metadata.R | 7 +++++++ 3 files changed, 37 insertions(+), 9 deletions(-) diff --git a/r/R/record-batch.R b/r/R/record-batch.R index 331a7a77253..2d20a93e85f 100644 --- a/r/R/record-batch.R +++ b/r/R/record-batch.R @@ -286,6 +286,20 @@ as.data.frame.RecordBatch <- function(x, row.names = NULL, optional = FALSE, ... apply_arrow_r_metadata <- function(x, r_metadata) { tryCatch({ + columns_metadata <- r_metadata$columns + if (is.data.frame(x)) { + if (length(names(x)) && !is.null(columns_metadata)) { + for (name in intersect(names(columns_metadata), names(x))) { + x[[name]] <- apply_arrow_r_metadata(x[[name]], columns_metadata[[name]]) + } + } + } else if(is.list(x) && !inherits(x, "POSIXlt") && !is.null(columns_metadata)) { + x <- map2(x, columns_metadata, function(.x, .y) { + apply_arrow_r_metadata(.x, .y) + }) + x + } + if (!is.null(r_metadata$attributes)) { attributes(x)[names(r_metadata$attributes)] <- r_metadata$attributes if (inherits(x, "POSIXlt")) { @@ -297,12 +311,6 @@ apply_arrow_r_metadata <- function(x, r_metadata) { } } - columns_metadata <- r_metadata$columns - if (length(names(x)) && !is.null(columns_metadata)) { - for (name in intersect(names(columns_metadata), names(x))) { - x[[name]] <- apply_arrow_r_metadata(x[[name]], columns_metadata[[name]]) - } - } }, error = function(e) { warning("Invalid metadata$r", call. = FALSE) }) diff --git a/r/R/table.R b/r/R/table.R index 1d2190589f7..172e7bceab7 100644 --- a/r/R/table.R +++ b/r/R/table.R @@ -210,11 +210,24 @@ arrow_attributes <- function(x, only_top_level = FALSE) { if (is.data.frame(x)) { columns <- map(x, arrow_attributes) - if (length(att) || !all(map_lgl(columns, is.null))) { + out <- if (length(att) || !all(map_lgl(columns, is.null))) { list(attributes = att, columns = columns) } - } else if (length(att)) { - list(attributes = att, columns = NULL) + return(out) + } + + columns <- NULL + if (is.list(x) && !inherits(x, "POSIXlt")) { + # for list columns, we also keep attributes of each + # element in columns + columns <- map(x, arrow_attributes) + if (all(map_lgl(columns, is.null))) { + columns <- NULL + } + } + + if (length(att) || !is.null(columns)) { + list(attributes = att, columns = columns) } else { NULL } diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R index 1cd6fbc4599..e94c989ffae 100644 --- a/r/tests/testthat/test-metadata.R +++ b/r/tests/testthat/test-metadata.R @@ -134,3 +134,10 @@ test_that("metadata keeps attribute of top level data frame", { expect_identical(attr(as.data.frame(tab), "foo"), "bar") expect_identical(as.data.frame(tab), df) }) + +test_that("metadata of list elements (ARROW-10386)", { + df <- data.frame(x = list(structure(1, foo = "bar"), structure(2, foo = "bar"))) + tab <- Table$create(df) + expect_identical(attr(as.data.frame(tab)$x[[1]], "foo"), "bar") + expect_identical(attr(as.data.frame(tab)$x[[2]], "foo"), "bar") +}) From 4d1e73d7c1f6fb8a3b5a51ce367d1f7de65bfd3c Mon Sep 17 00:00:00 2001 From: Romain Francois Date: Thu, 29 Oct 2020 12:25:54 +0100 Subject: [PATCH 2/2] update test --- r/tests/testthat/test-metadata.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/r/tests/testthat/test-metadata.R b/r/tests/testthat/test-metadata.R index e94c989ffae..9b6dd0fddc6 100644 --- a/r/tests/testthat/test-metadata.R +++ b/r/tests/testthat/test-metadata.R @@ -136,7 +136,7 @@ test_that("metadata keeps attribute of top level data frame", { }) test_that("metadata of list elements (ARROW-10386)", { - df <- data.frame(x = list(structure(1, foo = "bar"), structure(2, foo = "bar"))) + df <- data.frame(x = I(list(structure(1, foo = "bar"), structure(2, foo = "bar")))) tab <- Table$create(df) expect_identical(attr(as.data.frame(tab)$x[[1]], "foo"), "bar") expect_identical(attr(as.data.frame(tab)$x[[2]], "foo"), "bar")