Skip to content

revdeps fail after merging new as.data.frame #5533

@tdhock

Description

@tdhock

Hi all, I found a revdep whose test fails with an error on data.table master, although the same test passes with the data.table CRAN release. The code below is a simplified version of the expstudy test case, https://github.com/cb12991/expstudy/blob/e89e216b1190ea327414cd41bd755b0985408583/tests/testthat/test-dplyr.R#L58

# Packages attached for this reproducible example.
library(testthat)
library(expstudy)
library(rlang)
# Build the object under test with expstudy().
# NOTE(review): `mortexp` is presumably an example dataset shipped with
# expstudy, and the bare names below presumably refer to its columns — confirm.
es <- expstudy(
  data = mortexp,
  actuals = ACTUAL_DEATHS,
  expecteds = EXPECTED_DEATHS,
  exposures = EXPOSURE,
  variances = VARIANCE_DEATHS,
  keys = c(AS_OF_DATE, POLICY_HOLDER)
)
# Expect that calling `f` on `x` (plus any extra arguments in `...`) returns
# an object carrying the same metadata attributes as `x`.
#
# x:   object whose attributes are captured before the call.
# f:   function to invoke with `x` as its first argument.
# ...: further arguments forwarded to `f`.
#
# NOTE(review): the traceback in this report prints the attribute as
# `metric_vars`, not `metric_variables`. If that is the real name, the
# 'metric_variables' lookup yields NULL on both sides and only
# 'metrics_applied' is effectively compared — confirm against expstudy.
attr_preserved <- function(x, f, ...) {
  # Metadata attributes of the input.
  x_meta <- attributes(x)[c('metric_variables', 'metrics_applied')]
  # list2() gathers `x` and `...` into the argument list passed to `f`.
  y <- do.call(f, list2(x, ...))
  # The same metadata attributes, read from the result.
  y_meta <- attributes(y)[c('metric_variables', 'metrics_applied')]
  testthat::expect_equal(x_meta, y_meta)
}
# Expect that the result of calling `f` on `x` (plus any extra arguments in
# `...`) still has S3 class 'tbl_es'.
class_preserved <- function(x, f, ...) {
  expect_s3_class(do.call(f, list2(x, ...)), class = 'tbl_es')
}
# Run both checks in turn. For each check function, exec() calls it with the
# spliced arguments (es, group_map, .f = as.data.frame), so each check ends up
# evaluating group_map(es, .f = as.data.frame) via do.call().
purrr::walk(
  c(attr_preserved, class_preserved),
  exec,
  !!!list(es, group_map, .f = as.data.frame)
)

The (abbreviated) output I get from running the above code using data.table master is

> purrr::walk(
+ c(attr_preserved, class_preserved),
+ exec,
+ !!!list(es, group_map, .f = as.data.frame)
+ )
Error in anyDuplicated(rownames) : object '.BY' not found
> traceback()
24: anyDuplicated(rownames)
23: setDF(copy(x), rownames = row.names)
22: as.data.frame.data.table(.SD, .BY, ...)
21: .f(.SD, .BY, ...)
20: eval(jsub, SDenv, parent.frame())
19: eval(jsub, SDenv, parent.frame())
18: `[.data.table`(dt, , list(list(.f(.SD, .BY, ...))), by = eval(.tbl$groups))
17: dt[, list(list(.f(.SD, .BY, ...))), by = eval(.tbl$groups)]
16: group_map.dtplyr_step(structure(list(parent = structure(list(
        AS_OF_DATE = structure(c(4413, 4441, 4472, 4502, 4533, 4563, 
        0.000859406691408917, 0.00107091737934993, 0.00117278962626522, 
        0.00245967348470632, 0.00142052640403464)), row.names = c(NA, 
    -175491L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x00000233c7731060>, sorted = c("AS_OF_DATE", 
    "POLICY_HOLDER")), vars = c("AS_OF_DATE", "POLICY_HOLDER", "GENDER", 
    "SMOKING_STATUS", "UNDERWRITING_CLASS", "INSURED_DOB", "ISSUE_DATE", 
    "ISSUE_AGE", "ATTAINED_AGE", "DURATION_MONTH", "DURATION_YEAR", 
    "POLICY_STATUS", "TERMINATION_DATE", "EXPOSURE", "ACTUAL_DEATHS", 
    "EXPECTED_MORTALITY_RT", "EXPECTED_DEATHS", "VARIANCE_DEATHS"
    ), groups = character(0), locals = list(), implicit_copy = FALSE, 
        needs_copy = FALSE, env = <environment>, name = `_DT1`), class = c("tbl_es", 
    "dtplyr_step_first", "dtplyr_step"), metric_vars = list(actuals = "ACTUAL_DEATHS", 
        expecteds = "EXPECTED_DEATHS", exposures = "EXPOSURE", variances = "VARIANCE_DEATHS"), metrics_applied = list(
        name = character(0), format = character(0))), .f = function (x, 
        row.names = NULL, optional = FALSE, ...) 
    {
        if (is.null(x)) 
            return(as.data.frame(list()))
        UseMethod("as.data.frame")
    })
15: NextMethod()
14: structure(.Data = new, class = unique(c("tbl_es", class(new))), 
        metric_vars = attr(old, "metric_vars"), metrics_applied = attr(old, 
            "metrics_applied"))
13: update_meta(NextMethod(), .data)
12: group_map.tbl_es(structure(list(parent = structure(list(AS_OF_DATE = structure(c(4413, 
    4441, 4472, 4502, 4533, 4563, 4594, 4625, 4655, 4686, 4716, 4747, 
        0.000859406691408917, 0.00107091737934993, 0.00117278962626522, 
        0.00245967348470632, 0.00142052640403464)), row.names = c(NA, 
    -175491L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x00000233c7731060>, sorted = c("AS_OF_DATE", 
    "POLICY_HOLDER")), vars = c("AS_OF_DATE", "POLICY_HOLDER", "GENDER", 
    "SMOKING_STATUS", "UNDERWRITING_CLASS", "INSURED_DOB", "ISSUE_DATE", 
    "ISSUE_AGE", "ATTAINED_AGE", "DURATION_MONTH", "DURATION_YEAR", 
    "POLICY_STATUS", "TERMINATION_DATE", "EXPOSURE", "ACTUAL_DEATHS", 
    "EXPECTED_MORTALITY_RT", "EXPECTED_DEATHS", "VARIANCE_DEATHS"
    ), groups = character(0), locals = list(), implicit_copy = FALSE, 
        needs_copy = FALSE, env = <environment>, name = `_DT1`), class = c("tbl_es", 
    "dtplyr_step_first", "dtplyr_step"), metric_vars = list(actuals = "ACTUAL_DEATHS", 
        expecteds = "EXPECTED_DEATHS", exposures = "EXPOSURE", variances = "VARIANCE_DEATHS"), metrics_applied = list(
        name = character(0), format = character(0))), .f = function (x, 
        row.names = NULL, optional = FALSE, ...) 
    {
        if (is.null(x)) 
            return(as.data.frame(list()))
        UseMethod("as.data.frame")
    })
11: (function (.data, .f, ..., .keep = FALSE) 
    {
        lifecycle::signal_stage("experimental", "group_map()")
        UseMethod("group_map")
    })(structure(list(parent = structure(list(AS_OF_DATE = structure(c(4413, 
    4441, 4472, 4502, 4533, 4563, 4594, 4625, 4655, 4686, 4716, 4747, 
        0.000859406691408917, 0.00107091737934993, 0.00117278962626522, 
        0.00245967348470632, 0.00142052640403464)), row.names = c(NA, 
    -175491L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x00000233c7731060>, sorted = c("AS_OF_DATE", 
    "POLICY_HOLDER")), vars = c("AS_OF_DATE", "POLICY_HOLDER", "GENDER", 
    "SMOKING_STATUS", "UNDERWRITING_CLASS", "INSURED_DOB", "ISSUE_DATE", 
    "ISSUE_AGE", "ATTAINED_AGE", "DURATION_MONTH", "DURATION_YEAR", 
    "POLICY_STATUS", "TERMINATION_DATE", "EXPOSURE", "ACTUAL_DEATHS", 
    "EXPECTED_MORTALITY_RT", "EXPECTED_DEATHS", "VARIANCE_DEATHS"
    ), groups = character(0), locals = list(), implicit_copy = FALSE, 
        needs_copy = FALSE, env = <environment>, name = `_DT1`), class = c("tbl_es", 
    "dtplyr_step_first", "dtplyr_step"), metric_vars = list(actuals = "ACTUAL_DEATHS", 
        expecteds = "EXPECTED_DEATHS", exposures = "EXPOSURE", variances = "VARIANCE_DEATHS"), metrics_applied = list(
        name = character(0), format = character(0))), .f = function (x, 
        row.names = NULL, optional = FALSE, ...) 
    {
        if (is.null(x)) 
            return(as.data.frame(list()))
        UseMethod("as.data.frame")
    })
10: do.call(f, list2(x, ...)) at expstudy-test-fail.R#19
9: (function (x, f, ...) 
   {
       x_meta <- attributes(x)[c("metric_variables", "metrics_applied")]
       y <- do.call(f, list2(x, ...))
       y_meta <- attributes(y)[c("metric_variables", "metrics_applied")]
       testthat::expect_equal(x_meta, y_meta)
   })(structure(list(parent = structure(list(AS_OF_DATE = structure(c(4413, 
   4441, 4472, 4502, 4533, 4563, 4594, 4625, 4655, 4686, 4716, 4747, 
       0.000859406691408917, 0.00107091737934993, 0.00117278962626522, 
       0.00245967348470632, 0.00142052640403464)), row.names = c(NA, 
   -175491L), class = c("data.table", "data.frame"), .internal.selfref = <pointer: 0x00000233c7731060>, sorted = c("AS_OF_DATE", 
   "POLICY_HOLDER")), vars = c("AS_OF_DATE", "POLICY_HOLDER", "GENDER", 
   "SMOKING_STATUS", "UNDERWRITING_CLASS", "INSURED_DOB", "ISSUE_DATE", 
   "ISSUE_AGE", "ATTAINED_AGE", "DURATION_MONTH", "DURATION_YEAR", 
   "POLICY_STATUS", "TERMINATION_DATE", "EXPOSURE", "ACTUAL_DEATHS", 
   "EXPECTED_MORTALITY_RT", "EXPECTED_DEATHS", "VARIANCE_DEATHS"
   ), groups = character(0), locals = list(), implicit_copy = FALSE, 
       needs_copy = FALSE, env = <environment>, name = `_DT1`), class = c("tbl_es", 
   "dtplyr_step_first", "dtplyr_step"), metric_vars = list(actuals = "ACTUAL_DEATHS", 
       expecteds = "EXPECTED_DEATHS", exposures = "EXPOSURE", variances = "VARIANCE_DEATHS"), metrics_applied = list(
       name = character(0), format = character(0))), function (.data, 
       .f, ..., .keep = FALSE) 
   {
       lifecycle::signal_stage("experimental", "group_map()")
       UseMethod("group_map")
   }, .f = function (x, row.names = NULL, optional = FALSE, ...) 
   {
       if (is.null(x)) 
           return(as.data.frame(list()))
       UseMethod("as.data.frame")
   })
8: .f(.x[[i]], ...)
7: map(.x, .f, ...)
6: purrr::walk(c(attr_preserved, class_preserved), exec, !!!list(es, 
       group_map, .f = as.data.frame)) at expstudy-test-fail.R#28
5: eval(ei, envir)
4: eval(ei, envir)
3: withVisible(eval(ei, envir))
2: ss(file, echo = visibly, local = local, print.eval = output, 
       spaced = FALSE, max.deparse.length = max.deparse.length, 
       keep.source = keep.source)
1: .ess.source("~/R/expstudy-test-fail.R", visibly = FALSE, output = TRUE)

Using CRAN release 1.14.6 I get the following (no error)

> purrr::walk(
+   c(attr_preserved, class_preserved),
+   exec,
+   !!!list(es, group_map, .f = as.data.frame)
+ )
> 

git bisect says this error started happening after merging #5320

Metadata

Metadata

Assignees

No one assigned

    Labels

    revdep (Reverse dependencies)

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions