From 2320616c8d8f6601d2c4934deae942783c784a08 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Mon, 9 Jun 2025 15:34:17 -0700 Subject: [PATCH 1/8] Refactor plyr::vaggregate to use tapply() --- R/cast.r | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/cast.r b/R/cast.r index 79255cd..168f175 100644 --- a/R/cast.r +++ b/R/cast.r @@ -135,8 +135,12 @@ cast <- function(data, formula, fun.aggregate = NULL, ..., subset = NULL, fill = fun.aggregate <- length } - ordered <- vaggregate(.value = value, .group = overall, - .fun = fun.aggregate, ..., .default = fill, .n = n) + overall <- factor(overall, levels = seq_len(n)) + ordered <- as.vector(tapply(value, overall, fun.aggregate, ...)) + if (anyNA(ordered)) { + if (is.null(fill)) fill <- fun.aggregate(vector(typeof(ordered), 0L)) + ordered[is.na(ordered)] <- fill + } overall <- seq_len(n) } else { From 842b65a3cfa36e160982d74d6ff4292ae8b082f7 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 2 Oct 2025 17:00:28 -0700 Subject: [PATCH 2/8] new regression test --- tests/testthat/test-cast.r | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/tests/testthat/test-cast.r b/tests/testthat/test-cast.r index a9e59c5..25d19cd 100644 --- a/tests/testthat/test-cast.r +++ b/tests/testthat/test-cast.r @@ -199,3 +199,17 @@ test_that("useful error message if value.var doesn't exist", { expect_error(dcast(airquality, month ~ day, value.var = "test"), "value.var (test) not found in input", fixed = TRUE) }) + +test_that("NA is not filled with 0 unintentionally", { + dates <- as.Date(c("2025-01-01", "2025-01-02")) + values <- c(1.37095844714667, NA) + DF <- data.frame( + time = dates, + variable = factor("value"), + value = values + ) + expect_equal( + dcast(DF, time ~ variable, sum), + data.frame(time = dates, value = values) + ) +}) From 764327de63aa57f14217822aa1c1eb1cb01ddca3 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 2 Oct 2025 22:28:32 -0700 Subject: [PATCH 3/8] simplify --- tests/testthat/test-cast.r | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/tests/testthat/test-cast.r b/tests/testthat/test-cast.r index 25d19cd..025a548 100644 --- a/tests/testthat/test-cast.r +++ b/tests/testthat/test-cast.r @@ -201,15 +201,13 @@ test_that("useful error message if value.var doesn't exist", { }) test_that("NA is not filled with 0 unintentionally", { - dates <- as.Date(c("2025-01-01", "2025-01-02")) - values <- c(1.37095844714667, NA) DF <- data.frame( - time = dates, - variable = factor("value"), - value = values + group = c("1", "2"), + variable = "value", + value = c(1, NA) ) expect_equal( - dcast(DF, time ~ variable, sum), - data.frame(time = dates, value = values) + dcast(DF, group ~ variable, sum), + data.frame(group = c("1", "2"), value = c(1, NA)) ) }) From 8b8ee91d666b2d89688b09de94db23de9e9efc53 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 2 Oct 2025 22:30:32 -0700 Subject: [PATCH 4/8] case passing '...' to fun.aggregate --- tests/testthat/test-cast.r | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/tests/testthat/test-cast.r b/tests/testthat/test-cast.r index 025a548..87710ba 100644 --- a/tests/testthat/test-cast.r +++ b/tests/testthat/test-cast.r @@ -210,4 +210,8 @@ test_that("NA is not filled with 0 unintentionally", { dcast(DF, group ~ variable, sum), data.frame(group = c("1", "2"), value = c(1, NA)) ) + expect_equal( + dcast(DF, group ~ variable, sum, na.rm = TRUE), + data.frame(group = c("1", "2"), value = c(1, 0)) + ) }) From 59bfc5906dce2a0d07068b922f2c771ce8404b67 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 2 Oct 2025 22:33:35 -0700 Subject: [PATCH 5/8] more cases with non-NULL fill= --- tests/testthat/test-cast.r | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-cast.r b/tests/testthat/test-cast.r index 87710ba..d1d3669 100644 --- a/tests/testthat/test-cast.r +++ b/tests/testthat/test-cast.r @@ -200,7 +200,7 @@ test_that("useful error message if value.var doesn't exist", { "value.var (test) not found in input", fixed = TRUE) }) -test_that("NA is not filled with 0 unintentionally", { +test_that("mix of fun.aggregate= and fill=", { DF <- data.frame( group = c("1", "2"), variable = "value", @@ -214,4 +214,12 @@ test_that("NA is not filled with 0 unintentionally", { dcast(DF, group ~ variable, sum, na.rm = TRUE), data.frame(group = c("1", "2"), value = c(1, 0)) ) + expect_equal( + dcast(DF, group ~ variable, sum, fill = -1), + data.frame(group = c("1", "2"), value = c(1, NA)) + ) + expect_equal( + dcast(DF, group ~ variable, sum, na.rm = TRUE, fill = -1), + data.frame(group = c("1", "2"), value = c(1, 0)) + ) }) From 60cb9abe306e5d1c9429d1a23a31347768fea212 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 2 Oct 2025 22:39:08 -0700 Subject: [PATCH 6/8] really clear example of structural missingness --> fill --- tests/testthat/test-cast.r | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/tests/testthat/test-cast.r b/tests/testthat/test-cast.r index d1d3669..3777d40 100644 --- a/tests/testthat/test-cast.r +++ b/tests/testthat/test-cast.r @@ -222,4 +222,27 @@ test_that("mix of fun.aggregate= and fill=", { dcast(DF, group ~ variable, sum, na.rm = TRUE, fill = -1), data.frame(group = c("1", "2"), value = c(1, 0)) ) + + # "structural" missingness, i.e., combinations that didn't exist + DF <- data.frame( + group = c("1", "2"), + variable = c("v1", "v2"), + value = c(1, NA) + ) + expect_equal( + dcast(DF, group ~ variable, sum), + data.frame(group = c("1", "2"), v1 = c(1, 0), v2 = c(0, NA)) + ) + expect_equal( + dcast(DF, group ~ variable, sum, na.rm = TRUE), + data.frame(group = c("1", "2"), v1 = c(1, 0), v2 = c(0, 0)) + ) + expect_equal( + dcast(DF, group ~ variable, sum, fill = -1), + data.frame(group = c("1", "2"), v1 = c(1, -1), v2 = c(-1, NA)) + ) + expect_equal( + dcast(DF, group ~ variable, sum, na.rm = TRUE, fill = -1), + data.frame(group = c("1", "2"), v1 = c(1, -1), v2 = c(-1, 0)) + ) }) From 4000477d49031a06b18105d43a199d29620c4387 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 2 Oct 2025 23:06:49 -0700 Subject: [PATCH 7/8] passing new tests --- R/cast.r | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/R/cast.r b/R/cast.r index 168f175..064d135 100644 --- a/R/cast.r +++ b/R/cast.r @@ -136,10 +136,12 @@ cast <- function(data, formula, fun.aggregate = NULL, ..., subset = NULL, fill = } overall <- factor(overall, levels = seq_len(n)) - ordered <- as.vector(tapply(value, overall, fun.aggregate, ...)) + ordered <- tapply(value, overall, fun.aggregate, ...) if (anyNA(ordered)) { if (is.null(fill)) fill <- fun.aggregate(vector(typeof(ordered), 0L)) - ordered[is.na(ordered)] <- fill + structural_missing <- setdiff(names(ordered), levels(droplevels(overall))) + ordered[structural_missing] <- fill + names(ordered) <- NULL } overall <- seq_len(n) From 8948b2a74a12ba2bde0f2aff87ae383d5843f854 Mon Sep 17 00:00:00 2001 From: Michael Chirico Date: Thu, 2 Oct 2025 23:11:58 -0700 Subject: [PATCH 8/8] fun.aggregate template needs '...', with test --- R/cast.r | 2 +- tests/testthat/test-cast.r | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/R/cast.r b/R/cast.r index 064d135..6befaa7 100644 --- a/R/cast.r +++ b/R/cast.r @@ -138,7 +138,7 @@ cast <- function(data, formula, fun.aggregate = NULL, ..., subset = NULL, fill = overall <- factor(overall, levels = seq_len(n)) ordered <- tapply(value, overall, fun.aggregate, ...) if (anyNA(ordered)) { - if (is.null(fill)) fill <- fun.aggregate(vector(typeof(ordered), 0L)) + if (is.null(fill)) fill <- fun.aggregate(vector(typeof(ordered), 0L), ...) structural_missing <- setdiff(names(ordered), levels(droplevels(overall))) ordered[structural_missing] <- fill names(ordered) <- NULL diff --git a/tests/testthat/test-cast.r b/tests/testthat/test-cast.r index 3777d40..83aa541 100644 --- a/tests/testthat/test-cast.r +++ b/tests/testthat/test-cast.r @@ -245,4 +245,8 @@ test_that("mix of fun.aggregate= and fill=", { dcast(DF, group ~ variable, sum, na.rm = TRUE, fill = -1), data.frame(group = c("1", "2"), v1 = c(1, -1), v2 = c(-1, 0)) ) + expect_equal( + dcast(DF, group ~ variable, function(x, y) sum(x) + y, 1), + data.frame(group = c("1", "2"), v1 = c(2, 1), v2 = c(1, NA)) + ) })