Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ importFrom(plyr,alply)
importFrom(plyr,amv_dimnames)
importFrom(plyr,as.quoted)
importFrom(plyr,eval.quoted)
importFrom(plyr,id)
importFrom(plyr,llply)
importFrom(plyr,rbind.fill)
importFrom(plyr,split_labels)
Expand Down
62 changes: 61 additions & 1 deletion R/cast.r
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
#' @param value.var name of column which stores values, see
#' \code{\link{guess_value}} for default strategies to figure this out.
#' @seealso \code{\link{melt}}, \url{http://had.co.nz/reshape/}
#' @importFrom plyr alply amv_dimnames as.quoted eval.quoted id llply rbind.fill split_labels vaggregate
#' @importFrom plyr alply amv_dimnames as.quoted eval.quoted llply rbind.fill split_labels vaggregate
#' @import stringr
#' @examples
#' #Air quality example
Expand Down Expand Up @@ -92,6 +92,66 @@
#' @name cast
NULL

# replacement for plyr::id; it doesn't give the same result, but
# order(id(...)) matches order(plyr::id(...))
# interaction(drop=drop) doesn't quite work because plyr::id() retains the
# actual level numbers as if drop=FALSE, whereas interaction compacts them
# into 1:attr(, "n"), the unique number of observed levels.
id <- function(x, drop) {
if (length(x) == 0L) return(structure(integer(), n = 0L))

# Calculate number of levels for each vector
# For vectors that already had an explicit "n" attribute (e.g. from prior id() calls)
# we must use those directly instead of recomputing from observed discrete values.
ns <- vapply(seq_along(x), function(i) {
v <- x[[i]]
explicit_n <- attr(v, "n")
if (!is.null(explicit_n)) return(as.integer(explicit_n))
if (is.factor(v)) return(nlevels(v))
length(unique(v))
}, integer(1))

# Calculate integer values for each vector
vals <- lapply(seq_along(x), function(i) {
v <- x[[i]]
if (!is.null(attr(v, "n"))) return(as.integer(v))
if (is.factor(v)) return(as.integer(v))
# match includes NA by default if it's in the table
match(v, sort(unique(v), na.last = TRUE))
})

# We want the LAST variable to vary fastest.
# Weight for x[[i]] is prod(ns[(i+1):n])
n <- length(x)
weights <- rep(1, n)
if (n > 1) {
for (i in (n - 1):1) {
weights[i] <- weights[i + 1] * ns[i + 1]
}
}

res <- rep(1, length(vals[[1]]))
for (i in seq_along(vals)) {
res <- res + (vals[[i]] - 1) * weights[i]
}

if (drop) {
u <- unique(res)
# Sorts the unique generated IDs to be consistent.
su <- sort(u, na.last = TRUE)
res <- match(res, su)
# Ensure n is integer
n_attr <- as.integer(length(u))
} else {
# Ensure n is integer
n_attr <- as.integer(prod(ns))
}

res <- as.integer(res)
attr(res, "n") <- n_attr
res
}

cast <- function(data, formula, fun.aggregate = NULL, ..., subset = NULL, fill = NULL, drop = TRUE, value.var = guess_value(data), value_var) {

if (!missing(value_var)) {
Expand Down
50 changes: 50 additions & 0 deletions tests/testthat/test-cast.r
Original file line number Diff line number Diff line change
Expand Up @@ -211,3 +211,53 @@ test_that("useful error message if value.var doesn't exist", {
expect_error(dcast(airquality, month ~ day, value.var = "test"),
"value.var (test) not found in input", fixed = TRUE)
})

test_that("id handles recursive ID calls correctly", {
# This mimics how cast() handles multiple variables on one dimension
x1 <- c("a", "a", "b")
x2 <- c("1", "2", "1")

id1 <- id(list(x1, x2), drop = FALSE)
expect_equal(attr(id1, "n"), 4L)
expect_equal(as.integer(id1), c(1L, 2L, 3L))

# Recursive call
id2 <- id(list(id1), drop = FALSE)
expect_equal(attr(id2, "n"), 4L)
expect_equal(as.integer(id2), c(1L, 2L, 3L))
})

test_that("id handles NAs consistently with drop", {
x <- c(1, 2, NA)

# drop = TRUE
id_t <- id(list(x), drop = TRUE)
expect_equal(attr(id_t, "n"), 3L)
expect_equal(as.integer(id_t), c(1L, 2L, 3L))

# drop = FALSE
id_f <- id(list(x), drop = FALSE)
expect_equal(attr(id_f, "n"), 3L)
expect_equal(as.integer(id_f), c(1L, 2L, 3L))
})

test_that("id handles factor levels with drop=FALSE", {
f <- factor(c("a", "a"), levels = c("a", "b"))
id_f <- id(list(f), drop = FALSE)
expect_equal(attr(id_f, "n"), 2L)
expect_equal(as.integer(id_f), c(1L, 1L))

id_t <- id(list(f), drop = TRUE)
expect_equal(attr(id_t, "n"), 1L)
expect_equal(as.integer(id_t), c(1L, 1L))
})

test_that("ordering is correct for multi-variable id", {
# Last variable varies fastest
x1 <- c(1, 1, 2, 2)
x2 <- c(1, 2, 1, 2)
res <- id(list(x1, x2), drop = FALSE)
# (1,1)->1, (1,2)->2, (2,1)->3, (2,2)->4
expect_equal(as.integer(res), 1:4)
expect_equal(attr(res, "n"), 4L)
})
Loading