diff --git a/DESCRIPTION b/DESCRIPTION index ce8965b495..0d82874660 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -14,7 +14,7 @@ Authors@R: c( person("Hugh","Parsonage", role="ctb")) Depends: R (>= 3.1.0) Imports: methods -Suggests: bit64, curl, knitr, xts, nanotime, zoo, reshape2, testthat (>= 0.4) +Suggests: bit64, curl, knitr, xts, nanotime, zoo, reshape2 Description: Fast aggregation of large data (e.g. 100GB in RAM), fast ordered joins, fast add/modify/delete of columns by group using no copies at all, list columns, friendly and fast character-separated-value read/write. Offers a natural and flexible syntax, for faster development. License: MPL-2.0 | file LICENSE URL: http://r-datatable.com diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index a0821931c2..d871ad20e3 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11840,6 +11840,149 @@ gc() after = sum(gc()[, 2]) test(1912.2, after < before + 10) +# BEGIN port of old testthat tests, #2740. Issue numbers may be from R-forge. +# +# test-data.frame-like.R (merge, subset, transform)") +# +# `x` columns are valid, #1299 +d1 <- data.table(x=c(1,3,8), y1=rnorm(3), key="x") +d2 <- data.table(x=c(3,8,10), y2=rnorm(3), key="x") +ans1 <- merge(d1, d2, by="x") +ans2 <- cbind(d1[2:3], y2=d2[1:2]$y2) +setkey(ans2, x) +test(1913.1, ans1, ans2) +# +# `xkey` column names are valid in merge, #1299 +d1 <- data.table(xkey=c(1,3,8), y1=rnorm(3), key="xkey") +d2 <- data.table(xkey=c(3,8,10), y2=rnorm(3), key="xkey") +ans2 <- cbind(d1[2:3], y2=d2[1:2]$y2) +setkey(ans2, xkey) +test(1913.2, merge(d1, d2, by="xkey"), ans2) +# +# one column merges work, #1241 +dt <- data.table(a=rep(1:2,each=3), b=1:6, key="a") +y <- data.table(a=c(0,1), bb=c(10,11), key="a") +test(1913.3, merge(y, dt), data.table(a=1L, bb=11, b=1:3, key="a")) +test(1913.4, merge(y, dt, all=TRUE), + data.table(a=rep(c(0L,1L,2L),c(1,3,3)), + bb=rep(c(10,11,NA_real_),c(1,3,3)), + b=c(NA_integer_,1:6), key="a")) +# +# y with only a key column +y <- data.table(a=c(0,1), key="a") +test(1913.5, merge(y,dt), data.table(a=1L, b=1:3, key="a")) +test(1913.6, merge(y, dt, all=TRUE), data.table(a=rep(c(0L,1L,2L),c(1,3,3)), b=c(NA_integer_,1:6), key="a")) +# +# merging data.tables is almost like merging data.frames +d1 <- data.table(a=sample(letters, 10), b=sample(1:100, 10), key='a') +d2 <- data.table(a=d1$a, b=sample(1:50, 10), c=rnorm(10), key='a') +dtm <- merge(d1, d2, by='a', suffixes=c(".xx", ".yy")) +dtm.df <- as.data.frame(dtm) +dfm <- merge(as.data.frame(d1), as.data.frame(d2), by='a', suffixes=c('.xx', '.yy')) +test(1913.7, unname(dtm.df), unname(dfm)) +test(1913.8, colnames(dtm), colnames(dfm)) +# +## merge and auto-increment columns in y[x] +## merging tables that have common column names that end in *.1 gets +## tricky, because the y[x] mojo does some magic to increment the *.1 +## in the x (I think) and keep *.1 in the y +x <- data.table(a=letters[1:10], b=1:10, b.1=1:10 * 10, key="a") +y <- data.table(a=letters[1:10], b=letters[11:20], b.1=rnorm(10), key="a") +M <- merge(x, y) +m <- merge(as.data.frame(x), as.data.frame(y), by="a") +test(1913.9, is.data.table(M) && !is.data.table(m)) +test(1913.11, all(names(M) %in% union(names(M), names(m)))) +for (name in names(m)) { + test(1913.12, M[[name]], m[[name]]) +} +# +# Original example that smoked out the bug +M <- data.table(a=letters[1:10], b=1:10) +m <- as.data.frame(M) +ms <- lapply(1:3, function(x) data.table(a=letters[1:10], b=1:10 * 10^x)) +for (i in 1:3) { + M <- merge(M, ms[[i]], by='a', suffixes=c("", sprintf(".%d", i))) +} +for (i in 1:3) { + m <- merge(m, as.data.frame(ms[[i]]), by='a', suffixes=c("", sprintf(".%d", i))) +} +test(1913.13, is.data.table(M) && !is.data.table(m)) +test(1913.14, all(names(M) %in% union(names(M), names(m)))) +for (name in names(m)) { + test(1913.15, M[[name]], m[[name]]) +} +# +# simple subset maintains keys +dt <- data.table(a=sample(c('a', 'b', 'c'), 20, replace=TRUE), + b=sample(c('a', 'b', 'c'), 20, replace=TRUE), + c=sample(20), key='a') +sub <- subset(dt, a == 'b') +test(1913.16, key(dt), key(sub)) +# +# subset using 'select' maintains key appropriately" +dt <- data.table(a=sample(c('a', 'b', 'c'), 20, replace=TRUE), + b=sample(c('a', 'b', 'c'), 20, replace=TRUE), + c=sample(20), key=c('a', 'b')) +sub.1 <- subset(dt, a == 'a', select=c('c', 'b', 'a')) +test(1913.17, key(sub.1), key(dt)) # reordering columns +sub.2 <- subset(dt, a == 'a', select=c('a', 'c')) +test(1913.18, key(sub.2), 'a') # selected columns are prefix of key +sub.3 <- subset(dt, a == 'a', select=c('b', 'c')) +test(1913.19, is.null(key(sub.3))) # selected columns do not from a key prefix +sub.4 <- subset(dt, a == 'cc') +test(1913.21, nrow(sub.4), 0L) +test(1913.22, is.null(key(sub.4))) +# +# transform maintains keys +dt <- data.table(a=sample(c('a', 'b', 'c'), 20, replace=TRUE), + b=sample(c('a', 'b', 'c'), 20, replace=TRUE), + c=sample(20), key=c('a', 'b')) +t1 <- transform(dt, d=c+4) +test(1913.23, key(t1), key(dt)) +test(1913.24, t1$d, dt$c + 4) # transform was successful +t2 <- transform(dt, d=c+4, a=sample(c('x', 'y', 'z'), 20, replace=TRUE)) +test(1913.25, is.null(key(t2))) # transforming a key column nukes the key +## This is probably not necessary, but let's just check that transforming +## a key column doesn't twist around the rows in the result. +for (col in c('b', 'c')) { + test(1913.26, t2[[col]], dt[[col]]) # mutating-key-transform maintains other columns +} +# +# tests-S4.R (S4 Compatability) +# +setClass("Data.Table", contains="data.table") +setClass("S4Composition", representation(data="data.table")) +# data.table can be a parent class" +ids <- sample(letters[1:3], 10, replace=TRUE) +scores <- rnorm(10) +dt <- data.table(id=ids, score=scores) +dt.s4 <- new("Data.Table", data.table(id=ids, score=scores)) +test(1914.1, isS4(dt.s4)) +test(1914.2, inherits(dt.s4, 'data.table')) +## pull out data from S4 as.list, and compare to list from dt +dt.s4.list <- dt.s4@.Data +names(dt.s4.list) <- names(dt.s4) +test(1914.3, dt.s4.list, as.list(dt)) # Underlying data not identical +# simple S4 conversion-isms work +df = data.frame(a=sample(letters, 10), b=1:10) +dt = as.data.table(df) +test(1914.4, identical(as(df, 'data.table'), dt)) +test(1914.5, identical(as(dt, 'data.frame'), df)) +# data.table can be used in an S4 slot +dt <- data.table(a=sample(letters[1:3], 10, replace=TRUE), score=rnorm(10)) +dt.comp <- new("S4Composition", data=dt) +test(1914.6, dt.comp@data, dt) +# S4 methods dispatch properly on data.table slots" +dt <- data.table(a=sample(letters[1:3], 10, replace=TRUE), score=rnorm(10)) +dt.comp <- new("S4Composition", data=dt) +setGeneric("dtGet", function(x, what) standardGeneric("dtGet")) +setMethod("dtGet", c(x="S4Composition", what="missing"), function(x, what){x@data}) +setMethod("dtGet", c(x="S4Composition", what="ANY"), function(x, what) {x@data[[what]]}) +test(1914.7, dtGet(dt.comp), dt) # actually +test(1914.8, identical(dtGet(dt.comp, 1), dt[[1]])) +test(1914.9, identical(dtGet(dt.comp, 'b'), dt$b)) +# END port of old testthat tests + ################################### # Add new tests above this line # diff --git a/publish.R b/publish.R index 48c6af31f3..d1182aedaf 100644 --- a/publish.R +++ b/publish.R @@ -123,7 +123,7 @@ check.copy <- function(job, repodir="bus/integration/cran"){ dir.create(job.checks<-file.path(repodir, "web", "checks", pkg<-"data.table", job), recursive=TRUE); os = plat(job) from = file.path("bus", sprintf("%s/%s.Rcheck", job, pkg)) - current.rout = c("main.Rout","main.Rout.fail","testthat.Rout","testthat.Rout.fail","knitr.Rout","knitr.Rout.fail","memtest.csv","memtest.png") + current.rout = c("main.Rout","main.Rout.fail","knitr.Rout","knitr.Rout.fail","memtest.csv","memtest.png") if (os=="Windows") { dir.create(file.path(job.checks, "tests_i386"), showWarnings=FALSE) dir.create(file.path(job.checks, "tests_x64"), showWarnings=FALSE) @@ -159,7 +159,7 @@ check.index <- function(pkg, jobs, repodir="bus/integration/cran") { paste(na.omit(links), collapse=", ") } routs = lapply(jobs, function(job) { - current.rout = c("main.Rout.fail","testthat.Rout.fail","knitr.Rout.fail") + current.rout = c("main.Rout.fail","knitr.Rout.fail") os = plat(job) if (os=="Windows") { rout32 = file.path("tests_i386", current.rout) diff --git a/tests/testthat.R b/tests/testthat.R deleted file mode 100644 index c457c8fa5d..0000000000 --- a/tests/testthat.R +++ /dev/null @@ -1,6 +0,0 @@ -if(requireNamespace("testthat", quietly = TRUE)){ - library(testthat) - library(data.table) - test_check("data.table") -} - diff --git a/tests/testthat/test-S4.R b/tests/testthat/test-S4.R deleted file mode 100644 index 13bc50744f..0000000000 --- a/tests/testthat/test-S4.R +++ /dev/null @@ -1,56 +0,0 @@ -context("S4 Compatability") - -## S4 class definitions to test -setClass("Data.Table", contains="data.table") -setClass("S4Composition", representation(data="data.table")) - -test_that("data.table can be a parent class", { - ids <- sample(letters[1:3], 10, replace=TRUE) - scores<- rnorm(10) - dt <- data.table(id=ids, score=scores) - dt.s4 <- new("Data.Table", data.table(id=ids, score=scores)) - - expect_true(isS4(dt.s4)) - expect_true(inherits(dt.s4, 'data.table')) - - ## pull out data from S4 as.list, and compare to list from dt - dt.s4.list <- dt.s4@.Data - names(dt.s4.list) <- names(dt.s4) - expect_identical(dt.s4.list, as.list(dt), info="Underlying data not identical") -}) - -test_that("simple S4 conversion-isms work", { - df = data.frame(a=sample(letters, 10), b=1:10) - dt = as.data.table(df) - expect_equal(as(df, 'data.table'), dt) - expect_identical(as(dt, 'data.frame'), df) -}) - -test_that("data.table can be used in an S4 slot", { - ## A class with a data.table slot - dt <- data.table(a=sample(letters[1:3], 10, replace=TRUE), score=rnorm(10)) - dt.comp <- new("S4Composition", data=dt) - expect_equal(dt.comp@data, dt) -}) - -test_that("S4 methods dispatch properly on data.table slots", { - ## Make toy accessor functions and compare results against normal data.table - ## access - dt <- data.table(a=sample(letters[1:3], 10, replace=TRUE), score=rnorm(10)) - dt.comp <- new("S4Composition", data=dt) - setGeneric("dtGet", function(x, what) standardGeneric("dtGet")) - setMethod("dtGet", c(x="S4Composition", what="missing"), - function(x, what) { - x@data - }) - setMethod("dtGet", c(x="S4Composition", what="ANY"), - function(x, what) { - x@data[[what]] - }) - - expect_equal(dtGet(dt.comp), dt, label='actually') - expect_identical(dtGet(dt.comp, 1), dt[[1]]) - expect_identical(dtGet(dt.comp, 'b'), dt$b) -}) - - diff --git a/tests/testthat/test-data.frame-like.R b/tests/testthat/test-data.frame-like.R deleted file mode 100644 index 8c7ec403bf..0000000000 --- a/tests/testthat/test-data.frame-like.R +++ /dev/null @@ -1,160 +0,0 @@ -context("data.frame like functions (merge, subset, transform)") - -############################################################################### -## Merge -test_that("`x` columns are valid (bug #1299)", { - d1 <- data.table(x=c(1,3,8), y1=rnorm(3), key="x") - d2 <- data.table(x=c(3,8,10), y2=rnorm(3), key="x") - ans1 <- merge(d1, d2, by="x") - ans2 <- cbind(d1[2:3], y2=d2[1:2]$y2) - setkey(ans2, x) - expect_equal(ans1, ans2, info="Original test #230") -}) - -test_that("`xkey` column names are valid in merge (bug#1299", { - d1 <- data.table(xkey=c(1,3,8), y1=rnorm(3), key="xkey") - d2 <- data.table(xkey=c(3,8,10), y2=rnorm(3), key="xkey") - ans2 <- cbind(d1[2:3], y2=d2[1:2]$y2) - setkey(ans2, xkey) - expect_equal(merge(d1, d2, by="xkey"), ans2, info="Original test #238") -}) - -test_that("one column merges work (bug #1241)", { - dt <- data.table(a=rep(1:2,each=3), b=1:6, key="a") - y <- data.table(a=c(0,1), bb=c(10,11), key="a") - expect_equal(merge(y, dt), data.table(a=1L, bb=11, b=1:3, key="a"), - info="Original test #231") - expect_equal(merge(y, dt, all=TRUE), - data.table(a=rep(c(0L,1L,2L),c(1,3,3)), - bb=rep(c(10,11,NA_real_),c(1,3,3)), - b=c(NA_integer_,1:6), key="a"), - info="Original test #232") - - ## y with only a key column - y <- data.table(a=c(0,1), key="a") - expect_equal(merge(y,dt), data.table(a=1L, b=1:3, key="a"), - info="Original test #233") - expect_equal(merge(y, dt, all=TRUE), - data.table(a=rep(c(0L,1L,2L),c(1,3,3)), - b=c(NA_integer_,1:6),key="a"), - info="Original test #234") -}) - -test_that("merging data.tables is almost like merging data.frames", { - d1 <- data.table(a=sample(letters, 10), b=sample(1:100, 10), key='a') - d2 <- data.table(a=d1$a, b=sample(1:50, 10), c=rnorm(10), key='a') - dtm <- merge(d1, d2, by='a', suffixes=c(".xx", ".yy")) - dtm.df <- as.data.frame(dtm) - dfm <- merge(as.data.frame(d1), as.data.frame(d2), by='a', suffixes=c('.xx', '.yy')) - - expect_equal(unname(dtm.df), unname(dfm), - info="Testing contents/data after merge") - expect_equal(colnames(dtm), colnames(dfm), - info="Original test #255 (testing suffixes parameter)") -}) - -test_that("`suffixes` behavior can be toggled to pre 1.5.4 behavior", { - dt1 <- data.table(a=letters[1:5], b=1:5, key="a") - dt2 <- data.table(a=letters[3:8], b=1:6, key="a") - - # options(datatable.pre.suffixes=FALSE) - # Option removed in 1.7.10 - expect_equal(colnames(merge(dt1, dt2)), c("a", "b.x", "b.y")) - - #options(datatable.pre.suffixes=TRUE) - #expect_equal(colnames(merge(dt1, dt2)), c("a", "b", "b.1"), - # info="Pre 1.5.4 behavior not working") - - #options(datatable.pre.suffixes=FALSE) -}) - -test_that("merge and auto-increment columns in y[x]", { - ## merging tables that have common column names that end in *.1 gets - ## tricky, because the y[x] mojo does some magic to increment the *.1 - ## in the x (I think) and keep *.1 in the y - x <- data.table(a=letters[1:10], b=1:10, b.1=1:10 * 10, key="a") - y <- data.table(a=letters[1:10], b=letters[11:20], b.1=rnorm(10), key="a") - M <- merge(x, y) - m <- merge(as.data.frame(x), as.data.frame(y), by="a") - - expect_is(M, 'data.table') - expect_is(m, 'data.frame') - expect_true(all(names(M) %in% union(names(M), names(m)))) - for (name in names(m)) { - expect_equal(M[[name]], m[[name]]) - } - - - ## Original example that smoked out the bug - M <- data.table(a=letters[1:10], b=1:10) - m <- as.data.frame(M) - ms <- lapply(1:3, function(x) data.table(a=letters[1:10], b=1:10 * 10^x)) - - for (i in 1:3) { - M <- merge(M, ms[[i]], by='a', suffixes=c("", sprintf(".%d", i))) - } - - for (i in 1:3) { - m <- merge(m, as.data.frame(ms[[i]]), by='a', - suffixes=c("", sprintf(".%d", i))) - } - - expect_is(M, 'data.table') - expect_is(m, 'data.frame') - expect_true(all(names(M) %in% union(names(M), names(m)))) - for (name in names(m)) { - expect_equal(M[[name]], m[[name]]) - } -}) - -############################################################################### -## subset -test_that("simple subset maintains keys", { - dt <- data.table(a=sample(c('a', 'b', 'c'), 20, replace=TRUE), - b=sample(c('a', 'b', 'c'), 20, replace=TRUE), - c=sample(20), key='a') - sub <- subset(dt, a == 'b') - expect_equal(key(dt), key(sub)) -}) - -test_that("subset using 'select' maintains key appropriately", { - dt <- data.table(a=sample(c('a', 'b', 'c'), 20, replace=TRUE), - b=sample(c('a', 'b', 'c'), 20, replace=TRUE), - c=sample(20), key=c('a', 'b')) - - sub.1 <- subset(dt, a == 'a', select=c('c', 'b', 'a')) - expect_equal(key(sub.1), key(dt), info="reordering columns") - - sub.2 <- subset(dt, a == 'a', select=c('a', 'c')) - expect_equal(key(sub.2), 'a', info="selected columns are prefix of key") - - sub.3 <- subset(dt, a == 'a', select=c('b', 'c')) - expect_true(is.null(key(sub.3)), - info="selected columns do not from a key prefix") - - sub.4 <- subset(dt, a == 'cc') - expect_equal(nrow(sub.4), 0) - expect_true(is.null(key(sub.4))) -}) - -############################################################################### -## transform -test_that("transform maintains keys", { - dt <- data.table(a=sample(c('a', 'b', 'c'), 20, replace=TRUE), - b=sample(c('a', 'b', 'c'), 20, replace=TRUE), - c=sample(20), key=c('a', 'b')) - - t1 <- transform(dt, d=c+4) - expect_equal(key(t1), key(dt)) - expect_equal(t1$d, dt$c + 4, info="transform was successful") - - t2 <- transform(dt, d=c+4, a=sample(c('x', 'y', 'z'), 20, replace=TRUE)) - expect_true(is.null(key(t2)), info="transforming a key column nukes the key") - - ## This is probably not necessary, but let's just check that transforming - ## a key column doesn't twist around the rows in the result. - for (col in c('b', 'c')) { - msg <- sprintf("mutating-key-transform maintains other columns [%s]", col) - expect_equal(t2[[col]], dt[[col]], info=msg) - } -})