From ace2e01ee46c5ef4f335e72b129aab898ad82402 Mon Sep 17 00:00:00 2001 From: Andrey R Date: Sun, 15 Nov 2015 00:32:14 +0000 Subject: [PATCH] Performance improvements for dim.data.table and alloc.col, closes #1433 --- R/data.table.R | 14 ++++---------- README.md | 2 ++ src/assign.c | 14 +++++++++++++- src/init.c | 2 ++ src/wrappers.c | 23 +++++++++++++++++++++++ 5 files changed, 44 insertions(+), 11 deletions(-) diff --git a/R/data.table.R b/R/data.table.R index 73416f2ec7..5c8c85ac62 100644 --- a/R/data.table.R +++ b/R/data.table.R @@ -72,10 +72,9 @@ replace_dot <- function(e) { e } -dim.data.table <- function(x) { - if (length(x)) c(length(x[[1L]]), length(x)) - else c(0L,0L) - # TO DO: consider placing "dim" as an attibute updated on inserts. Saves this 'if'. +dim.data.table <- function(x) +{ + .Call(Cdim, x) } .global <- new.env() # thanks to: http://stackoverflow.com/a/12605694/403310 @@ -2138,12 +2137,7 @@ alloc.col <- function(DT, n=getOption("datatable.alloccol"), verbose=getOption(" name = substitute(DT) if (identical(name,quote(`*tmp*`))) stop("alloc.col attempting to modify `*tmp*`") ans = .Call(Calloccolwrapper,DT,as.integer(eval(n)),verbose) - for (i in seq_along(ans)) { - # clear the same excluded by copyMostAttrib(). Primarily for data.table and as.data.table, but added here centrally (see #4890). - setattr(ans[[i]],"names",NULL) - setattr(ans[[i]],"dim",NULL) - setattr(ans[[i]],"dimnames",NULL) - } + if (is.name(name)) { name = as.character(name) assign(name,ans,parent.frame(),inherits=TRUE) diff --git a/README.md b/README.md index b40bee0731..f3f5136baa 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,8 @@ 5. `test.data.table` gets new argument `silent`, if set to TRUE then it will not raise exception but returns TRUE/FALSE based on the test results. + 6. `dim.data.table` is now implemented in C. Thanks to Andrey Riabushenko. + ### Changes in v1.9.6 (on CRAN 19 Sep 2015) #### NEW FEATURES diff --git a/src/assign.c b/src/assign.c index 275e7519bf..bc0fe4b7bc 100644 --- a/src/assign.c +++ b/src/assign.c @@ -223,7 +223,19 @@ SEXP alloccol(SEXP dt, R_len_t n, Rboolean verbose) SEXP alloccolwrapper(SEXP dt, SEXP newncol, SEXP verbose) { if (!isInteger(newncol) || length(newncol)!=1) error("n must be integer length 1. Has datatable.alloccol somehow become unset?"); if (!isLogical(verbose) || length(verbose)!=1) error("verbose must be TRUE or FALSE"); - return(alloccol(dt, INTEGER(newncol)[0], LOGICAL(verbose)[0])); + + SEXP ans = PROTECT(alloccol(dt, INTEGER(newncol)[0], LOGICAL(verbose)[0])); + + for(R_len_t i = 0; i < LENGTH(ans); i++) { + // clear the same excluded by copyMostAttrib(). Primarily for data.table and as.data.table, but added here centrally (see #4890). + + setAttrib(VECTOR_ELT(ans, i), R_NamesSymbol, R_NilValue); + setAttrib(VECTOR_ELT(ans, i), R_DimSymbol, R_NilValue); + setAttrib(VECTOR_ELT(ans, i), R_DimNamesSymbol, R_NilValue); + } + + UNPROTECT(1); + return ans; } SEXP shallowwrapper(SEXP dt, SEXP cols) { diff --git a/src/init.c b/src/init.c index 1bd5116ab4..3292c6c708 100644 --- a/src/init.c +++ b/src/init.c @@ -66,6 +66,7 @@ SEXP ghead(); SEXP glast(); SEXP gfirst(); SEXP gnthvalue(); +SEXP dim(); // .Externals SEXP fastmean(); @@ -134,6 +135,7 @@ R_CallMethodDef callMethods[] = { {"Cglast", (DL_FUNC) &glast, -1}, {"Cgfirst", (DL_FUNC) &gfirst, -1}, {"Cgnthvalue", (DL_FUNC) &gnthvalue, -1}, +{"Cdim", (DL_FUNC) &dim, -1}, {NULL, NULL, 0} }; diff --git a/src/wrappers.c b/src/wrappers.c index 52b58b7140..213cb27346 100644 --- a/src/wrappers.c +++ b/src/wrappers.c @@ -96,3 +96,26 @@ SEXP copyNamedInList(SEXP x) return R_NilValue; } + + +SEXP dim(SEXP x) +{ + // fast implementation of dim.data.table + + if (TYPEOF(x) != VECSXP) { + error("dim.data.table expects a data.table as input (which is a list), but seems to be of type %s", + type2char(TYPEOF(x))); + } + + SEXP ans = allocVector(INTSXP, 2); + if(length(x) == 0) { + INTEGER(ans)[0] = 0; + INTEGER(ans)[1] = 0; + } + else { + INTEGER(ans)[0] = length(VECTOR_ELT(x, 0)); + INTEGER(ans)[1] = length(x); + } + + return ans; +}