Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,8 @@

13. New `mergelist()` and `setmergelist()` similarly work _a la_ `Reduce()` to recursively merge a `list` of data.tables, [#599](https://github.com/Rdatatable/data.table/issues/599). Different join modes (_left_, _inner_, _full_, _right_, _semi_, _anti_, and _cross_) are supported through the `how` argument; duplicate handling goes through the `mult` argument. `setmergelist()` carefully avoids copies where one is not needed, e.g. in a 1:1 left join. Thanks Patrick Nicholson for the FR (in 2013!), @jangorecki for the PR, and @MichaelChirico for extensive reviews and fine-tuning.

14. `fcoalesce()` and `setcoalesce()` gain a `nan` argument to control whether `NaN` values are treated as missing and replaced (`nan=NA`, the default) or as non-missing and kept (`nan=NaN`), [#4567](https://github.com/Rdatatable/data.table/issues/4567). This is consistent with the `nan` argument of `nafill()`. Thanks to @ethanbsmith for the feature request and @Mukulyadav2004 for the implementation.

### BUG FIXES

1. `fread()` no longer warns on certain systems on R 4.5.0+ where the file owner can't be resolved, [#6918](https://github.com/Rdatatable/data.table/issues/6918). Thanks @ProfFancyPants for the report and PR.
Expand Down
4 changes: 2 additions & 2 deletions R/wrappers.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
# Very small (e.g. one line) R functions that just call C.
# One file wrappers.R to avoid creating lots of small .R files.

fcoalesce = function(...) .Call(Ccoalesce, list(...), FALSE)
setcoalesce = function(...) .Call(Ccoalesce, list(...), TRUE)
# fcoalesce / setcoalesce: thin wrappers that forward list(...) to the C routine behind Ccoalesce.
# The 2nd .Call argument is the C-level in-place flag: FALSE for fcoalesce, TRUE for setcoalesce
#   (which updates the first item by reference).
# 'nan' comes after '...' so callers must pass it by name (nan=NA or nan=NaN).
# The C side requires the 3rd argument to be a single TRUE/FALSE; nan_is_na() is defined elsewhere and
#   presumably maps nan=NA to TRUE and nan=NaN to FALSE -- confirm against its definition.
fcoalesce = function(..., nan=NA) .Call(Ccoalesce, list(...), FALSE, nan_is_na(nan))
setcoalesce = function(..., nan=NA) .Call(Ccoalesce, list(...), TRUE, nan_is_na(nan))

fifelse = function(test, yes, no, na=NA) .Call(CfifelseR, test, yes, no, na)
fcase = function(..., default=NA) {
Expand Down
5 changes: 5 additions & 0 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -15586,6 +15586,11 @@ test(2060.154, fcoalesce(list(x)), x)
test(2060.155, setcoalesce(list(x)), x)
test(2060.156, setcoalesce(list(x,y,z)), ans)
test(2060.157, x, ans) # setcoalesce updated the first item (x) by reference
# nan parameter, #4567
# length-1 replacement: with nan=NA both NA and NaN are replaced; with nan=NaN only NA is replaced and NaN is kept
test(2060.158, fcoalesce(c(NA_real_, NaN), 0, nan=NA), c(0, 0))
test(2060.159, fcoalesce(c(NA_real_, NaN), 0, nan=NaN), c(0, NaN))
# full-length replacement vector: same expectations, element by element
# NOTE(review): the id 2060.160 is numerically equal to 2060.16 in R -- confirm no other test in this file uses 2060.16
test(2060.160, fcoalesce(c(NA_real_, NaN), c(1, 2), nan=NA), c(1, 2))
test(2060.161, fcoalesce(c(NA_real_, NaN), c(1, 2), nan=NaN), c(1, NaN))
# factor of different levels
x = factor(c('a','b',NA,NA,'b'))
y = factor(c('b','b','a',NA,'b'))
Expand Down
8 changes: 6 additions & 2 deletions man/coalesce.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,11 @@ Fill in missing values in a vector by successively pulling from candidate vector
Written in C, and multithreaded for numeric and factor types.
}
\usage{
fcoalesce(\dots)
fcoalesce(\dots, nan=NA)
}
\arguments{
\item{\dots}{ A set of same-class vectors. These vectors can be supplied as separate arguments or as a single plain list, data.table or data.frame, see examples. }
\item{nan}{ Either \code{NA} (the default) or \code{NaN}. If \code{NA}, \code{NaN} values are treated as missing and are replaced in the same way as \code{NA}; if \code{NaN}, \code{NaN} is treated as distinct from \code{NA} and is left in place. Only affects vectors of type double. }
}
\details{
Factor type is supported only when the factor levels of each item are equal.
Expand All @@ -22,7 +23,7 @@ Atomic vector of the same type and length as the first vector, having \code{NA}
If the first item is \code{NULL}, the result is \code{NULL}.
}
\seealso{
\code{\link{fifelse}}
\code{\link{fifelse}}, \code{\link{nafill}}
}
\examples{
x = c(11L, NA, 13L, NA, 15L, NA)
Expand All @@ -31,6 +32,9 @@ z = c(11L, NA, 1L, 14L, NA, NA)
fcoalesce(x, y, z)
fcoalesce(list(x,y,z)) # same
fcoalesce(x, list(y,z)) # same
x_num = c(NaN, NA_real_, 3.0)
fcoalesce(x_num, 1) # default: NaN treated as missing -> c(1, 1, 3)
fcoalesce(x_num, 1, nan=NaN) # preserve NaN -> c(NaN, 1, 3)
}
\keyword{ data }

57 changes: 40 additions & 17 deletions src/coalesce.c
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,12 @@
- The replacement of NAs with non-NA values from subsequent vectors
- The conditional checks within parallelized loops
*/
SEXP coalesce(SEXP x, SEXP inplaceArg) {
SEXP coalesce(SEXP x, SEXP inplaceArg, SEXP nan_is_na_arg) {
if (TYPEOF(x)!=VECSXP) internal_error(__func__, "input is list(...) at R level"); // # nocov
if (!IS_TRUE_OR_FALSE(inplaceArg)) internal_error(__func__, "argument 'inplaceArg' must be TRUE or FALSE"); // # nocov
if (!IS_TRUE_OR_FALSE(nan_is_na_arg)) internal_error(__func__, "argument 'nan_is_na_arg' must be TRUE or FALSE"); // # nocov
const bool inplace = LOGICAL(inplaceArg)[0];
const bool nan_is_na = LOGICAL(nan_is_na_arg)[0];
const bool verbose = GetVerbose();
int nprotect = 0;
if (length(x)==0 || isNull(VECTOR_ELT(x,0))) return R_NilValue; // coalesce(NULL, "foo") return NULL even though character type mismatches type NULL
Expand Down Expand Up @@ -102,23 +104,44 @@ SEXP coalesce(SEXP x, SEXP inplaceArg) {
} else {
double *xP = REAL(first), finalVal=NA_REAL;
int k=0;
for (int j=0; j<nval; ++j) {
SEXP item = VECTOR_ELT(x, j+off);
if (length(item)==1) {
double tt = REAL(item)[0];
if (ISNAN(tt)) continue;
finalVal = tt;
break;
if (nan_is_na) {
for (int j=0; j<nval; ++j) {
SEXP item = VECTOR_ELT(x, j+off);
if (length(item)==1) {
double tt = REAL(item)[0];
if (ISNAN(tt)) continue;
finalVal = tt;
break;
}
valP[k++] = REAL_RO(item);
}
const bool final = !ISNAN(finalVal);
#pragma omp parallel for num_threads(getDTthreads(nrow, true))
for (int i=0; i<nrow; ++i) {
double val=xP[i];
if (!ISNAN(val)) continue;
int j=0; while (ISNAN(val) && j<k) val=((double *)valP[j++])[i];
if (!ISNAN(val)) xP[i]=val; else if (final) xP[i]=finalVal;
}
} else {
for (int j=0; j<nval; ++j) {
SEXP item = VECTOR_ELT(x, j+off);
if (length(item)==1) {
double tt = REAL(item)[0];
if (ISNA(tt)) continue;
finalVal = tt;
break;
}
valP[k++] = REAL_RO(item);
}
const bool final = !ISNA(finalVal);
#pragma omp parallel for num_threads(getDTthreads(nrow, true))
for (int i=0; i<nrow; ++i) {
double val=xP[i];
if (!ISNA(val)) continue;
int j=0; while (ISNA(val) && j<k) val=((double *)valP[j++])[i];
if (!ISNA(val)) xP[i]=val; else if (final) xP[i]=finalVal;
}
valP[k++] = REAL_RO(item);
}
const bool final = !ISNAN(finalVal);
#pragma omp parallel for num_threads(getDTthreads(nrow, true))
for (int i=0; i<nrow; ++i) {
double val=xP[i];
if (!ISNAN(val)) continue;
int j=0; while (ISNAN(val) && j<k) val=((double *)valP[j++])[i];
if (!ISNAN(val)) xP[i]=val; else if (final) xP[i]=finalVal;
}
}
} break;
Expand Down
2 changes: 1 addition & 1 deletion src/data.table.h
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP nan_is_na_arg, SEXP inplace, S
SEXP between(SEXP x, SEXP lower, SEXP upper, SEXP incbounds, SEXP NAbounds, SEXP check);

// coalesce.c
SEXP coalesce(SEXP x, SEXP inplace);
SEXP coalesce(SEXP x, SEXP inplace, SEXP nan_is_na_arg);

// utils.c
bool within_int32_repres(double x);
Expand Down
Loading