Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 14 additions & 11 deletions R/bmerge.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,24 +2,26 @@
bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbose)
{
callersi = i
# Just so that when a double in i which contains integers stored as double, is joined to an integer column
# in x, the i column is returned as integer in the result. Just before the call to bmerge() in [.data.table
# there is a shallow() copy of i to prevent this change of type affecting the user's object by reference.
# There is only one use of callersi here.
# Useful for ad hoc joins when the L postfix is often forgotten.
# Otherwise, the type of the i column is always returned.

i = shallow(i)
# merge on .SD in i fails _sometimes_ because of set() being done here, #1926
# Just before the call to bmerge() in [.data.table there is a shallow() copy of i to prevent coercions here
# by bmerge changing the type of the user's input object by reference. We now shallow copy i again. If we then
# coerce a column in i only, we are just changing the temporary coercion used for the merge operation. If we
# set callersi too then we are keeping that coerced i column in the merge result returned to user.
# The type of the i column is always returned (i.e. just i set not callersi too), other than:
# i) to convert int-as-double to int, useful for ad hoc joins when the L postfix is often forgotten.
# ii) to coerce i.factor to character when joining to x.character
# So those are the only two uses of callersi below.
# Careful to only use plonk syntax (full column) on i and x from now on, otherwise user's i and x would
# change. This is why shallow() is very importantly internal only, currently.

# Using .SD in j to join could fail due to being locked and set() being used here, #1926
.Call(C_unlock, i)
x = shallow(x)
.Call(C_unlock, x)
if (.Call(C_islocked, callersi)) {
.Call(C_unlock, callersi)
on.exit(.Call(C_lock, callersi))
}
# careful to only plonk syntax (full column) on i/x from now on otherwise user's i and x would change;
# this is why shallow() is very importantly internal only, currently.

supported = c(ORDERING_TYPES, "factor", "integer64")

Expand Down Expand Up @@ -53,7 +55,8 @@ bmerge = function(i, x, icols, xcols, roll, rollends, nomatch, mult, ops, verbos
} else {
if (xclass=="character") {
if (verbose) cat("Coercing factor column i.",names(i)[ic]," to type character to match type of x.",names(x)[xc],".\n",sep="")
set(i, j=ic, value=as.character(i[[ic]]))
set(i, j=ic, value=val<-as.character(i[[ic]]))
set(callersi, j=ic, value=val) # factor in i joining to character in x will return character and not keep x's factor; e.g. for antaresRead #3581
next
} else if (iclass=="character") {
if (verbose) cat("Matching character column i.",names(i)[ic]," to factor levels in x.",names(x)[xc],".\n",sep="")
Expand Down
4 changes: 2 additions & 2 deletions R/data.table.R
Original file line number Diff line number Diff line change
Expand Up @@ -2400,6 +2400,7 @@ setnames = function(x,old,new,skip_absent=FALSE) {
# Sets by reference, maintains truelength, no copy of table at all.
# But also more convenient than names(DT)[i]="newname" because we can also do setnames(DT,"oldname","newname")
# without an onerous match() ourselves. old can be positions, too, but we encourage by name for robustness.
# duplicates are permitted to be created without warning; e.g. in revdeps and for example, and setting spacer columns all with ""
if (!is.data.frame(x)) stop("x is not a data.table or data.frame")
ncol = length(x)
if (length(names(x)) != ncol) stop("x has ",ncol," columns but its names are length ",length(names(x)))
Expand All @@ -2408,7 +2409,6 @@ setnames = function(x,old,new,skip_absent=FALSE) {
# for setnames(DT,new); e.g., setnames(DT,c("A","B")) where ncol(DT)==2
if (!is.character(old)) stop("Passed a vector of type '",typeof(old),"'. Needs to be type 'character'.")
if (length(old) != ncol) stop("Can't assign ",length(old)," names to a ",ncol," column data.table")
# note that duplicate names are permitted to be created in this usage only
if (anyNA(names(x))) {
# if x somehow has some NA names, which() needs help to return them, #2475
w = which((names(x) != old) | (is.na(names(x)) & !is.na(old)))
Expand All @@ -2421,7 +2421,7 @@ setnames = function(x,old,new,skip_absent=FALSE) {
} else {
if (missing(old)) stop("When 'new' is provided, 'old' must be provided too")
if (!is.character(new)) stop("'new' is not a character vector")
if (anyDuplicated(new)) stop("Some duplicates exist in 'new': ", brackify(new[duplicated(new)]))
# if (anyDuplicated(new)) warning("Some duplicates exist in 'new': ", brackify(new[duplicated(new)])) # dups allowed without warning; warn if and when the dup causes an ambiguity
if (anyNA(new)) stop("NA in 'new' at positions ", brackify(which(is.na(new))))
if (anyDuplicated(old)) stop("Some duplicates exist in 'old': ", brackify(old[duplicated(old)]))
if (is.numeric(old)) i = old = seq_along(x)[old] # leave it to standard R to manipulate bounds and negative numbers
Expand Down
10 changes: 5 additions & 5 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -1675,8 +1675,8 @@ test(577, CJ(x=c(1L,2L), c("a","b")), data.table(x=c(1L,1L,2L,2L),V2=c("a","b","
# Test factor to character join when factor contains unused and reverse order levels :
X = data.table(a=LETTERS[1:4],v=1:4,key="a")
Y = data.table(a=factor(c("D","B"),levels=rev(LETTERS)),key="a")
test(578, X[Y,verbose=TRUE], output="Coercing factor column i.a to type character to match type of x.a")
test(579, X[Y], data.table(a=Y$a, v=c(4L,2L), key="a"))
test(578, X[Y,verbose=TRUE], data.table(a=c("D","B"), v=c(4L,2L)), # no key because "D">"B", consistent with v1.12.2 and before
output="Coercing factor column i.a to type character to match type of x.a")

# Test that logical i in set() returns helpful error
DT = data.table(a=1:3,b=4:6)
Expand Down Expand Up @@ -13592,7 +13592,7 @@ test(1967.59, setnames(x, 1:2, c(8L, 9L)), error = "'new' is not a character")
test(1967.60, setnames(x, -1:1, c('hey', 'you')), error = "mixed.*negative")
test(1967.61, setnames(x, 1+3i, 'cplx'), error = "'old' is type complex")
test(1967.62, setnames(x, 1, c('d', 'e')), error = "'old' is length 1 but 'new'")
test(1967.621, setnames(x, 1:2, c("a","a")), error = "Some duplicates exist in 'new': [a]")
test(1967.621, setnames(x, 1:2, c("a","a")), data.table(a=1:5, a=6:10))
test(1967.622, setnames(x, 1:2, c("a",NA)), error = "NA in 'new' at positions [2]")
test(1967.63, setcolorder(x, c(1, 1)), error = 'specify duplicated column')
test(1967.64, setcolorder(x, 1+3i), error = 'must be character or numeric')
Expand Down Expand Up @@ -14898,9 +14898,9 @@ test(2044.62, dt1[dt2, ..cols, on="doubleInt==int", verbose=TRUE],
test(2044.63, dt1[dt2, ..cols, on="realDouble==int", verbose=TRUE],
data.table(x.bool=c(rep(FALSE,4),TRUE), x.int=INT(2,4,6,8,10), x.doubleInt=c(2,4,6,8,10), i.bool=TRUE, i.int=1:5, i.doubleInt=as.double(1:5), i.char=letters[1:5]),
output="Coerced integer column i.int to type double for join to match type of x.realDouble")
cols = c("x.int","x.char","x.fact","i.int","i.char","i.fact")
cols = c("x.int","x.char","x.fact","i.int","i.char","i.char")
test(2044.64, dt1[dt2, ..cols, on="char==fact", verbose=TRUE],
ans<-data.table(x.int=1:5, x.char=letters[1:5], x.fact=factor(letters[1:5]), i.int=1:5, i.char=letters[1:5], i.fact=factor(letters[1:5])),
ans<-data.table(x.int=1:5, x.char=letters[1:5], x.fact=factor(letters[1:5]), i.int=1:5, i.char=letters[1:5], i.char=letters[1:5]),
output="Coercing factor column i.fact to type character to match type of x.char")
test(2044.65, dt1[dt2, ..cols, on="fact==char", verbose=TRUE],
ans,
Expand Down