Rdatatable · mattdowle · Apr 7, 2018 · Mar 31, 2018 · Mar 31, 2018 · Apr 6, 2018
@@ -5442,7 +5442,7 @@ test(1377.8, copy(DT)[FALSE, bar:=stop("eval'd")], DT)
 # therefore, this doesn't actually test mode="wb", but it is as close as we can get
 # NB: As of v1.10.5, fread copes ok with any number of \r before the \n
 
-test(1378.1, fread(testDir("russellCRLF.csv"))[19,`Value With Dividends`], 357.97)
+test(1378.1, fread(file=testDir("russellCRLF.csv"))[19,`Value With Dividends`], 357.97)
 
 f = paste0("file://",testDir("russellCRLF.csv"))
 # simulates an http:// request as far as download.file() and unlink() go, without internet
@@ -11146,16 +11146,22 @@ DT = data.table(col1 = c(1,1,1, 2,2,2), col2 = c(2,2,2,1,1,1), ID = c(rep(1,3),
 test(1863.2, DT[, lapply(.SD, var), by=ID], data.table(ID=c(1,2), col1=0, col2=0, key="ID"))
 
 # Fix the bug when keys contain non UTF8 strings #2566 #2462 #1826
-# Only on Windows platform it might fail, because other platforms use UTF8 as the native encoding.
-DT <- fread(file = testDir("issue_2566.csv"), encoding = "UTF-8")
-# `fread` return a utf-8 encoded data, we should convert x to native encoding.
-# However, we need this condition to ensure the native encoding can be used for Chinese characters.
-# Otherwise, the test will fail because the strings have been damaged.
-if (identical(enc2utf8(enc2native(DT$x)), DT$x)) DT[, x:= enc2native(x)]
-setkey(DT, x)
-test(1864.1, DT[J("\u516c\u5141\u4ef7\u503c\u53d8\u52a8\u635f\u76ca"), z], 1L)
-setkey(DT, y)
-test(1864.2, DT[J("\u516c\u5141\u4ef7\u503c\u53d8\u52a8\u635f\u76ca"), z], 1L)
+utf8_strings = c("\u00e7ile", "fa\u00e7ile", "El. pa\u00c5\u00a1tas", "\u00a1tas", "\u00de")
+latin1_strings = iconv(utf8_strings, from = "UTF-8", to = "latin1")
+mixed_strings = c(utf8_strings, latin1_strings)
+DT1 = data.table(x = mixed_strings, y = c(latin1_strings, utf8_strings), z = 1:10)
+DT2 = copy(DT1)
+setkey(DT1, x)
+setkey(DT2, y)
+# `ans` is what `sort(c(utf8_strings, utf8_strings), method = "radix")` returns; it is hard-coded
+# here because the test must not rely on radix sort, which is not available in R versions before 3.3.0
+ans = c("El. pa\u00c5\u00a1tas", "El. pa\u00c5\u00a1tas", "fa\u00e7ile", "fa\u00e7ile",
+"\u00a1tas", "\u00a1tas", "\u00de", "\u00de", "\u00e7ile", "\u00e7ile")
+test(1864.1, DT1$x, ans)
+test(1864.2, DT2$y, ans)
+ans = c(1L, 6L, 2L, 7L, 3L, 8L, 4L, 9L, 5L, 10L)
+test(1864.3, DT1[c(utf8_strings, latin1_strings), z], c(ans, ans))
+test(1864.4, DT2[c(utf8_strings, latin1_strings), z], c(ans, ans))
 
 # memory exception under asan if there's an extra comma out-of-sample, #2523
 data = rep("a,b,c,d,e,f,g", 2100)