From e5f77436b7302e0d2aad8b4e96802eef650b46db Mon Sep 17 00:00:00 2001
From: shrektan <shrektan@126.com>
Date: Sat, 31 Mar 2018 09:55:36 +0800
Subject: [PATCH 1/4] improve test 1864 by using latin1 encoded strings, so it
 can be tested on all the platforms

---
 inst/tests/issue_2566.csv |  6 ------
 inst/tests/tests.Rraw     | 24 ++++++++++++++----------
 2 files changed, 14 insertions(+), 16 deletions(-)
 delete mode 100644 inst/tests/issue_2566.csv

diff --git a/inst/tests/issue_2566.csv b/inst/tests/issue_2566.csv
deleted file mode 100644
index a9f5448d3d..0000000000
--- a/inst/tests/issue_2566.csv
+++ /dev/null
@@ -1,6 +0,0 @@
-x,y,z
-公允价值变动损益,公允价值变动损益,1
-红利收入,红利收入,2
-价差收入,价差收入,3
-其他业务支出,其他业务支出,4
-资产减值损失,资产减值损失,5
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 4a256381b2..ce0dffafaf 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -11146,16 +11146,20 @@ DT = data.table(col1 = c(1,1,1, 2,2,2), col2 = c(2,2,2,1,1,1), ID = c(rep(1,3),
 test(1863.2, DT[, lapply(.SD, var), by=ID], data.table(ID=c(1,2), col1=0, col2=0, key="ID"))
 
 # Fix the bug when keys contain non UTF8 strings #2566 #2462 #1826
-# Only on Windows platform it might fail, because other platforms use UTF8 as the native encoding.
-DT <- fread(file = testDir("issue_2566.csv"), encoding = "UTF-8")
-# `fread` return a utf-8 encoded data, we should convert x to native encoding.
-# However, we need this condition to ensure the native encoding can be used for Chinese characters.
-# Otherwise, the test will fail because the strings have been damaged.
-if (identical(enc2utf8(enc2native(DT$x)), DT$x)) DT[, x:= enc2native(x)]
-setkey(DT, x)
-test(1864.1, DT[J("\u516c\u5141\u4ef7\u503c\u53d8\u52a8\u635f\u76ca"), z], 1L)
-setkey(DT, y)
-test(1864.2, DT[J("\u516c\u5141\u4ef7\u503c\u53d8\u52a8\u635f\u76ca"), z], 1L)
+utf8_strings <- c("\u00e7ile", "fa\u00e7ile", "El. pa\u00c5\u00a1tas", "\u00a1tas", "\u00de")
+latin1_strings <- iconv(utf8_strings, from = "UTF-8", to = "latin1")
+mixed_strings <- c(utf8_strings, latin1_strings)
+DT1 <- data.table(x = mixed_strings, y = c(latin1_strings, utf8_strings), z = 1:10)
+DT2 <- copy(DT1)
+setkey(DT1, x)
+setkey(DT2, y)
+test(1864.1, DT1$x, sort(c(utf8_strings, utf8_strings), method = "radix"))
+test(1864.2, DT2$y, sort(c(utf8_strings, utf8_strings), method = "radix"))
+test(1864.3, DT1[J(utf8_strings)], DT1[J(latin1_strings)])
+test(1864.4, DT2[J(utf8_strings)], DT2[J(latin1_strings)])
+test(1864.5, DT1[J(utf8_strings)], DT2[J(latin1_strings)])
+test(1864.6, DT2[J(utf8_strings)], DT1[J(latin1_strings)])
+test(1864.7, as.data.frame(DT1), as.data.frame(DT2))
 
 # memory exception under asan if there's an extra comma out-of-sample, #2523
 data = rep("a,b,c,d,e,f,g", 2100)

From 77f90dd57d767ae2fdc3917c10878fd11a759bd5 Mon Sep 17 00:00:00 2001
From: shrektan <shrektan@126.com>
Date: Sat, 31 Mar 2018 12:39:33 +0800
Subject: [PATCH 2/4] change the test pattern as requested

---
 inst/tests/tests.Rraw | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index ce0dffafaf..e461936faa 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -11146,20 +11146,19 @@ DT = data.table(col1 = c(1,1,1, 2,2,2), col2 = c(2,2,2,1,1,1), ID = c(rep(1,3),
 test(1863.2, DT[, lapply(.SD, var), by=ID], data.table(ID=c(1,2), col1=0, col2=0, key="ID"))
 
 # Fix the bug when keys contain non UTF8 strings #2566 #2462 #1826
-utf8_strings <- c("\u00e7ile", "fa\u00e7ile", "El. pa\u00c5\u00a1tas", "\u00a1tas", "\u00de")
-latin1_strings <- iconv(utf8_strings, from = "UTF-8", to = "latin1")
-mixed_strings <- c(utf8_strings, latin1_strings)
-DT1 <- data.table(x = mixed_strings, y = c(latin1_strings, utf8_strings), z = 1:10)
-DT2 <- copy(DT1)
+utf8_strings = c("\u00e7ile", "fa\u00e7ile", "El. pa\u00c5\u00a1tas", "\u00a1tas", "\u00de")
+latin1_strings = iconv(utf8_strings, from = "UTF-8", to = "latin1")
+mixed_strings = c(utf8_strings, latin1_strings)
+DT1 = data.table(x = mixed_strings, y = c(latin1_strings, utf8_strings), z = 1:10)
+DT2 = copy(DT1)
 setkey(DT1, x)
 setkey(DT2, y)
-test(1864.1, DT1$x, sort(c(utf8_strings, utf8_strings), method = "radix"))
-test(1864.2, DT2$y, sort(c(utf8_strings, utf8_strings), method = "radix"))
-test(1864.3, DT1[J(utf8_strings)], DT1[J(latin1_strings)])
-test(1864.4, DT2[J(utf8_strings)], DT2[J(latin1_strings)])
-test(1864.5, DT1[J(utf8_strings)], DT2[J(latin1_strings)])
-test(1864.6, DT2[J(utf8_strings)], DT1[J(latin1_strings)])
-test(1864.7, as.data.frame(DT1), as.data.frame(DT2))
+ans = sort(c(utf8_strings, utf8_strings), method = "radix")
+test(1864.1, DT1$x, ans)
+test(1864.2, DT2$y, ans)
+ans = c(1L, 6L, 2L, 7L, 3L, 8L, 4L, 9L, 5L, 10L)
+test(1864.3, DT1[c(utf8_strings, latin1_strings), z], c(ans, ans))
+test(1864.4, DT2[c(utf8_strings, latin1_strings), z], c(ans, ans))
 
 # memory exception under asan if there's an extra comma out-of-sample, #2523
 data = rep("a,b,c,d,e,f,g", 2100)

From 6d04de3e5109a2924c68d5db3a326a425471df2a Mon Sep 17 00:00:00 2001
From: shrektan <shrektan@126.com>
Date: Fri, 6 Apr 2018 10:00:21 +0800
Subject: [PATCH 3/4] replace the radix sort answer

---
 inst/tests/tests.Rraw | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index f66783af93..5a6fcdb38d 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -11153,7 +11153,10 @@ DT1 = data.table(x = mixed_strings, y = c(latin1_strings, utf8_strings), z = 1:1
 DT2 = copy(DT1)
 setkey(DT1, x)
 setkey(DT2, y)
-ans = sort(c(utf8_strings, utf8_strings), method = "radix")
+# `ans` was generated by `sort(c(utf8_strings, utf8_strings), method = "radix")`;
+# it is hard-coded here because `method = "radix"` only exists from R 3.3.0 onwards
+ans = c("El. pa\u00c5\u00a1tas", "El. pa\u00c5\u00a1tas", "fa\u00e7ile", "fa\u00e7ile",
+"\u00a1tas", "\u00a1tas", "\u00de", "\u00de", "\u00e7ile", "\u00e7ile")
 test(1864.1, DT1$x, ans)
 test(1864.2, DT2$y, ans)
 ans = c(1L, 6L, 2L, 7L, 3L, 8L, 4L, 9L, 5L, 10L)

From 7523b2499c9adce8a498c1fcb0898ecac7ea0d04 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Fri, 6 Apr 2018 19:08:27 -0700
Subject: [PATCH 4/4] Aside: restore coverage of fread(file=) with correct
 existing file

---
 inst/tests/tests.Rraw | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 5a6fcdb38d..0769d40f37 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -5442,7 +5442,7 @@ test(1377.8, copy(DT)[FALSE, bar:=stop("eval'd")], DT)
 # therefore, this doesn't actually test mode="wb" but close as we can get
 # NB: As of v1.10.5, fread copes ok with any number of \r before the \n
 
-test(1378.1, fread(testDir("russellCRLF.csv"))[19,`Value With Dividends`], 357.97)
+test(1378.1, fread(file=testDir("russellCRLF.csv"))[19,`Value With Dividends`], 357.97)
 
 f = paste0("file://",testDir("russellCRLF.csv"))
 # simulates a http:// request as far as file.download() and unlink() goes, without internet