From e5f77436b7302e0d2aad8b4e96802eef650b46db Mon Sep 17 00:00:00 2001 From: shrektan Date: Sat, 31 Mar 2018 09:55:36 +0800 Subject: [PATCH 1/4] improve test 1864 by using latin1 encoded strings, so it can be tested on all the platforms --- inst/tests/issue_2566.csv | 6 ------ inst/tests/tests.Rraw | 24 ++++++++++++++---------- 2 files changed, 14 insertions(+), 16 deletions(-) delete mode 100644 inst/tests/issue_2566.csv diff --git a/inst/tests/issue_2566.csv b/inst/tests/issue_2566.csv deleted file mode 100644 index a9f5448d3d..0000000000 --- a/inst/tests/issue_2566.csv +++ /dev/null @@ -1,6 +0,0 @@ -x,y,z -公允价值变动损益,公允价值变动损益,1 -红利收入,红利收入,2 -价差收入,价差收入,3 -其他业务支出,其他业务支出,4 -资产减值损失,资产减值损失,5 diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4a256381b2..ce0dffafaf 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11146,16 +11146,20 @@ DT = data.table(col1 = c(1,1,1, 2,2,2), col2 = c(2,2,2,1,1,1), ID = c(rep(1,3), test(1863.2, DT[, lapply(.SD, var), by=ID], data.table(ID=c(1,2), col1=0, col2=0, key="ID")) # Fix the bug when keys contain non UTF8 strings #2566 #2462 #1826 -# Only on Windows platform it might fail, because other platforms use UTF8 as the native encoding. -DT <- fread(file = testDir("issue_2566.csv"), encoding = "UTF-8") -# `fread` return a utf-8 encoded data, we should convert x to native encoding. -# However, we need this condition to ensure the native encoding can be used for Chinese characters. -# Otherwise, the test will fail because the strings have been damaged. -if (identical(enc2utf8(enc2native(DT$x)), DT$x)) DT[, x:= enc2native(x)] -setkey(DT, x) -test(1864.1, DT[J("\u516c\u5141\u4ef7\u503c\u53d8\u52a8\u635f\u76ca"), z], 1L) -setkey(DT, y) -test(1864.2, DT[J("\u516c\u5141\u4ef7\u503c\u53d8\u52a8\u635f\u76ca"), z], 1L) +utf8_strings <- c("\u00e7ile", "fa\u00e7ile", "El. 
pa\u00c5\u00a1tas", "\u00a1tas", "\u00de") +latin1_strings <- iconv(utf8_strings, from = "UTF-8", to = "latin1") +mixed_strings <- c(utf8_strings, latin1_strings) +DT1 <- data.table(x = mixed_strings, y = c(latin1_strings, utf8_strings), z = 1:10) +DT2 <- copy(DT1) +setkey(DT1, x) +setkey(DT2, y) +test(1864.1, DT1$x, sort(c(utf8_strings, utf8_strings), method = "radix")) +test(1864.2, DT2$y, sort(c(utf8_strings, utf8_strings), method = "radix")) +test(1864.3, DT1[J(utf8_strings)], DT1[J(latin1_strings)]) +test(1864.4, DT2[J(utf8_strings)], DT2[J(latin1_strings)]) +test(1864.5, DT1[J(utf8_strings)], DT2[J(latin1_strings)]) +test(1864.6, DT2[J(utf8_strings)], DT1[J(latin1_strings)]) +test(1864.7, as.data.frame(DT1), as.data.frame(DT2)) # memory exception under asan if there's an extra comma out-of-sample, #2523 data = rep("a,b,c,d,e,f,g", 2100) From 77f90dd57d767ae2fdc3917c10878fd11a759bd5 Mon Sep 17 00:00:00 2001 From: shrektan Date: Sat, 31 Mar 2018 12:39:33 +0800 Subject: [PATCH 2/4] change the test pattern as requested --- inst/tests/tests.Rraw | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ce0dffafaf..e461936faa 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11146,20 +11146,19 @@ DT = data.table(col1 = c(1,1,1, 2,2,2), col2 = c(2,2,2,1,1,1), ID = c(rep(1,3), test(1863.2, DT[, lapply(.SD, var), by=ID], data.table(ID=c(1,2), col1=0, col2=0, key="ID")) # Fix the bug when keys contain non UTF8 strings #2566 #2462 #1826 -utf8_strings <- c("\u00e7ile", "fa\u00e7ile", "El. pa\u00c5\u00a1tas", "\u00a1tas", "\u00de") -latin1_strings <- iconv(utf8_strings, from = "UTF-8", to = "latin1") -mixed_strings <- c(utf8_strings, latin1_strings) -DT1 <- data.table(x = mixed_strings, y = c(latin1_strings, utf8_strings), z = 1:10) -DT2 <- copy(DT1) +utf8_strings = c("\u00e7ile", "fa\u00e7ile", "El. 
pa\u00c5\u00a1tas", "\u00a1tas", "\u00de") +latin1_strings = iconv(utf8_strings, from = "UTF-8", to = "latin1") +mixed_strings = c(utf8_strings, latin1_strings) +DT1 = data.table(x = mixed_strings, y = c(latin1_strings, utf8_strings), z = 1:10) +DT2 = copy(DT1) setkey(DT1, x) setkey(DT2, y) -test(1864.1, DT1$x, sort(c(utf8_strings, utf8_strings), method = "radix")) -test(1864.2, DT2$y, sort(c(utf8_strings, utf8_strings), method = "radix")) -test(1864.3, DT1[J(utf8_strings)], DT1[J(latin1_strings)]) -test(1864.4, DT2[J(utf8_strings)], DT2[J(latin1_strings)]) -test(1864.5, DT1[J(utf8_strings)], DT2[J(latin1_strings)]) -test(1864.6, DT2[J(utf8_strings)], DT1[J(latin1_strings)]) -test(1864.7, as.data.frame(DT1), as.data.frame(DT2)) +ans = sort(c(utf8_strings, utf8_strings), method = "radix") +test(1864.1, DT1$x, ans) +test(1864.2, DT2$y, ans) +ans = c(1L, 6L, 2L, 7L, 3L, 8L, 4L, 9L, 5L, 10L) +test(1864.3, DT1[c(utf8_strings, latin1_strings), z], c(ans, ans)) +test(1864.4, DT2[c(utf8_strings, latin1_strings), z], c(ans, ans)) # memory exception under asan if there's an extra comma out-of-sample, #2523 data = rep("a,b,c,d,e,f,g", 2100) From 6d04de3e5109a2924c68d5db3a326a425471df2a Mon Sep 17 00:00:00 2001 From: shrektan Date: Fri, 6 Apr 2018 10:00:21 +0800 Subject: [PATCH 3/4] replace the radix sort answer --- inst/tests/tests.Rraw | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index f66783af93..5a6fcdb38d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11153,7 +11153,10 @@ DT1 = data.table(x = mixed_strings, y = c(latin1_strings, utf8_strings), z = 1:1 DT2 = copy(DT1) setkey(DT1, x) setkey(DT2, y) -ans = sort(c(utf8_strings, utf8_strings), method = "radix") +# ans was generated by `sort(c(utf8_strings, utf8_strings), method = "radix")`, +# but the test must not call radix sort itself because it is only available for character vectors from R 3.3.0 onwards +ans = c("El. pa\u00c5\u00a1tas", "El. 
pa\u00c5\u00a1tas", "fa\u00e7ile", "fa\u00e7ile", +"\u00a1tas", "\u00a1tas", "\u00de", "\u00de", "\u00e7ile", "\u00e7ile") test(1864.1, DT1$x, ans) test(1864.2, DT2$y, ans) ans = c(1L, 6L, 2L, 7L, 3L, 8L, 4L, 9L, 5L, 10L) From 7523b2499c9adce8a498c1fcb0898ecac7ea0d04 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Fri, 6 Apr 2018 19:08:27 -0700 Subject: [PATCH 4/4] Aside: restore coverage of fread(file=) with correct existing file --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 5a6fcdb38d..0769d40f37 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -5442,7 +5442,7 @@ test(1377.8, copy(DT)[FALSE, bar:=stop("eval'd")], DT) # therefore, this doesn't actually test mode="wb" but close as we can get # NB: As of v1.10.5, fread copes ok with any number of \r before the \n -test(1378.1, fread(testDir("russellCRLF.csv"))[19,`Value With Dividends`], 357.97) +test(1378.1, fread(file=testDir("russellCRLF.csv"))[19,`Value With Dividends`], 357.97) f = paste0("file://",testDir("russellCRLF.csv")) # simulates a http:// request as far as file.download() and unlink() goes, without internet