diff --git a/inst/tests/issue_2566.csv b/inst/tests/issue_2566.csv
deleted file mode 100644
index a9f5448d3d..0000000000
--- a/inst/tests/issue_2566.csv
+++ /dev/null
@@ -1,6 +0,0 @@
-x,y,z
-公允价值变动损益,公允价值变动损益,1
-红利收入,红利收入,2
-价差收入,价差收入,3
-其他业务支出,其他业务支出,4
-资产减值损失,资产减值损失,5
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index ef81215f19..0769d40f37 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -5442,7 +5442,7 @@ test(1377.8, copy(DT)[FALSE, bar:=stop("eval'd")], DT)
 
 # therefore, this doesn't actually test mode="wb" but close as we can get
 # NB: As of v1.10.5, fread copes ok with any number of \r before the \n
-test(1378.1, fread(testDir("russellCRLF.csv"))[19,`Value With Dividends`], 357.97)
+test(1378.1, fread(file=testDir("russellCRLF.csv"))[19,`Value With Dividends`], 357.97)
 f = paste0("file://",testDir("russellCRLF.csv"))
 # simulates a http:// request as far as file.download() and unlink() goes, without internet
 
@@ -11146,16 +11146,22 @@ DT = data.table(col1 = c(1,1,1, 2,2,2), col2 = c(2,2,2,1,1,1), ID = c(rep(1,3),
 test(1863.2, DT[, lapply(.SD, var), by=ID], data.table(ID=c(1,2), col1=0, col2=0, key="ID"))
 
 # Fix the bug when keys contain non UTF8 strings #2566 #2462 #1826
-# Only on Windows platform it might fail, because other platforms use UTF8 as the native encoding.
-DT <- fread(file = testDir("issue_2566.csv"), encoding = "UTF-8")
-# `fread` return a utf-8 encoded data, we should convert x to native encoding.
-# However, we need this condition to ensure the native encoding can be used for Chinese characters.
-# Otherwise, the test will fail because the strings have been damaged.
-if (identical(enc2utf8(enc2native(DT$x)), DT$x)) DT[, x:= enc2native(x)]
-setkey(DT, x)
-test(1864.1, DT[J("\u516c\u5141\u4ef7\u503c\u53d8\u52a8\u635f\u76ca"), z], 1L)
-setkey(DT, y)
-test(1864.2, DT[J("\u516c\u5141\u4ef7\u503c\u53d8\u52a8\u635f\u76ca"), z], 1L)
+utf8_strings = c("\u00e7ile", "fa\u00e7ile", "El. pa\u00c5\u00a1tas", "\u00a1tas", "\u00de")
+latin1_strings = iconv(utf8_strings, from = "UTF-8", to = "latin1")
+mixed_strings = c(utf8_strings, latin1_strings)
+DT1 = data.table(x = mixed_strings, y = c(latin1_strings, utf8_strings), z = 1:10)
+DT2 = copy(DT1)
+setkey(DT1, x)
+setkey(DT2, y)
+# the ans is generated by `sort(c(utf8_strings, utf8_strings), method = "radix")`
+# but we should not use radix sort in the test because it's introduced after R3.3.0
+ans = c("El. pa\u00c5\u00a1tas", "El. pa\u00c5\u00a1tas", "fa\u00e7ile", "fa\u00e7ile",
+"\u00a1tas", "\u00a1tas", "\u00de", "\u00de", "\u00e7ile", "\u00e7ile")
+test(1864.1, DT1$x, ans)
+test(1864.2, DT2$y, ans)
+ans = c(1L, 6L, 2L, 7L, 3L, 8L, 4L, 9L, 5L, 10L)
+test(1864.3, DT1[c(utf8_strings, latin1_strings), z], c(ans, ans))
+test(1864.4, DT2[c(utf8_strings, latin1_strings), z], c(ans, ans))
 
 # memory exception under asan if there's an extra comma out-of-sample, #2523
 data = rep("a,b,c,d,e,f,g", 2100)