Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions inst/tests/issue_2566.csv

This file was deleted.

28 changes: 17 additions & 11 deletions inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -5442,7 +5442,7 @@ test(1377.8, copy(DT)[FALSE, bar:=stop("eval'd")], DT)
# therefore, this doesn't actually test mode="wb", but it is as close as we can get
# NB: As of v1.10.5, fread copes ok with any number of \r before the \n

test(1378.1, fread(testDir("russellCRLF.csv"))[19,`Value With Dividends`], 357.97)
test(1378.1, fread(file=testDir("russellCRLF.csv"))[19,`Value With Dividends`], 357.97)

f = paste0("file://",testDir("russellCRLF.csv"))
# simulates an http:// request as far as download.file() and unlink() go, without needing internet access
Expand Down Expand Up @@ -11146,16 +11146,22 @@ DT = data.table(col1 = c(1,1,1, 2,2,2), col2 = c(2,2,2,1,1,1), ID = c(rep(1,3),
test(1863.2, DT[, lapply(.SD, var), by=ID], data.table(ID=c(1,2), col1=0, col2=0, key="ID"))

# Fix the bug when keys contain non UTF8 strings #2566 #2462 #1826
# This might fail only on Windows, because other platforms use UTF-8 as the native encoding.
# Load the fixture issue_2566.csv from the test directory, reading it as UTF-8.
DT <- fread(file = testDir("issue_2566.csv"), encoding = "UTF-8")
# `fread` returns UTF-8 encoded data here, so `x` should be converted to the native encoding.
# The conversion is applied only when the round trip native -> UTF-8 reproduces the original
# strings, i.e. when the native encoding can represent these Chinese characters.
# Otherwise the conversion would damage the strings and the test would fail.
if (identical(enc2utf8(enc2native(DT$x)), DT$x)) DT[, x:= enc2native(x)]
# Keyed lookup on `x` using a literal written with \u escapes; the expected `z` is 1L.
setkey(DT, x)
test(1864.1, DT[J("\u516c\u5141\u4ef7\u503c\u53d8\u52a8\u635f\u76ca"), z], 1L)
# Same lookup after switching the key to `y`; the expected `z` is again 1L.
setkey(DT, y)
test(1864.2, DT[J("\u516c\u5141\u4ef7\u503c\u53d8\u52a8\u635f\u76ca"), z], 1L)
# Build the same five strings in two encodings: the \u-escaped literals, and a latin1
# version of them produced with iconv().
utf8_strings = c("\u00e7ile", "fa\u00e7ile", "El. pa\u00c5\u00a1tas", "\u00a1tas", "\u00de")
latin1_strings = iconv(utf8_strings, from = "UTF-8", to = "latin1")
mixed_strings = c(utf8_strings, latin1_strings)
# `x` holds the UTF-8 strings in rows 1-5 and the latin1 strings in rows 6-10;
# `y` holds the two halves in the opposite order. `z` is the original row number.
DT1 = data.table(x = mixed_strings, y = c(latin1_strings, utf8_strings), z = 1:10)
DT2 = copy(DT1)
# Key one table on each column so both column layouts are exercised.
setkey(DT1, x)
setkey(DT2, y)
# The expected order was generated with `sort(c(utf8_strings, utf8_strings), method = "radix")`.
# It is hard-coded here because radix sort should not be used in the test itself: it is only
# available in newer R versions (introduced after R3.3.0, per the original note).
ans = c("El. pa\u00c5\u00a1tas", "El. pa\u00c5\u00a1tas", "fa\u00e7ile", "fa\u00e7ile",
"\u00a1tas", "\u00a1tas", "\u00de", "\u00de", "\u00e7ile", "\u00e7ile")
# After setkey(), each key column is expected to be in that order.
test(1864.1, DT1$x, ans)
test(1864.2, DT2$y, ans)
# `ans` is reused from here on for the expected `z` values of the keyed lookups.
# Each string occurs twice in a key column (once per encoding), so looking up one string
# is expected to return a pair of rows: 1 and 6, then 2 and 7, and so on.
ans = c(1L, 6L, 2L, 7L, 3L, 8L, 4L, 9L, 5L, 10L)
# Look up with the UTF-8 strings and then the latin1 strings; both halves are expected to
# give the same `z` values, hence c(ans, ans).
test(1864.3, DT1[c(utf8_strings, latin1_strings), z], c(ans, ans))
test(1864.4, DT2[c(utf8_strings, latin1_strings), z], c(ans, ans))

# memory exception under asan if there's an extra comma out-of-sample, #2523
data = rep("a,b,c,d,e,f,g", 2100)
Expand Down