diff --git a/NEWS.md b/NEWS.md index 420cadc484..e0ea1222ea 100644 --- a/NEWS.md +++ b/NEWS.md @@ -6,6 +6,8 @@ ## BUG FIXES +1. If `fread()` discards a single-line footer, the warning message, which includes the discarded text, now displays any non-ASCII characters correctly on Windows, [#4747](https://github.com/Rdatatable/data.table/issues/4747). Thanks to @shrektan for reporting and the PR. + ## NOTES diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 390fadd0f3..a6013a3a11 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17246,3 +17246,15 @@ class(e) = "foo" dt = data.table(id=1, funs=list(e)) test(2161.2, dt[, .(funs), by=id], dt) +# fread warning messages display non-ASCII characters correctly, #4747 +x = "fa\u00e7ile"; Encoding(x) = "UTF-8" +# should only run this test if the native encoding can represent latin1 correctly +if (identical(x, enc2native(x))) { + txt = enc2utf8(sprintf("A,B\n%s,%s\n%s", x, x, x)) + txt2 = iconv(txt, "UTF-8", "latin1") + out = data.table(A = x, B = x) + test(2162.1, fread(text = txt, encoding = 'UTF-8'), out, + warning="Discarded single-line footer: <>") + test(2162.2, fread(text = txt2, encoding = 'Latin-1'), out, + warning="Discarded single-line footer: <>") +} diff --git a/src/fread.c b/src/fread.c index 5b9bac3f03..7b1ba6df03 100644 --- a/src/fread.c +++ b/src/fread.c @@ -2575,7 +2575,7 @@ int freadMain(freadMainArgs _args) { if (ch==eof) { // whitespace at the end of the file is always skipped ok } else { - const char *skippedFooter = ch; + const char *skippedFooter = ENC2NATIVE(ch); // detect if it's a single line footer. Commonly the row count from SQL queries. while (ch +#include #include "po.h" #define FREAD_MAIN_ARGS_EXTRA_FIELDS \