diff --git a/NEWS.md b/NEWS.md index ab02356e8d..a9f4815be0 100644 --- a/NEWS.md +++ b/NEWS.md @@ -14,6 +14,9 @@ 5. `fwrite()` now accepts `matrix`, (#2613)[https://github.com/Rdatatable/data.table/issues/2613]. Thanks to Michael Chirico for the suggestion and Felipe Parages for implementing. For now matrix input is converted to data.table (which can be costly) before writing. +6. `fread()` and `fwrite()` can now handle file names in native and UTF-8 encoding, [#3078](https://github.com/Rdatatable/data.table/issues/3078). Thanks to Daniel Possenriede (@dpprdan) for reporting and fixing. + + #### BUG FIXES 1. Providing an `i` subset expression when attempting to delete a column correctly failed with helpful error, but when the column was missing too created a new column full of `NULL` values, [#3089](https://github.com/Rdatatable/data.table/issues/3089). Thanks to Michael Chirico for reporting. diff --git a/R/fread.R b/R/fread.R index 30212e13b4..1dd4c0cd9d 100644 --- a/R/fread.R +++ b/R/fread.R @@ -106,6 +106,8 @@ fread <- function(input="",file=NULL,text=NULL,cmd=NULL,sep="auto",sep2="auto",d file = decompFile # don't use 'tmpFile' symbol again, as tmpFile might be the http://domain.org/file.csv.gz download on.exit(unlink(decompFile), add=TRUE) } + file = enc2native(file) # CfreadR cannot handle UTF-8 if that is not the native encoding, see #3078. + input = file } if (!missing(autostart)) warning("'autostart' is now deprecated and ignored. Consider skip='string' or skip=n"); diff --git a/R/fwrite.R b/R/fwrite.R index 5bf6ae2c01..9f918c46da 100644 --- a/R/fwrite.R +++ b/R/fwrite.R @@ -68,6 +68,7 @@ fwrite <- function(x, file="", append=FALSE, quote="auto", return(invisible()) } } + file <- enc2native(file) # CfwriteR cannot handle UTF-8 if that is not the native encoding, see #3078. .Call(CfwriteR, x, file, sep, sep2, eol, na, dec, quote, qmethod=="escape", append, row.names, col.names, logical01, dateTimeAs, buffMB, nThread, showProgress, verbose) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index d89bfedd3b..f6a0ca5179 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -12498,6 +12498,30 @@ test(1964.4, colnames(DT), c("v1","v2")) test(1965, setDT(list(1, 1:2)), error = 'profile of input lengths') #3121 +# fread/fwrite file name in native and utf-8 encoding, #3078 +if (.Platform$OS.type=="windows") { + f = tempfile("\u00f6"); cat("3.14", file = f) + fn = enc2native(f); f8 = enc2utf8(f) + test(1966.1, fread(fn), data.table(V1=3.14)) + test(1966.2, fread(f8), data.table(V1=3.14)) + unlink(c(fn, f8)) + + DT = data.table("a"); pth = tempdir() + f = "\u00f6.csv"; fp = file.path(pth, f) + fpn = enc2native(fp); fp8 = enc2utf8(fp) + fwrite(DT, fpn) + test(1966.3, list.files(path = pth, pattern = "\\.csv$"), f) + unlink(c(fp, file.path(pth, "\u00c3\u00b6.csv"))) + fwrite(DT, fp8) + test(1966.4, list.files(path = pth, pattern = "\\.csv$"), f) + unlink(c(fp, file.path(pth, "\u00c3\u00b6.csv"))) + p = file.path(pth, "\u00fc"); dir.create(p); f = tempfile(tmpdir = p) + test(1966.5, fwrite(DT, enc2native(f)), NULL) + unlink(f) + test(1966.6, fwrite(DT, enc2utf8(f)), NULL) + unlink(p, recursive = TRUE) +} + ################################### # Add new tests above this line #