From f7e8252ca92b438aa40e8ae9685d573a1ee6deba Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Fri, 11 Sep 2020 20:42:48 +0200 Subject: [PATCH 1/6] add fread support for text= single line without \r or \n --- R/fread.R | 2 ++ inst/tests/tests.Rraw | 2 ++ 2 files changed, 4 insertions(+) diff --git a/R/fread.R b/R/fread.R index 95e5c4a45a..9bc49fd736 100644 --- a/R/fread.R +++ b/R/fread.R @@ -41,6 +41,8 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="") } else { # avoid creating a tempfile() for single strings, which can be done a lot; e.g. in the test suite. input = text + if (!length(grep("\\n|\\r", input))) + input = paste0(input, "\n") } } else if (is.null(cmd)) { diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index c966a5efc6..508937330d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -17127,3 +17127,5 @@ test(2151, dcast(DT, 1 ~ a, value.var='survmean'), data.table('.'='.', s=1L, x=2 y = person(given='Joel', family='Mossong') test(2152, copy(y), y) +DT = data.table(a = logical(), b = logical(), c = logical()) +test(2153, fread(text = "a b c"), DT) From 32898720102a931da7d1fb2c6e3fca3ecf627f51 Mon Sep 17 00:00:00 2001 From: ben-schwen Date: Fri, 11 Sep 2020 21:22:14 +0200 Subject: [PATCH 2/6] adds issue as comment --- R/fread.R | 1 + 1 file changed, 1 insertion(+) diff --git a/R/fread.R b/R/fread.R index 9bc49fd736..02e2300554 100644 --- a/R/fread.R +++ b/R/fread.R @@ -41,6 +41,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="") } else { # avoid creating a tempfile() for single strings, which can be done a lot; e.g. in the test suite. input = text + # adds a newline to single strings without \n or \r to be recognized as text and not as file #4689 if (!length(grep("\\n|\\r", input))) input = paste0(input, "\n") } From 125a968f0697b497c5e2663cc8e0e01245bbc9df Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Wed, 18 Aug 2021 14:53:27 -0600 Subject: [PATCH 3/6] avoid paste0(input, n) in case of large input --- R/fread.R | 5 +---- src/freadR.c | 25 ++++++++++++------------- 2 files changed, 13 insertions(+), 17 deletions(-) diff --git a/R/fread.R b/R/fread.R index 7aa62e5e27..12f46b57ea 100644 --- a/R/fread.R +++ b/R/fread.R @@ -46,9 +46,6 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") } else { # avoid creating a tempfile() for single strings, which can be done a lot; e.g. in the test suite. input = text - # adds a newline to single strings without \n or \r to be recognized as text and not as file #4689 - if (!length(grep("\\n|\\r", input))) - input = paste0(input, "\n") } } else if (is.null(cmd)) { @@ -265,7 +262,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") if (identical(tt,"") || is_utc(tt)) # empty TZ env variable ("") means UTC in C library, unlike R; _unset_ TZ means local tz="UTC" } - ans = .Call(CfreadR,input,sep,dec,quote,header,nrows,skip,na.strings,strip.white,blank.lines.skip, + ans = .Call(CfreadR,input,identical(input,file),sep,dec,quote,header,nrows,skip,na.strings,strip.white,blank.lines.skip, fill,showProgress,nThread,verbose,warnings2errors,logical01,select,drop,colClasses,integer64,encoding,keepLeadingZeros,tz=="UTC") if (!length(ans)) return(null.data.table()) # test 1743.308 drops all columns nr = length(ans[[1L]]) diff --git a/src/freadR.c b/src/freadR.c index 842baf00a3..97fe691aa1 100644 --- a/src/freadR.c +++ b/src/freadR.c @@ -50,6 +50,7 @@ static bool oldNoDateTime = false; SEXP freadR( // params passed to freadMain SEXP inputArg, + SEXP isFileNameArg, SEXP sepArg, SEXP decArg, SEXP quoteArg, @@ -81,22 +82,20 @@ SEXP freadR( freadMainArgs args; ncol = 0; dtnrows = 0; - const char *ch, *ch2; + if (!isString(inputArg) || LENGTH(inputArg)!=1) error(_("Internal error: freadR input not a single character string: a filename or the data itself. Should have been caught at R level.")); // # nocov - ch = ch2 = (const char *)CHAR(STRING_ELT(inputArg,0)); - while (*ch2!='\n' && *ch2!='\r' && *ch2!='\0') ch2++; - args.input = (*ch2=='\0') ? R_ExpandFileName(ch) : ch; // for convenience so user doesn't have to call path.expand() - - ch = args.input; - while (*ch!='\0' && *ch!='\n' && *ch!='\r') ch++; - if (*ch!='\0' || args.input[0]=='\0') { - if (verbose) DTPRINT(_("Input contains a \\n or is \")\". Taking this to be text input (not a filename)\n")); - args.filename = NULL; - } else { - if (verbose) DTPRINT(_("Input contains no \\n. Taking this to be a filename to open\n")); - args.filename = args.input; + const char *ch = (const char *)CHAR(STRING_ELT(inputArg,0)); + if (!isLogical(isFileNameArg) || LENGTH(isFileNameArg)!=1 || LOGICAL(isFileNameArg)[0]==NA_LOGICAL) + error(_("Internal error: freadR isFileNameArg not TRUE or FALSE")); // # nocov + if (LOGICAL(isFileNameArg)[0]) { + if (verbose) DTPRINT(_("freadR.c has been passed a filename: %s\n"), ch); + args.filename = R_ExpandFileName(ch); // for convenience so user doesn't have to call path.expand() args.input = NULL; + } else { + if (verbose) DTPRINT(_("freadR.c has been passed the data as text input (not a filename)\n")); + args.filename = NULL; + args.input = ch; } if (!isString(sepArg) || LENGTH(sepArg)!=1 || strlen(CHAR(STRING_ELT(sepArg,0)))>1) From df4f1d7c8e3a06ad947fb2f2c841828fc8369df2 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Wed, 18 Aug 2021 14:57:24 -0600 Subject: [PATCH 4/6] move test up --- inst/tests/tests.Rraw | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index ca5c8fb05e..4ca2fb45fe 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -10417,6 +10417,7 @@ dir.create(d) test(1703.17, fread(text=c('a,b','1,2'), tmpdir=d), data.table(a=1L,b=2L)) test(1703.18, fread(text=c('a,b','1,2')), data.table(a=1L, b=2L)) unlink(d) +test(1703.19, fread(text="a b c"), data.table(a=logical(), b=logical(), c=logical())) # text= with no ending \n, #4689 # Ensure all.equal respects 'check.attributes' w.r.t. column names. As testthat::check_equivalent relies on this # as used by package popEpi in its tests @@ -17910,5 +17911,3 @@ test(2203.20, tstrsplit(w, "/", type.convert=list()), error="not support empty l test(2204, as.data.table(mtcars, keep.rownames='model', key='model'), setnames(setkey(as.data.table(mtcars, keep.rownames = TRUE), rn), 'rn', 'model')) -# text= with no ending \n, #4689 -test(2205, fread(text="a b c"), data.table(a=logical(), b=logical(), c=logical())) From 013a2900fdd0c8b6a9611e4d9da5e337e1b2c242 Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Wed, 18 Aug 2021 15:07:30 -0600 Subject: [PATCH 5/6] add news item --- NEWS.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/NEWS.md b/NEWS.md index e7f3acd890..6fa4e801fb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -172,6 +172,8 @@ 32. `test.data.table()` would fail on test 1894 if the variable `z` was defined by the user, [#3705](https://github.com/Rdatatable/data.table/issues/3705). The test suite already ran in its own separate environment. That environment's parent is no longer `.GlobalEnv` to isolate it further. Thanks to Michael Chirico for reporting, and Matt Dowle for the PR. +33. `fread(text="a,b,c")` (where input data contains no `\n` but `text=` has been used) now works instead of error `file not found: a,b,c`, [#4689](https://github.com/Rdatatable/data.table/issues/4689). Thanks to @trainormg for reporting, and @ben-schwen for the PR. + ## NOTES 1. New feature 29 in v1.12.4 (Oct 2019) introduced zero-copy coercion. Our thinking is that requiring you to get the type right in the case of `0` (type double) vs `0L` (type integer) is too inconvenient for you the user. So such coercions happen in `data.table` automatically without warning. Thanks to zero-copy coercion there is no speed penalty, even when calling `set()` many times in a loop, so there's no speed penalty to warn you about either. However, we believe that assigning a character value such as `"2"` into an integer column is more likely to be a user mistake that you would like to be warned about. The type difference (character vs integer) may be the only clue that you have selected the wrong column, or typed the wrong variable to be assigned to that column. For this reason we view character to numeric-like coercion differently and will warn about it. If it is correct, then the warning is intended to nudge you to wrap the RHS with `as.()` so that it is clear to readers of your code that a coercion from character to that type is intended. For example : From 3328f6855dd8d77625e2be2292ad8de3d44b83bd Mon Sep 17 00:00:00 2001 From: Matt Dowle Date: Wed, 18 Aug 2021 15:33:54 -0600 Subject: [PATCH 6/6] refine test comment --- inst/tests/tests.Rraw | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4ca2fb45fe..f5dec62a41 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -10417,7 +10417,7 @@ dir.create(d) test(1703.17, fread(text=c('a,b','1,2'), tmpdir=d), data.table(a=1L,b=2L)) test(1703.18, fread(text=c('a,b','1,2')), data.table(a=1L, b=2L)) unlink(d) -test(1703.19, fread(text="a b c"), data.table(a=logical(), b=logical(), c=logical())) # text= with no ending \n, #4689 +test(1703.19, fread(text="a b c"), data.table(a=logical(), b=logical(), c=logical())) # text= with no \n, #4689 # Ensure all.equal respects 'check.attributes' w.r.t. column names. As testthat::check_equivalent relies on this # as used by package popEpi in its tests