diff --git a/NEWS.md b/NEWS.md index e7f3acd890..6fa4e801fb 100644 --- a/NEWS.md +++ b/NEWS.md @@ -172,6 +172,8 @@ 32. `test.data.table()` would fail on test 1894 if the variable `z` was defined by the user, [#3705](https://github.com/Rdatatable/data.table/issues/3705). The test suite already ran in its own separate environment. That environment's parent is no longer `.GlobalEnv` to isolate it further. Thanks to Michael Chirico for reporting, and Matt Dowle for the PR. +33. `fread(text="a,b,c")` (where input data contains no `\n` but `text=` has been used) now works instead of error `file not found: a,b,c`, [#4689](https://github.com/Rdatatable/data.table/issues/4689). Thanks to @trainormg for reporting, and @ben-schwen for the PR. + ## NOTES 1. New feature 29 in v1.12.4 (Oct 2019) introduced zero-copy coercion. Our thinking is that requiring you to get the type right in the case of `0` (type double) vs `0L` (type integer) is too inconvenient for you the user. So such coercions happen in `data.table` automatically without warning. Thanks to zero-copy coercion there is no speed penalty, even when calling `set()` many times in a loop, so there's no speed penalty to warn you about either. However, we believe that assigning a character value such as `"2"` into an integer column is more likely to be a user mistake that you would like to be warned about. The type difference (character vs integer) may be the only clue that you have selected the wrong column, or typed the wrong variable to be assigned to that column. For this reason we view character to numeric-like coercion differently and will warn about it. If it is correct, then the warning is intended to nudge you to wrap the RHS with `as.()` so that it is clear to readers of your code that a coercion from character to that type is intended. For example : diff --git a/R/fread.R b/R/fread.R index 1bc0267b34..12f46b57ea 100644 --- a/R/fread.R +++ b/R/fread.R @@ -262,7 +262,7 @@ yaml=FALSE, autostart=NA, tmpdir=tempdir(), tz="UTC") if (identical(tt,"") || is_utc(tt)) # empty TZ env variable ("") means UTC in C library, unlike R; _unset_ TZ means local tz="UTC" } - ans = .Call(CfreadR,input,sep,dec,quote,header,nrows,skip,na.strings,strip.white,blank.lines.skip, + ans = .Call(CfreadR,input,identical(input,file),sep,dec,quote,header,nrows,skip,na.strings,strip.white,blank.lines.skip, fill,showProgress,nThread,verbose,warnings2errors,logical01,select,drop,colClasses,integer64,encoding,keepLeadingZeros,tz=="UTC") if (!length(ans)) return(null.data.table()) # test 1743.308 drops all columns nr = length(ans[[1L]]) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 56ee3613a7..f5dec62a41 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -10417,6 +10417,7 @@ dir.create(d) test(1703.17, fread(text=c('a,b','1,2'), tmpdir=d), data.table(a=1L,b=2L)) test(1703.18, fread(text=c('a,b','1,2')), data.table(a=1L, b=2L)) unlink(d) +test(1703.19, fread(text="a b c"), data.table(a=logical(), b=logical(), c=logical())) # text= with no \n, #4689 # Ensure all.equal respects 'check.attributes' w.r.t. column names. As testthat::check_equivalent relies on this # as used by package popEpi in its tests @@ -17909,3 +17910,4 @@ test(2203.20, tstrsplit(w, "/", type.convert=list()), error="not support empty l # set rownames as key directly in as.data.table, #4468 test(2204, as.data.table(mtcars, keep.rownames='model', key='model'), setnames(setkey(as.data.table(mtcars, keep.rownames = TRUE), rn), 'rn', 'model')) + diff --git a/src/freadR.c b/src/freadR.c index 842baf00a3..97fe691aa1 100644 --- a/src/freadR.c +++ b/src/freadR.c @@ -50,6 +50,7 @@ static bool oldNoDateTime = false; SEXP freadR( // params passed to freadMain SEXP inputArg, + SEXP isFileNameArg, SEXP sepArg, SEXP decArg, SEXP quoteArg, @@ -81,22 +82,20 @@ SEXP freadR( freadMainArgs args; ncol = 0; dtnrows = 0; - const char *ch, *ch2; + if (!isString(inputArg) || LENGTH(inputArg)!=1) error(_("Internal error: freadR input not a single character string: a filename or the data itself. Should have been caught at R level.")); // # nocov - ch = ch2 = (const char *)CHAR(STRING_ELT(inputArg,0)); - while (*ch2!='\n' && *ch2!='\r' && *ch2!='\0') ch2++; - args.input = (*ch2=='\0') ? R_ExpandFileName(ch) : ch; // for convenience so user doesn't have to call path.expand() - - ch = args.input; - while (*ch!='\0' && *ch!='\n' && *ch!='\r') ch++; - if (*ch!='\0' || args.input[0]=='\0') { - if (verbose) DTPRINT(_("Input contains a \\n or is \")\". Taking this to be text input (not a filename)\n")); - args.filename = NULL; - } else { - if (verbose) DTPRINT(_("Input contains no \\n. Taking this to be a filename to open\n")); - args.filename = args.input; + const char *ch = (const char *)CHAR(STRING_ELT(inputArg,0)); + if (!isLogical(isFileNameArg) || LENGTH(isFileNameArg)!=1 || LOGICAL(isFileNameArg)[0]==NA_LOGICAL) + error(_("Internal error: freadR isFileNameArg not TRUE or FALSE")); // # nocov + if (LOGICAL(isFileNameArg)[0]) { + if (verbose) DTPRINT(_("freadR.c has been passed a filename: %s\n"), ch); + args.filename = R_ExpandFileName(ch); // for convenience so user doesn't have to call path.expand() args.input = NULL; + } else { + if (verbose) DTPRINT(_("freadR.c has been passed the data as text input (not a filename)\n")); + args.filename = NULL; + args.input = ch; } if (!isString(sepArg) || LENGTH(sepArg)!=1 || strlen(CHAR(STRING_ELT(sepArg,0)))>1)