From 505a034125a59d6002e40b417d80740e2b9a9065 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Mon, 5 Feb 2018 11:53:21 -0800
Subject: [PATCH 01/14] Interim: fread skip="auto" default; skip=0 forces first line as start

---
 R/fread.R             |  9 ++++++---
 inst/tests/tests.Rraw | 13 ++++++++-----
 src/fread.c           | 38 +++++++++++++++++++++++++++++++-------
 src/freadR.c          |  8 +++-----
 4 files changed, 48 insertions(+), 20 deletions(-)

diff --git a/R/fread.R b/R/fread.R
index f6064bdcf7..6bf53829bf 100644
--- a/R/fread.R
+++ b/R/fread.R
@@ -1,5 +1,5 @@
 
-fread <- function(input="",file,sep="auto",sep2="auto",dec=".",quote="\"",nrows=Inf,header="auto",na.strings="NA",stringsAsFactors=FALSE,verbose=getOption("datatable.verbose"),autostart=NA,skip=0,select=NULL,drop=NULL,colClasses=NULL,integer64=getOption("datatable.integer64"), col.names, check.names=FALSE, encoding="unknown", strip.white=TRUE, fill=FALSE, blank.lines.skip=FALSE, key=NULL, showProgress=interactive(),data.table=getOption("datatable.fread.datatable"),nThread=getDTthreads(),logical01=TRUE)
+fread <- function(input="",file,sep="auto",sep2="auto",dec=".",quote="\"",nrows=Inf,header="auto",na.strings="NA",stringsAsFactors=FALSE,verbose=getOption("datatable.verbose"),skip="auto",select=NULL,drop=NULL,colClasses=NULL,integer64=getOption("datatable.integer64"), col.names, check.names=FALSE, encoding="unknown", strip.white=TRUE, fill=FALSE, blank.lines.skip=FALSE, key=NULL, showProgress=interactive(),data.table=getOption("datatable.fread.datatable"),nThread=getDTthreads(),logical01=TRUE,autostart=NA)
 {
   if (is.null(sep)) sep="\n"         # C level knows that \n means \r\n on Windows, for example
   else {
@@ -21,7 +21,6 @@ fread <- function(input="",file,sep="auto",sep2="auto",dec=".",quote="\"",nrows=
   if (is.na(nrows) || nrows<0) nrows=Inf   # accept -1 to mean Inf, as read.table does
   if (identical(header,"auto")) header=NA
   stopifnot(isTrueFalseNA(header))
-  stopifnot(length(skip)==1L)
   stopifnot(is.numeric(nThread) && length(nThread)==1L)
   nThread=as.integer(nThread)
   stopifnot(nThread>=1)
@@ -87,7 +86,11 @@ fread <- function(input="",file,sep="auto",sep2="auto",dec=".",quote="\"",nrows=
       colClasses = tapply(names(colClasses), colClasses, c, simplify=FALSE)
     }
   }
-  if (is.numeric(skip)) skip = as.integer(skip)
+  stopifnot(length(skip)==1L, !is.na(skip), is.character(skip) || is.numeric(skip))
+  if (skip=="auto") skip=-1L
+  # so, skip="string" so long as "string" is not "auto". The skip="auto" default best conveys something
+  # is automatic there (better than skip=-1 or skip=NA). skip="string" is rarely used, so ok to treat "auto" specially.
+  if (is.double(skip)) skip = as.integer(skip)
   warnings2errors = getOption("warn") >= 2
   ans = .Call(CfreadR,input,sep,dec,quote,header,nrows,skip,na.strings,strip.white,blank.lines.skip,
               fill,showProgress,nThread,verbose,warnings2errors,logical01,select,drop,colClasses,integer64,encoding)
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 2d76a826c3..bc57f04975 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -11510,11 +11510,14 @@ test(1870.1, fread("A,100,200\n,300,400\n,500,600"), data.table(V1=c("A","",""),
 test(1870.2, fread("A,100,\n,,\n,500,600"), data.table(V1=c("A","",""), V2=c(100L,NA,500L), V3=c(NA,NA,600L)))
 test(1870.3, fread("A,B,\n,,\n,500,3.4"), data.table(A=NA, B=c(NA,500L), V3=c(NA,3.4)))
 
-# nrows= now ignores errors after those rows as expected, #1267
-# txt = "V1, V2, V3\n1,2,3\nV4, V5, V6, V7\n4,5,6,7\n8,9,10,11\n"
-# fread(txt)
-# fread(txt, nrows = 1, header = TRUE, skip = 0)
-# fread("1,2,3\n1,2", nrows=1)
+# nrows= now ignores errors after those nrows as expected and skip= determines first row for sure, #1267
+txt = "V1, V2, V3\n2,3,4\nV4, V5, V6, V7\n4,5,6,7\n8,9,10,11\n"
+test(1871.1, fread(txt), data.table(V4=INT(4,8), V5=INT(5,9), V6=INT(6,10), V7=INT(7,11)))
+test(1871.2, fread(txt, nrows=1), data.table(V4=4L, V5=5L, V6=6L, V7=7L))
+test(1871.3, fread(txt, skip=0), ans<-data.table(V1=2L, V2=3L, V3=4L), warning="discarded line V4, V5")
+test(1871.4, fread(txt, skip=0, nrows=1), ans)
+test(1871.5, fread(txt, skip=0, nrows=1, header=TRUE), ans)
+test(1871.6, fread(txt, skip=0, nrows=1, header=FALSE), data.table(V1=c("V1","2"), V2=c("V2","3"), V3=c("V3","4")))
 # for ( i in 100:1) {
 #   lines <- paste0(paste(rep("1,2,3", i), collapse='\n'), "\n1,2")
 #   fread(lines, nrows=i)
diff --git a/src/fread.c b/src/fread.c
index fa59f3b3c5..a69c80d22b 100644
--- a/src/fread.c
+++ b/src/fread.c
@@ -1073,7 +1073,7 @@ int freadMain(freadMainArgs _args) {
       else
         DTPRINT("  None of the NAstrings look like numbers.\n");
     }
-    if (args.skipNrow) DTPRINT("  skip num lines = %llu\n", (llu)args.skipNrow);
+    if (args.skipNrow >= 0) DTPRINT("  skip num lines = %llu\n", (llu)args.skipNrow);
     if (args.skipString) DTPRINT("  skip to string = <<%s>>\n", args.skipString);
     DTPRINT("  show progress = %d\n", args.showProgress);
     DTPRINT("  0/1 column will be read as %s\n", args.logical01? "boolean" : "integer");
@@ -1269,6 +1269,7 @@ int freadMain(freadMainArgs _args) {
   //*********************************************************************************************
   const char *pos;   // Location where the actual data in the file begins
   int row1line = 1;  // The line number where the data starts. Normally row 1 is column names and row1line ends up == 2.
+  bool skipAuto = true;
   {
 
   // First, set 'LFpresent' for use by eol() to know if \r-only line ending is allowed, #2371
@@ -1298,12 +1299,14 @@ int freadMain(freadMainArgs _args) {
     if (verbose) DTPRINT("Found skip='%s' on line %llu. Taking this to be header row or first row of data.\n",
                          args.skipString, (llu)row1line);
     ch = pos;
+    skipAuto = false;
   }
-  // Skip the first `skipNrow` lines of input.
-  else if (args.skipNrow>0) {
+  // Skip the first `skipNrow` lines of input, including 0 to force the first line to be the start
+  else if (args.skipNrow >= 0) {
     while (ch<eof && row1line<=args.skipNrow) row1line+=(*ch++=='\n');
     if (ch>=eof) STOP("skip=%llu but the input only has %llu line%s", (llu)args.skipNrow, (llu)row1line, row1line>1?"s":"");
     pos = ch;
+    skipAuto = false;
   }
 
   // skip blank input at the start
@@ -1383,7 +1386,12 @@ int freadMain(freadMainArgs _args) {
       while (ch<eof && thisLine++<JUMPLINES) {
         int thisncol = countfields(&ch);   // using this sep and quote rule; moves ch to start of next line
         if (thisncol<0) { numFields[0]=-1; break; }  // invalid file with this sep and quote rule; abort
-        if (thisncol!=lastncol) { numFields[++i]=thisncol; lastncol=thisncol; } // new contiguous consistent ncol started
+        if (thisncol!=lastncol) {
+          if (!skipAuto && i==0) break;  // biggest contiguous group always starting on the line skip= landed on
+          // else new contiguous consistent ncol started
+          numFields[++i]=thisncol;
+          lastncol=thisncol;
+        }
         numLines[i]++;
       }
       if (numFields[0]==-1) continue;
@@ -1425,11 +1433,11 @@ int freadMain(freadMainArgs _args) {
   sep = topSep;
   whiteChar = (sep==' ' ? '\t' : (sep=='\t' ? ' ' : 0));
   ch = pos;
-  if (fill) {
+  if (fill || !skipAuto) {
     // start input from first populated line, already pos.
     ncol = topNmax;
   } else {
-    // find the top line with the consistent number of fields.  There might be irregular banner lines above it.
+    // find the top line with the consistent number of fields.  There might be irregular banner lines above it (skip="auto")
     ncol = topNumFields;
     int thisLine=-1;
     while (ch<eof && ++thisLine<JUMPLINES) {
@@ -1543,6 +1551,22 @@ int freadMain(freadMainArgs _args) {
     bool bumped = false;  // did this jump find any different types; to reduce verbose output to relevant lines
     bool skipThisJump = false;
     int jumpLine = 0;    // line from this jump point start
+
+
+    setting nrows= must turn off jump sampling (just use the first min(JUMPLINES,nrowLimit))  AND turn off multithreading because we can't have type bumps
+    in later chunks affecting the current jump (before we know whether this or the next jump will fill nrowLimit, while threads 7 and 8 are reading!)  This will much simplify
+    the horrid logic later where we struggled with allocnrow==nrowLimit.  Cancelling the error was wrong thing to do due to the possible wrong bumps.
+    All we need is an extra  'if nrowLimit reached then break;' in COLD section (and branch predicted)
+    Add to manual that using nrows= will turn off multithreading.
+
+    Read up to the error line, return up to there and then report the error line as warning.  In sampling too, not stop.
+    Add to ?fread that for production purposes, options(warn=2) should be turned on.
+    Or, report up to 5 lines at the end as warning, otherwise, error.
+
+    So the errors in the ordered section, can only be not-errors if it is the last chunk being processed. To ensure no later jump chunk bumped types. If so, they can be
+    warnings about stopping on that line.
+
+
     while(ch<eof && (jumpLine++<JUMPLINES || jump==nJumps-1)) {  // nJumps==1 implies sample all of input to eof; last jump to eof too
       const char *lineStart = ch;
       if (sep==' ') while (*ch==' ') ch++;  // multiple sep=' ' at the beginning of a line does not mean sep
@@ -1624,7 +1648,7 @@ int freadMain(freadMainArgs _args) {
     }
   }
 
-  if (args.header==NA_BOOL8 && prevStart!=NULL && args.skipNrow==0 && args.skipString==NULL) {
+  if (args.header==NA_BOOL8 && prevStart!=NULL && skipAuto) {
     // The first data row matches types in the row after that, and user didn't override default auto detection.
     // Maybe previous line (if there is one, prevStart!=NULL) contains column names but there are too few (which is why it didn't become the first data row).
     ch = prevStart;
diff --git a/src/freadR.c b/src/freadR.c
index 40746e010c..17da5a92d9 100644
--- a/src/freadR.c
+++ b/src/freadR.c
@@ -125,15 +125,13 @@ SEXP freadR(
   }
 
   args.logical01 = LOGICAL(logical01Arg)[0];
-  args.skipNrow=0;
+  args.skipNrow=-1;
   args.skipString=NULL;
   if (isString(skipArg)) {
     args.skipString = CHAR(STRING_ELT(skipArg,0));  // LENGTH==1 was checked at R level
-  } else if (isReal(skipArg)) {
-    if (R_FINITE(REAL(skipArg)[0]) && REAL(skipArg)[0]>0.0) args.skipNrow = (uint64_t)REAL(skipArg)[0];
   } else if (isInteger(skipArg)) {
-    if (INTEGER(skipArg)[0]>0) args.skipNrow = (uint64_t)INTEGER(skipArg)[0];
-  } else error("skip must be a single positive numeric (integer or double), or a string to search for");
+    args.skipNrow = (int64_t)INTEGER(skipArg)[0];
+  } else error("Internal error: skip not integer or string in freadR.c");
 
   if (!isNull(NAstringsArg) && !isString(NAstringsArg))
     error("'na.strings' is type '%s'.  Must be either NULL or a character vector.", type2char(TYPEOF(NAstringsArg)));

From 71b52e0789ca0431994434cea4ca4259c62b15f3 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Fri, 9 Feb 2018 11:39:19 -0800
Subject: [PATCH 02/14] Interim: nrows= caps sampled lines and turns off jump sampling

---
 src/fread.c  | 70 +++++++++++++++++++++-------------------------------
 src/freadR.c |  2 +-
 2 files changed, 29 insertions(+), 43 deletions(-)

diff --git a/src/fread.c b/src/fread.c
index a69c80d22b..dc249e8bd1 100644
--- a/src/fread.c
+++ b/src/fread.c
@@ -65,8 +65,6 @@ static bool skipEmptyLines=false, fill=false;
 
 static double NA_FLOAT64;  // takes fread.h:NA_FLOAT64_VALUE
 
-#define JUMPLINES 100    // at each of the 100 jumps how many lines to guess column types (10,000 sample lines)
-
 // Private globals so they can be cleaned up both on error and on successful return
 static void *mmp = NULL;
 static size_t fileSize;
@@ -166,6 +164,7 @@ bool freadCleanup(void)
 
 #define CEIL(x)  ((size_t)(double)ceil(x))
 static inline size_t umax(size_t a, size_t b) { return a > b ? a : b; }
+static inline size_t umin(size_t a, size_t b) { return a < b ? a : b; }
 static inline int imin(int a, int b) { return a < b ? a : b; }
 
 /** Return value of `x` clamped to the range [upper, lower] */
@@ -1333,6 +1332,7 @@ int freadMain(freadMainArgs _args) {
   int ncol;  // Detected number of columns in the file
   const char *firstJumpEnd=NULL; // remember where the winning jumpline from jump 0 ends, to know its size excluding header
   const char *prevStart = NULL;  // the start of the non-empty line before the first not-ignored row (for warning message later, or taking as column names)
+  int jumpLines = (int)umin(100,nrowLimit);   // how many lines from each jump point to use. If nrowLimit is supplied, nJumps is later set to 1 as well.
   {
   if (verbose) DTPRINT("[06] Detect separator, quoting rule, and ncolumns\n");
 
@@ -1364,26 +1364,28 @@ int freadMain(freadMainArgs _args) {
                             //   (when fill=true, the max is usually the header row and is the longest but there are more
                             //    lines of fewer)
 
-  // We will scan the input line-by-line (at most `JUMPLINES + 1` lines; "+1"
+  // We will scan the input line-by-line (at most 100+1 lines; "+1"
   // covers the header row, at this stage we don't know if it's present), and
   // detect the number of fields on each line. If several consecutive lines
   // have the same number of fields, we'll call them a "contiguous group of
   // lines". Arrays `numFields` and `numLines` contain information about each
-  // contiguous group of lines encountered while scanning the first JUMPLINES
-  // + 1 lines: 'numFields` gives the count of fields in each group, and
-  // `numLines` has the number of lines in each group.
-  int numFields[JUMPLINES+1];
-  int numLines[JUMPLINES+1];
+  // contiguous group of lines encountered while scanning the first 100+1
+  // lines: 'numFields` gives the count of fields in each group, and
+  // `numLines` has the number of lines in each group. There is always a lot
+  // of unused space at the end of these vectors. They are only jumpLines+1 big
+  // for the worst case that no adjacent lines have the same number of fields.
+  int numFields[jumpLines+1];
+  int numLines[jumpLines+1];
   for (int s=0; s<nseps; s++) {
     sep = seps[s];
     whiteChar = (sep==' ' ? '\t' : (sep=='\t' ? ' ' : 0));  // 0 means both ' ' and '\t' to be skipped
     for (quoteRule=0; quoteRule<4; quoteRule++) {  // quote rule in order of preference
       ch = pos;
       // if (verbose) DTPRINT("  Trying sep='%c' with quoteRule %d ...\n", sep, quoteRule);
-      for (int i=0; i<=JUMPLINES; i++) { numFields[i]=0; numLines[i]=0; } // clear VLAs
+      for (int i=0; i<=jumpLines; i++) { numFields[i]=0; numLines[i]=0; } // clear VLAs
       int i=-1; // The slot we're counting the currently contiguous consistent ncol
       int thisLine=0, lastncol=-1;
-      while (ch<eof && thisLine++<JUMPLINES) {
+      while (ch<eof && thisLine++<jumpLines) {
         int thisncol = countfields(&ch);   // using this sep and quote rule; moves ch to start of next line
         if (thisncol<0) { numFields[0]=-1; break; }  // invalid file with this sep and quote rule; abort
         if (thisncol!=lastncol) {
@@ -1440,7 +1442,7 @@ int freadMain(freadMainArgs _args) {
     // find the top line with the consistent number of fields.  There might be irregular banner lines above it (skip="auto")
     ncol = topNumFields;
     int thisLine=-1;
-    while (ch<eof && ++thisLine<JUMPLINES) {
+    while (ch<eof && ++thisLine<jumpLines) {
       const char *lastLineStart = ch;
       int tt = countfields(&ch);
       if (tt==ncol) { ch=pos=lastLineStart; row1line+=thisLine; break; }
@@ -1504,22 +1506,23 @@ int freadMain(freadMainArgs _args) {
     tmpType[j] = type[j] = type0;
   }
 
-  size_t jump0size=(size_t)(firstJumpEnd-pos);  // the size in bytes of the first JUMPLINES from the start (jump point 0)
+  size_t jump0size=(size_t)(firstJumpEnd-pos);  // the size in bytes of the first 100 lines from the start (jump point 0)
   // how many places in the file to jump to and test types there (the very end is added as 11th or 101th)
   // not too many though so as not to slow down wide files; e.g. 10,000 columns.  But for such large files (50GB) it is
   // worth spending a few extra seconds sampling 10,000 rows to decrease a chance of costly reread even further.
-  nJumps = 0;
+  nJumps = 1;
   size_t sz = (size_t)(eof - pos);
   if (jump0size>0) {
     if (jump0size*100*2 < sz) nJumps=100;  // 100 jumps * 100 lines = 10,000 line sample
     else if (jump0size*10*2 < sz) nJumps=10;
     // *2 to get a good spacing. We don't want overlaps resulting in double counting.
-    // nJumps==1 means the whole (small) file will be sampled with one thread
   }
   nJumps++; // the extra sample at the very end (up to eof) is sampled and format checked but not jumped to when reading
+  if (nrowLimit<INT64_MAX) nJumps=1; // when nrowLimit supplied by user, no jumps and single threaded
   if (verbose) {
     DTPRINT("  Number of sampling jump points = %d because ", nJumps);
-    if (jump0size==0) DTPRINT("jump0size==0\n");
+    if (nrowLimit<INT64_MAX) DTPRINT("nrow limit (%llu) supplied\n", (llu)nrowLimit);
+    else if (jump0size==0) DTPRINT("jump0size==0\n");
     else DTPRINT("(%llu bytes from row 1 to eof) / (2 * %llu jump0size) == %llu\n",
                  (llu)sz, (llu)jump0size, (llu)(sz/(2*jump0size)));
   }
@@ -1539,7 +1542,7 @@ int freadMain(freadMainArgs _args) {
       }
       firstRowStart = ch;
     } else {
-      ch = (jump == nJumps-1) ? eof - (size_t)(0.5*jump0size) :
+      ch = (jump == nJumps-1) ? eof - (size_t)(0.5*jump0size) :  // to almost-surely sample the last line
                                 pos + (size_t)jump*((size_t)(eof-pos)/(size_t)(nJumps-1));
     }
     if (ch<lastRowEnd) ch=lastRowEnd;  // Overlap when apx 1,200 lines (just over 11*100) with short lines at the beginning and longer lines near the end, #2157
@@ -1552,22 +1555,7 @@ int freadMain(freadMainArgs _args) {
     bool skipThisJump = false;
     int jumpLine = 0;    // line from this jump point start
 
-
-    setting nrows= must turn off jump sampling (just use the first min(JUMPLINES,nrowLimit))  AND turn off multithreading because we can't have type bumps
-    in later chunks affecting the current jump (before we know whether this or the next jump will fill nrowLimit, while threads 7 and 8 are reading!)  This will much simplify
-    the horrid logic later where we struggled with allocnrow==nrowLimit.  Cancelling the error was wrong thing to do due to the possible wrong bumps.
-    All we need is an extra  'if nrowLimit reached then break;' in COLD section (and branch predicted)
-    Add to manual that using nrows= will turn off multithreading.
-
-    Read up to the error line, return up to there and then report the error line as warning.  In sampling too, not stop.
-    Add to ?fread that for production purposes, options(warn=2) should be turned on.
-    Or, report up to 5 lines at the end as warning, otherwise, error.
-
-    So the errors in the ordered section, can only be not-errors if it is the last chunk being processed. To ensure no later jump chunk bumped types. If so, they can be
-    warnings about stopping on that line.
-
-
-    while(ch<eof && (jumpLine++<JUMPLINES || jump==nJumps-1)) {  // nJumps==1 implies sample all of input to eof; last jump to eof too
+    while(ch<eof && jumpLine++<jumpLines) {
       const char *lineStart = ch;
       if (sep==' ') while (*ch==' ') ch++;  // multiple sep=' ' at the beginning of a line does not mean sep
       // detect blank lines ...
@@ -1585,14 +1573,12 @@ int freadMain(freadMainArgs _args) {
       // if too few fields are found, then we proceed here as if fill=true. If fill is "warning" or FALSE, we'll delay the message
       // in the data read loop where we know the line number exactly. (We're jumping here so we don't know the line number yet).
 
-      if (ch==eof) {  // TODO bring next line into this one 'if'
-        if (finalByte && tmpType[ncol-1]!=previousLastColType) {
-          // revert bump due to e.g. ,NA<eof> in the last field of last row where finalByte=='A' and N caused bump to character (test 894.0221)
-          if (verbose) DTPRINT("  Reverted bump of final column from %d to %d on final field due to finalByte='%c'."
-              " If the bump was actually correct, there will be a reread. Finish the file properly with newline to avoid the reread.\n",
-              previousLastColType, tmpType[ncol-1], finalByte);
-          tmpType[ncol-1] = previousLastColType;
-        }
+      if (ch==eof && finalByte && tmpType[ncol-1]!=previousLastColType) {
+        // revert bump due to e.g. ,NA<eof> in the last field of last row where finalByte=='A' and N caused bump to character (test 894.0221)
+        if (verbose) DTPRINT("  Reverted bump of final column from %d to %d on final field due to finalByte='%c'."
+            " If the bump was actually correct, there will be a reread. Finish the file properly with newline to avoid the reread.\n",
+            previousLastColType, tmpType[ncol-1], finalByte);
+        tmpType[ncol-1] = previousLastColType;
       }
       if (!eol(&ch) && *ch!='\0') {
         if (jump==0) {
@@ -1715,7 +1701,7 @@ int freadMain(freadMainArgs _args) {
   meanLineLen=0.0; // Average length (in bytes) of a single line in the input file
   bytesRead=0;     // Bytes in the data section (i.e. excluding column names, header and footer, if any)
 
-  if (nJumps==1) {
+  if (sampleLines < jumpLines) {
     if (verbose) DTPRINT("  All rows were sampled since file is small so we know nrow=%llu exactly\n", (llu)sampleLines);
     estnrow = allocnrow = sampleLines;
   } else {
@@ -1882,7 +1868,7 @@ int freadMain(freadMainArgs _args) {
     else if (nJumps>nth) nJumps = nth*(1+(nJumps-1)/nth);
     chunkBytes = bytesRead / (size_t)nJumps;
   } else {
-    nJumps = 1;
+    ASSERT(nJumps==1, "nJumps (%d) != 1", nJumps);
   }
   size_t initialBuffRows = allocnrow / (size_t)nJumps;
 
diff --git a/src/freadR.c b/src/freadR.c
index 17da5a92d9..ce969e5873 100644
--- a/src/freadR.c
+++ b/src/freadR.c
@@ -121,7 +121,7 @@ SEXP freadR(
   if (isReal(nrowLimitArg)) {
     if (R_FINITE(REAL(nrowLimitArg)[0]) && REAL(nrowLimitArg)[0]>=0.0) args.nrowLimit = (int64_t)(REAL(nrowLimitArg)[0]);
   } else {
-    if (INTEGER(nrowLimitArg)[0]>=0) args.nrowLimit = (int64_t)INTEGER(nrowLimitArg)[0];
+    if (INTEGER(nrowLimitArg)[0]>=1) args.nrowLimit = (int64_t)INTEGER(nrowLimitArg)[0];
   }
 
   args.logical01 = LOGICAL(logical01Arg)[0];

From 6e3841ed51291cfb3105a657ad9dcd3763aded57 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Fri, 9 Feb 2018 13:23:44 -0800
Subject: [PATCH 03/14] Tidied test numbers

---
 inst/tests/tests.Rraw | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index a7d388fe54..3ff735564d 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -11515,9 +11515,9 @@ test(1869.6, fread(testDir("colnames4096.csv")), error="very unusual.*one single
 test(1869.7, fread(testDir("onecol4096.csv")), error="very unusual.*single column.*multiple of 4096.*ends with 2 or more end-of-line")
 
 # better colname detection by comparing potential column names to the whole sample not just the first row of the sample, #2526
-test(1871.1, fread("A,100,200\n,300,400\n,500,600"), data.table(V1=c("A","",""), V2=c(100L,300L,500L), V3=c(200L,400L,600L)))
-test(1871.2, fread("A,100,\n,,\n,500,600"), data.table(V1=c("A","",""), V2=c(100L,NA,500L), V3=c(NA,NA,600L)))
-test(1871.3, fread("A,B,\n,,\n,500,3.4"), data.table(A=NA, B=c(NA,500L), V3=c(NA,3.4)))
+test(1870.1, fread("A,100,200\n,300,400\n,500,600"), data.table(V1=c("A","",""), V2=c(100L,300L,500L), V3=c(200L,400L,600L)))
+test(1870.2, fread("A,100,\n,,\n,500,600"), data.table(V1=c("A","",""), V2=c(100L,NA,500L), V3=c(NA,NA,600L)))
+test(1870.3, fread("A,B,\n,,\n,500,3.4"), data.table(A=NA, B=c(NA,500L), V3=c(NA,3.4)))
 
 # nrows= now ignores errors after those nrows as expected and skip= determines first row for sure, #1267
 txt = "V1, V2, V3\n2,3,4\nV4, V5, V6, V7\n4,5,6,7\n8,9,10,11\n"
@@ -11590,11 +11590,13 @@ DT = data.table(x=rep(c("b","a","c"),each=3), y=c(1,3,6), v=1:9)
 test(1872.14, DT[X, on=.(x, v>=v), verbose = TRUE],
      output = 'Non-equi join operators.*forder took.*group lengths.*done.*non-equi group ids.*done')
 
-
+# out-of-sample bump from int to quoted field containing comma, #2614
 DT = data.table(A=rep(10L, 2200), B="20")
 DT[111, B:="3,456"]
-fwrite(DT, f<-tempfile())
+fwrite(DT,f<-tempfile())
 test(1873, fread(f), DT)
+unlink(f)
+
 
 
 ##########################

From 0fdec61652baf13c91cc0e2fc41f63824f75ba79 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Fri, 9 Feb 2018 14:15:00 -0800
Subject: [PATCH 04/14] Interim: skip default renamed "__auto__"; nJumps==2 for small files

---
 R/fread.R   | 6 ++----
 src/fread.c | 9 +++++----
 2 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/R/fread.R b/R/fread.R
index 0f046b8cb5..f6c885aa49 100644
--- a/R/fread.R
+++ b/R/fread.R
@@ -1,5 +1,5 @@
 
-fread <- function(input="",file,sep="auto",sep2="auto",dec=".",quote="\"",nrows=Inf,header="auto",na.strings="NA",stringsAsFactors=FALSE,verbose=getOption("datatable.verbose"),skip="auto",select=NULL,drop=NULL,colClasses=NULL,integer64=getOption("datatable.integer64"), col.names, check.names=FALSE, encoding="unknown", strip.white=TRUE, fill=FALSE, blank.lines.skip=FALSE, key=NULL, showProgress=interactive(),data.table=getOption("datatable.fread.datatable"),nThread=getDTthreads(),logical01=TRUE,autostart=NA)
+fread <- function(input="",file,sep="auto",sep2="auto",dec=".",quote="\"",nrows=Inf,header="auto",na.strings="NA",stringsAsFactors=FALSE,verbose=getOption("datatable.verbose"),skip="__auto__",select=NULL,drop=NULL,colClasses=NULL,integer64=getOption("datatable.integer64"), col.names, check.names=FALSE, encoding="unknown", strip.white=TRUE, fill=FALSE, blank.lines.skip=FALSE, key=NULL, showProgress=interactive(),data.table=getOption("datatable.fread.datatable"),nThread=getDTthreads(),logical01=TRUE,autostart=NA)
 {
   if (is.null(sep)) sep="\n"         # C level knows that \n means \r\n on Windows, for example
   else {
@@ -87,9 +87,7 @@ fread <- function(input="",file,sep="auto",sep2="auto",dec=".",quote="\"",nrows=
     }
   }
   stopifnot(length(skip)==1L, !is.na(skip), is.character(skip) || is.numeric(skip))
-  if (skip=="auto") skip=-1L
-  # so, skip="string" so long as "string" is not "auto". The skip="auto" default best conveys something
-  # is automatic there (better than skip=-1 or skip=NA). skip="string" is rarely used, so ok to treat "auto" specially.
+  if (skip=="__auto__") skip=-1L   # skip="string" searches for that string, so long as "string" is not "__auto__". The "__auto__" default conveys to the user that something is automatic there (better than -1 or NA).
   if (is.double(skip)) skip = as.integer(skip)
   warnings2errors = getOption("warn") >= 2
   ans = .Call(CfreadR,input,sep,dec,quote,header,nrows,skip,na.strings,strip.white,blank.lines.skip,
diff --git a/src/fread.c b/src/fread.c
index 841f53163b..ccb9780bf4 100644
--- a/src/fread.c
+++ b/src/fread.c
@@ -1518,7 +1518,7 @@ int freadMain(freadMainArgs _args) {
     // *2 to get a good spacing. We don't want overlaps resulting in double counting.
   }
   nJumps++; // the extra sample at the very end (up to eof) is sampled and format checked but not jumped to when reading
-  if (nrowLimit<INT64_MAX) nJumps=1; // when nrowLimit supplied by user, no jumps and single threaded
+  if (nrowLimit<INT64_MAX) nJumps=1; // when nrowLimit supplied by user, no jumps (not even at the end) and single threaded
   if (verbose) {
     DTPRINT("  Number of sampling jump points = %d because ", nJumps);
     if (nrowLimit<INT64_MAX) DTPRINT("nrow limit (%llu) supplied\n", (llu)nrowLimit);
@@ -1680,7 +1680,7 @@ int freadMain(freadMainArgs _args) {
           DTPRINT("  'header' determined to be false because there are some number columns and those columns do not have a string field at the top of them\n");
       }
     }
-    if (args.header==false && nJumps==1) sampleLines++; // all data rows were sampled, so increment sampleLines because it becomes the exact nrow allocation
+    if (args.header==false && nJumps<=2) sampleLines++; // all data rows may have been sampled, so increment sampleLines because it becomes the exact nrow allocation
   }
 
   if (args.header==false) {
@@ -1860,7 +1860,7 @@ int freadMain(freadMainArgs _args) {
   // space, then this variable will tell how many new rows has to be allocated.
   size_t extraAllocRows = 0;
 
-  if (nJumps/*from sampling*/>1) {
+  if (nJumps/*from sampling*/>2) {
     // ensure data size is split into same sized chunks (no remainder in last chunk) and a multiple of nth
     // when nth==1 we still split by chunk for consistency (testing) and code sanity
     nJumps = (int)(bytesRead/chunkBytes);
@@ -1868,7 +1868,8 @@ int freadMain(freadMainArgs _args) {
     else if (nJumps>nth) nJumps = nth*(1+(nJumps-1)/nth);
     chunkBytes = bytesRead / (size_t)nJumps;
   } else {
-    ASSERT(nJumps==1, "nJumps (%d) != 1", nJumps);
+    ASSERT(nJumps==1 /*when nrowLimit supplied*/ || nJumps==2 /*small files*/, "nJumps (%d) != 1|2", nJumps);
+    nJumps=1;
   }
   size_t initialBuffRows = allocnrow / (size_t)nJumps;
 

From f4bf93e229f56fa156527f37a7c6f767c31211b5 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Fri, 9 Feb 2018 20:30:55 -0800
Subject: [PATCH 05/14] Interim

---
 inst/tests/tests.Rraw | 39 ++++++++++---------
 src/fread.c           | 89 +++++++++++++++++++++++++++++--------------
 2 files changed, 81 insertions(+), 47 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 3ff735564d..c97bd80f89 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -2746,9 +2746,9 @@ test(966, fread(input, colClasses=list(character=2:4)), error="Column number 4 (
 test(967, nrow(fread( paste( rep('a\tb\n', 10000), collapse=''), header=FALSE)), 10000L)
 
 # Test fread warns about removal of any footer (and autostart skips up over it)
-test(968, fread("A,B\n1,3\n2,4\n\nRowcount: 2\n"), data.table(A=1:2,B=3:4), warning="Found the last.*discarded.*Rowcount: 2")
-test(969, fread("A,B\n1,3\n2,4\n\n\nRowcount: 2"), data.table(A=1:2,B=3:4), warning="Found the last.*discarded.*Rowcount: 2")
-test(970, fread("A,B\n1,3\n2,4\n\n\nRowcount: 2\n\n"), data.table(A=1:2,B=3:4), warning="Found the last.*discarded.*Rowcount: 2")
+test(968, fread("A,B\n1,3\n2,4\n\nRowcount: 2\n"), data.table(A=1:2,B=3:4), warning="Discarded footer.*Rowcount: 2")
+test(969, fread("A,B\n1,3\n2,4\n\n\nRowcount: 2"), data.table(A=1:2,B=3:4), warning="Discarded footer.*Rowcount: 2")
+test(970, fread("A,B\n1,3\n2,4\n\n\nRowcount: 2\n\n"), data.table(A=1:2,B=3:4), warning="Discarded footer.*Rowcount: 2")
 
 # fread skip override
 input = "some,bad,data\nA,B,C\n1,3,5\n2,4,6\n"
@@ -2758,11 +2758,11 @@ test(973, fread(input, skip=2), data.table(V1=1:2,V2=3:4,V3=5:6))
 test(974, fread(input, skip=2, header=TRUE), data.table("1"=2L,"3"=4L,"5"=6L))
 test(975, fread(input, skip="B"), data.table(A=1:2,B=3:4,C=5:6))
 input = "\n\nA,B\n1,3\n2,4\n\nC,D\n5,7\n6,8\n\nE,F\n9,11\n10,12\n"   # 3 tables in one file
-test(976, fread(input), data.table(A=1:2,B=3:4), warning="Found the last.*discarded.*C,D")
-test(977, fread(input, skip="C"), data.table(C=5:6,D=7:8), warning="Found the last.*discarded.*E,F")
-test(978.1, fread(input, skip="D"), data.table(C=5:6,D=7:8), warning="Found the last.*discarded.*E,F")
-test(978.2, fread(input, skip=",F"), data.table(E=9:10,F=11:12))
-test(978.3, fread(input, skip=3), data.table(V1=1:2, V2=3:4), warning="Found the last.*discarded.*C,D")
+test(976, fread(input), error="Line 6 has too few fields. Expecting 2 fields but found 0")
+test(977, fread(input, skip="C"), error="Line 10 has too few fields. Expecting 2 fields but found 0")
+test(978.1, fread(input, skip="D"), error="Line 10")
+test(978.2, fread(input, skip=",F"), data.table(E=9:10, F=11:12))
+test(978.3, fread(input, skip=9), data.table(E=9:10, F=11:12))
 
 # mixed add and update in same `:=` bug/crash, #2528 and #2778
 DT = data.table(x=rep(1:2, c(3,2)), y=6:10)
@@ -7409,8 +7409,9 @@ str2="YYYY MM DD HH mm             19490             40790
 test(1555.14, fread(str1), fread(str2))
 
 # fix for #1330
-test(1556.1, fread(testDir("issue_1330_fread.txt"), nrow=2), data.table(a=1:2, b=1:2), warning="Found.*discarded.*<<3.*3>>")
-test(1556.2, fread(testDir("issue_1330_fread.txt"), nrow=4), data.table(a=1:2, b=1:2), warning="Found.*discarded.*<<3.*3>>")
+test(1556.1, fread(testDir("issue_1330_fread.txt"), nrow=2), data.table(a=1:2, b=1:2))
+test(1556.2, fread(testDir("issue_1330_fread.txt"), nrow=3), error="Line 4 has too few fields")
+test(1556.3, fread(testDir("issue_1330_fread.txt"), nrow=4), error="Line 4 has too few fields")
 
 # FR #768
 str="1,2\n3,4\n"
@@ -7766,7 +7767,7 @@ test(1585.2, f1(testDir("536_fread_fill_1.txt"), b=TRUE), f2(testDir("536_fread_
 test(1585.3, f1(testDir("536_fread_fill_2.txt")), f2(testDir("536_fread_fill_2.txt")))
 test(1585.4, f1(testDir("536_fread_fill_2.txt"), b=TRUE), f2(testDir("536_fread_fill_2.txt"), b=TRUE))
 
-test(1585.5, f1(testDir("536_fread_fill_3_extreme.txt")), f2(testDir("536_fread_fill_3_extreme.txt"))[-(7:9),])
+test(1585.5, f1(testDir("536_fread_fill_3_extreme.txt")), f2(testDir("536_fread_fill_3_extreme.txt"))[-9,])
 test(1585.6, f1(testDir("536_fread_fill_3_extreme.txt"), b=TRUE), f2(testDir("536_fread_fill_3_extreme.txt"), b=TRUE))
 # no warning about bumping type. when fill=TRUE, column type detection starts at first non-empty line (which makes sense).
 test(1585.7, f1(testDir("536_fread_fill_4.txt")), f2(testDir("536_fread_fill_4.txt"))[-29,])
@@ -10978,8 +10979,8 @@ test(1808.2, fread("A,B\r1,2\r3,4\r"), data.table(A=c(1L,3L),B=c(2L,4L)))
 cat("A,B\r1,2\r3,4",file=f<-tempfile())
   test(1808.3, fread(f), data.table(A=c(1L,3L),B=c(2L,4L)))
 unlink(f)
-test(1808.4, fread("A,B\r1,3\r\r\r2,4\r"), data.table(A=TRUE, B=3L), warning="last consistent line")
-test(1808.5, fread("A,B\r4,3\r\r \r2,4\r"), data.table(A=4L, B=3L), warning="afterwards.*discarded.*<<2,4>>")
+test(1808.4, fread("A,B\r1,3\r\r\r2,4\r"), data.table(A=TRUE, B=3L), warning="Discarded footer: <<2,4>>")
+test(1808.5, fread("A,B\r4,3\r\r \r2,4\r"), data.table(A=4L, B=3L), warning="Discarded footer: <<2,4>>")
 test(1808.6, fread("A,B\r1,3\r\r \r2,4\r", blank.lines.skip=TRUE), data.table(A=1:2, B=3:4))
 test(1808.7, fread("A,B\r1,3\r\r \r2,4\r", fill=TRUE), data.table(A=c(1L,NA,NA,2L), B=c(3L,NA,NA,4L)))
 test(1808.8, fread("A,B\r1,3\r\r \r2,\r", blank.lines.skip=TRUE, fill=TRUE), data.table(A=1:2, B=c(3L,NA)))
@@ -11015,10 +11016,10 @@ test(1818, fread(testDir("session_aborted_fatal_error.txt"))[c(1,.N),c(1,2,250,2
 # expansion of uses of as.ITime.character, PR#1796
 test(1819, as.ITime("2015-09-29 08:22:00"), structure(30120L, class = "ITime"))
 
-# Issue 2287: the % sign in the error message should not be interpreted as a format string!
-test(1820.1, fread("name,id\nfoo,1\nbar%\n"), error="Line 3 has too few.*Expecting 2 fields but found 1.*<<bar%>>")
-test(1820.2, fread("name,id\nfoo,1\nbar%d"), error="Line 3 has too few.*Expecting 2 fields but found 1.*<<bar%d>>")
-test(1820.3, fread("name,id\nfoo,1\nbar%s"), error="Line 3 has too few.*Expecting 2 fields but found 1.*<<bar%s>>")
+# Issue 2287: the % sign in the error/warning message should not be interpreted as a format string!
+test(1820.1, fread("name,id\nfoo,2\nbar%\n"), data.table(name="foo", id=2L), warning="Discarded footer: <<bar%>>")
+test(1820.2, fread("name,id\nfoo,2\nbar%d"), data.table(name="foo", id=2L), warning="Discarded footer: <<bar%d>>")
+test(1820.3, fread("name,id\nfoo,2\nbar%s"), data.table(name="foo", id=2L), warning="Discarded footer: <<bar%s>>")
 
 # new argument for print.data.table: col.names
 #   issue #1482 / PR #1483
@@ -11369,7 +11370,7 @@ test(1856.2, fread("A,B\n\n"), ans)
 test(1856.3, fread("A,B\n\n\n"), ans)
 test(1856.4, fread("A,B\n3,4\n\n\n"), data.table(A=3L, B=4L))
 test(1856.5, fread("A,B\n3,4\n,\n\n\n"), data.table(A=c(3L,NA), B=c(4L,NA)))
-test(1856.6, fread("A,B\n3,4\n\n5,6\n"), data.table(A=3L, B=4L), warning="text exists afterwards")
+test(1856.6, fread("A,B\n3,4\n\n5,6\n"), data.table(A=3L, B=4L), warning="Discarded footer: <<5,6>>")
 DTs = list(                                      # passed fread(fwrite(DT))==DT before fix?
   data.table(A=logical(0)),                      # yes
   data.table(A=NA),                              # no
@@ -11510,7 +11511,7 @@ test(1869.1, fread("A\r1\r\r\r2\r"), data.table(A=c(1L,NA,NA,2L)))
 test(1869.2, fread("A\r1\r\r\r2\r\r"), data.table(A=c(1L,NA,NA,2L,NA)))
 test(1869.3, fread("A\r1\r\r\r2\r\r\r"), data.table(A=c(1L,NA,NA,2L,NA,NA)))
 test(1869.4, fread("A,B\r2,3\r,\r,\r4,5\r\r"), data.table(A=c(2L,NA,NA,4L), B=c(3L,NA,NA,5L)))
-test(1869.5, fread("A,B\r2,3\r\r,\r2,4\r\r"), data.table(A=2L, B=3L), warning="consistent line")
+test(1869.5, fread("A,B\r2,3\r\r,\r2,4\r\r"), error="Line 3 has too few fields. Expecting 2 fields but found 0.")  # two line footer because of the comma. Only 1 line footers are auto discarded.
 test(1869.6, fread(testDir("colnames4096.csv")), error="very unusual.*one single line without any.*r.*n at the end.*and.*multiple of 4096")
 test(1869.7, fread(testDir("onecol4096.csv")), error="very unusual.*single column.*multiple of 4096.*ends with 2 or more end-of-line")
 
diff --git a/src/fread.c b/src/fread.c
index ccb9780bf4..d64881801b 100644
--- a/src/fread.c
+++ b/src/fread.c
@@ -181,14 +181,17 @@ static inline size_t clamp_szt(size_t x, size_t lower, size_t upper) {
  * Parameter `limit` cannot exceed 500.
  */
 static const char* strlim(const char *ch, size_t limit) {
-  static char buf[1002];
+  static char buf[1004];
   static int flip = 0;
-  char *ptr = buf + 501 * flip;
+  char *ptr = buf + 502 * flip;
   flip = 1 - flip;
   char *ch2 = ptr;
   if (limit>500) limit=500;
   size_t width = 0;
-  while ((*ch>'\r' || (*ch!='\0' && *ch!='\r' && *ch!='\n')) && width++<limit) *ch2++ = *ch++;
+  while ((*ch>'\r' || (*ch!='\0' && *ch!='\r' && *ch!='\n')) && width++<limit) {
+    if (*ch=='%') { *ch2++ = '%'; width++; } // 1004 and 502 above in case % occurs at position 500 and is doubled here
+    *ch2++ = *ch++;
+  }
   *ch2 = '\0';
   return ptr;
 }
@@ -1485,7 +1488,7 @@ int freadMain(freadMainArgs _args) {
   //*********************************************************************************************
   int nJumps;             // How many jumps to use when pre-scanning the file
   size_t sampleLines;     // How many lines were sampled during the initial pre-scan
-  const char *lastRowEnd; // Pointer to the end of the data section
+  //const char *lastRowEnd; // Pointer to the end of the data section
   bool autoFirstColName = false; // true when there's one less column name and then it's assumed that the first column is row names or index
   size_t estnrow=1;
   size_t allocnrow=0;     // Number of rows in the allocated DataTable
@@ -1530,9 +1533,9 @@ int freadMain(freadMainArgs _args) {
   sampleLines = 0;
   double sumLen=0.0, sumLenSq=0.0;
   int minLen=INT32_MAX, maxLen=-1;   // int_max so the first if(thisLen<minLen) is always true; similarly for max
-  lastRowEnd = pos;
+  const char *lastRowEnd = pos;
   const char *firstRowStart = pos;
-  bool lastSampleJumpOk = false;   // it won't be ok if its nextGoodLine returns false as testing in test 1768
+  //bool lastSampleJumpOk = false;   // it won't be ok if its nextGoodLine returns false as testing in test 1768
   for (int jump=0; jump<nJumps; jump++) {
     if (jump==0) {
       ch = pos;
@@ -1549,8 +1552,10 @@ int freadMain(freadMainArgs _args) {
     if (ch>=eof) break;                // The 9th jump could reach the end in the same situation and that's ok. As long as the end is sampled is what we want.
     if (jump>0 && !nextGoodLine(&ch, ncol)) {
       // skip this jump for sampling. Very unusual and in such unusual cases, we don't mind a slightly worse guess.
+      //lastSampleJumpOk = false;
       continue;
     }
+    //lastSampleJumpOk = true;
     bool bumped = false;  // did this jump find any different types; to reduce verbose output to relevant lines
     bool skipThisJump = false;
     int jumpLine = 0;    // line from this jump point start
@@ -1603,22 +1608,25 @@ int freadMain(freadMainArgs _args) {
       if (thisLineLen>maxLen) maxLen=thisLineLen;
     }
     if (skipThisJump) continue;
-    if (jump==nJumps-1) lastSampleJumpOk = true;
+    // if (jump==nJumps-1) lastSampleJumpOk = true;
     if (bumped) memcpy(type, tmpType, (size_t)ncol);
     if (verbose && (bumped || jump==0 || jump==nJumps-1)) {
       DTPRINT("  Type codes (jump %03d)    : %s  Quote rule %d\n", jump, typesAsString(ncol), quoteRule);
     }
   }
-  if (lastSampleJumpOk) {
-    while (ch<eof && isspace(*ch)) ch++;
-    if (ch<eof)
+  /*
+  ch = lastRowEnd;
+  while (ch<eof && isspace(*ch)) ch++;
+  if (ch<eof) {
+    if (lastSampleJumpOk) {
       DTWARN("Found the last consistent line but text exists afterwards. Consider fill=TRUE and/or blank.lines.skip=TRUE. First 200 characters of discarded line: <<%s>>", strlim(ch,200));
-  } else {
-    // nextGoodLine() was false for the last (extra) jump to check the end
-    // must set lastRowEnd to eof accordingly otherwise it'll be left wherever the last good jump finished
-    lastRowEnd = eof;
+    } else {
+      // nextGoodLine() was false for the last (extra) jump to check the end
+      // must set lastRowEnd to eof accordingly otherwise it'll be left wherever the last good jump finished
+      lastRowEnd = eof;
+    }
   }
-
+*/
   ch = pos;
   if (args.header==NA_BOOL8) {
     for (int j=0; j<ncol; j++) tmpType[j]=type0;   // reuse tmpType
@@ -1705,7 +1713,7 @@ int freadMain(freadMainArgs _args) {
     if (verbose) DTPRINT("  All rows were sampled since file is small so we know nrow=%llu exactly\n", (llu)sampleLines);
     estnrow = allocnrow = sampleLines;
   } else {
-    bytesRead = (size_t)(lastRowEnd - firstRowStart);
+    bytesRead = (size_t)(eof - firstRowStart);
     meanLineLen = (double)sumLen/sampleLines;
     estnrow = CEIL(bytesRead/meanLineLen);  // only used for progress meter and verbose line below
     double sd = sqrt( (sumLenSq - (sumLen*sumLen)/sampleLines)/(sampleLines-1) );
@@ -1846,6 +1854,7 @@ int freadMain(freadMainArgs _args) {
   char stopErr[stopErrSize+1]="";  // must be compile time size: the message is generated and we can't free before STOP
   size_t DTi = 0;   // the current row number in DT that we are writing to
   const char *prevJumpEnd = pos;  // the position after the last line the last thread processed (for checking)
+  const char *skippedFooter = NULL;  // if footer is skipped, this is its location to be printed.
   int buffGrown=0;
   // chunkBytes is the distance between each jump point; it decides the number of jumps
   // We may want each chunk to write to its own page of the final column, hence 1000*maxLen
@@ -1884,7 +1893,7 @@ int freadMain(freadMainArgs _args) {
   if (verbose) DTPRINT("[11] Read the data\n");
   read:  // we'll return here to reread any columns with out-of-sample type exceptions
   if (verbose) DTPRINT("  jumps=[%d..%d), chunk_size=%llu, total_size=%llu\n",
-                       jump0, nJumps, (llu)chunkBytes, (llu)(lastRowEnd-pos));
+                       jump0, nJumps, (llu)chunkBytes, (llu)(eof-pos));
   ASSERT(allocnrow <= nrowLimit, "allocnrow(%llu) <= nrowLimit(%llu)", (llu)allocnrow, (llu)nrowLimit);
   #pragma omp parallel num_threads(nth)
   {
@@ -1964,7 +1973,7 @@ int freadMain(freadMainArgs _args) {
 
       const char *tch = pos + (size_t)jump*chunkBytes;
       const char *tlineStart = tch;
-      const char *nextJump = jump<nJumps-1 ? tch+chunkBytes+1/*\n*/ : lastRowEnd;
+      const char *nextJump = jump<nJumps-1 ? tch+chunkBytes+1/*\n*/ : eof;
       // +1 is for when nextJump happens to fall exactly on a \n. The
       // next thread will start one line later because nextGoodLine() starts by finding next eol.
       // Even when nextGoodLine goes away, we still want the +1 to avoid always running-on.
@@ -2222,16 +2231,37 @@ int freadMain(freadMainArgs _args) {
           }
         }
         if (myWrongNumberFields>=0) {
-          stopTeam = true;
-          if (myWrongNumberFields<ncol) {
-            snprintf(stopErr, stopErrSize,
-              "Line %llu has too few fields. Expecting %d fields but found %d. Consider fill=TRUE. First 500 characters of line: <<%s>>",
-              (llu)ctx.DTi+myNrow+row1line, ncol, myWrongNumberFields, strlim(tlineStart, 500));
-          } else {
-            snprintf(stopErr, stopErrSize,
-              "Line %llu has more than the expected %d fields. Stopped on <<%s>> at character %d. "
-              "Consider setting 'comment.char=' if there is a trailing comment to be ignored. First 500 characters of line: <<%s>>",
-              (llu)ctx.DTi+myNrow+row1line, ncol, strlim(tch+1,10), (int)(tch-tlineStart+2), strlim(tlineStart,500));
+          if (jump==nJumps-1) {  // the last jump; we should be at the end of the file or at the start of the footer
+            const char *tt = tlineStart;
+            while (tt<eof && isspace(*tt)) tt++;
+            if (tt==eof) {
+              // whitespace at the end of the file is always skipped
+              myWrongNumberFields=-1;
+              tch=eof;
+            } else {
+              skippedFooter = tt;
+              // now ensure it's just one line.
+              while (tt<eof && *tt!='\n' && *tt!='\r') tt++;
+              while (tt<eof && isspace(*tt)) tt++;
+              if (tt==eof) {
+                // it's a one-line footer before eof, so that's ok to warn about dropping.
+                myWrongNumberFields=-1;
+                tch = eof;
+              }
+            }
+          }
+          if (myWrongNumberFields>=0) {
+            stopTeam = true;
+            if (myWrongNumberFields<ncol) {
+              snprintf(stopErr, stopErrSize,
+                "Line %llu has too few fields. Expecting %d fields but found %d. Consider fill=TRUE. First 500 characters of line: <<%s>>",
+                (llu)ctx.DTi+myNrow+row1line, ncol, myWrongNumberFields, strlim(tlineStart, 500));
+            } else {
+              snprintf(stopErr, stopErrSize,
+                "Line %llu has more than the expected %d fields. Stopped on <<%s>> at character %d. "
+                "Consider setting 'comment.char=' if there is a trailing comment to be ignored. First 500 characters of line: <<%s>>",
+                (llu)ctx.DTi+myNrow+row1line, ncol, strlim(tch+1,10), (int)(tch-tlineStart+2), strlim(tlineStart,500));
+            }
           }
         }
         // tell next thread (she not me) 2 things :
@@ -2344,6 +2374,9 @@ int freadMain(freadMainArgs _args) {
     }
   }
   setFinalNrow(DTi);
+  if (skippedFooter) {
+    DTWARN("Discarded footer: <<%s>>", strlim(skippedFooter,500));
+  }
 
   if (verbose) {
     DTPRINT("=============================\n");

From 67b39b39074d96c2a3cab7ab48dda338d1dd43ca Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Fri, 9 Feb 2018 22:03:48 -0800
Subject: [PATCH 06/14] Interim

---
 inst/tests/tests.Rraw | 24 +++++++++++++++---------
 src/fread.c           | 21 ++++++++++++++++++++-
 2 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index c97bd80f89..e6521467a5 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -7423,9 +7423,9 @@ test(1557.5, names(fread(str, col.names=1:2)), error="Passed a vector of type")
 
 # Fix for #773
 f = testDir("issue_773_fread.txt")
-ans = data.table(AAA=as.character(c(4,7,rep(1,17),31,21)),
-                 BBB=as.character(c(5,8,rep(2,17),32,22)),
-                 CCC=as.integer(c(6,9,rep(3,17),33,23)))
+ans = data.table(AAA=INT(c(4,7,rep(1,17),31,21)),
+                 BBB=INT(c(5,8,rep(2,17),32,22)),
+                 CCC=INT(c(6,9,rep(3,17),33,23)))
 test(1558.1, fread(f), error="Line 23 has too few fields.*Expecting 3 fields but found 2.*<<ZZZ.*YYY>>")
 test(1558.2, fread(f, nrow=21L), ans)
 test(1558.3, fread(f, nrow=21L, fill=TRUE), ans)
@@ -11522,12 +11522,18 @@ test(1870.3, fread("A,B,\n,,\n,500,3.4"), data.table(A=NA, B=c(NA,500L), V3=c(NA
 
 # nrows= now ignores errors after those nrows as expected and skip= determines first row for sure, #1267
 txt = "V1, V2, V3\n2,3,4\nV4, V5, V6, V7\n4,5,6,7\n8,9,10,11\n"
-test(1871.1, fread(txt), data.table(V4=INT(4,8), V5=INT(5,9), V6=INT(6,10), V7=INT(7,11)))
-test(1871.2, fread(txt, nrows=1), data.table(V4=4L, V5=5L, V6=6L, V7=7L))
-test(1871.3, fread(txt, skip=0), ans<-data.table(V1=2L, V2=3L, V3=4L), warning="discarded line V4, V5")
-test(1871.4, fread(txt, skip=0, nrows=1), ans)
-test(1871.5, fread(txt, skip=0, nrows=1, header=TRUE), ans)
-test(1871.6, fread(txt, skip=0, nrows=1, header=FALSE), data.table(V1=c("V1","2"), V2=c("V2","3"), V3=c("V3","4")))
+test(1871.1, fread(txt), ans <- data.table(V4=INT(4,8), V5=INT(5,9), V6=INT(6,10), V7=INT(7,11)))
+test(1871.2, fread(txt, skip=2), ans)
+test(1871.3, fread(txt, skip=2, nrow=1), ans[1,])
+test(1871.4, fread(txt, skip=2, nrow=3), ans)
+test(1871.5, fread(txt, skip=3), ans <- data.table(V1=INT(4,8), V2=INT(5,9), V3=INT(6,10), V4=INT(7,11)))
+test(1871.6, fread(txt, skip=3, nrow=1), ans[1,])
+test(1871.7, fread(txt, nrows=1), data.table(V1=2L, V2=3L, V3=4L))
+test(1871.8, fread(txt, skip=0), error="Line 3 has more than the expected 3 fields.*<<V4, V5, V6, V7>>")
+test(1871.9, fread(txt, skip=0, nrows=1), ans<-data.table(V1=2L, V2=3L, V3=4L))
+test(1871.11, fread(txt, skip=0, nrows=1, header=TRUE), ans)
+test(1871.12, fread(txt, skip=0, nrows=1, header=FALSE), data.table(V1="V1", V2="V2", V3="V3"))
+test(1871.13, fread(txt, skip=0, nrows=2, header=FALSE), data.table(V1=c("V1","2"), V2=c("V2","3"), V3=c("V3","4")))
 # for ( i in 100:1) {
 #   lines <- paste0(paste(rep("1,2,3", i), collapse='\n'), "\n1,2")
 #   fread(lines, nrows=i)
diff --git a/src/fread.c b/src/fread.c
index d64881801b..4c849189e5 100644
--- a/src/fread.c
+++ b/src/fread.c
@@ -1997,7 +1997,7 @@ int freadMain(freadMainArgs _args) {
         .anchor = thisJumpStart,
       };
 
-      while (tch<nextJump) {
+      while (tch<nextJump && (nth>1 || DTi+myNrow<nrowLimit)) {  // setting nrowLimit sets nth to 1 to avoid bump or error on row after nrowLimit
         if (myNrow == myBuffRows) {
           // buffer full due to unusually short lines in this chunk vs the sample; e.g. #2070
           myBuffRows *= 1.5;
@@ -2374,9 +2374,28 @@ int freadMain(freadMainArgs _args) {
     }
   }
   setFinalNrow(DTi);
+
   if (skippedFooter) {
     DTWARN("Discarded footer: <<%s>>", strlim(skippedFooter,500));
   }
+  else if (prevJumpEnd<eof && DTi<nrowLimit) {
+    ch = prevJumpEnd;
+    while (ch<eof && isspace(*ch)) ch++;
+    if (ch==eof) {
+      // whitespace at the end of the file is always skipped
+    } else {
+      const char *skippedFooter = ch;
+      // now ensure it's just one line.
+      while (ch<eof && *ch!='\n' && *ch!='\r') ch++;
+      while (ch<eof && isspace(*ch)) ch++;
+      if (ch==eof) {
+        DTWARN("Discarded footer: <<%s>>", strlim(skippedFooter,500));
+      }
+      else {
+        STOP("More than one line: <<%s>>", strlim(skippedFooter,500));
+      }
+    }
+  }
 
   if (verbose) {
     DTPRINT("=============================\n");

From 5ebf28bc9e881367fcda8b22ea0048e1ee281e21 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Mon, 12 Feb 2018 03:22:14 -0800
Subject: [PATCH 07/14] Interim

---
 inst/tests/tests.Rraw |  81 ++++++++++++-----------
 src/fread.c           | 145 ++++++++++++++++++------------------------
 2 files changed, 105 insertions(+), 121 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index e6521467a5..f8af6e4fd3 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -2746,9 +2746,9 @@ test(966, fread(input, colClasses=list(character=2:4)), error="Column number 4 (
 test(967, nrow(fread( paste( rep('a\tb\n', 10000), collapse=''), header=FALSE)), 10000L)
 
 # Test fread warns about removal of any footer (and autostart skips up over it)
-test(968, fread("A,B\n1,3\n2,4\n\nRowcount: 2\n"), data.table(A=1:2,B=3:4), warning="Discarded footer.*Rowcount: 2")
-test(969, fread("A,B\n1,3\n2,4\n\n\nRowcount: 2"), data.table(A=1:2,B=3:4), warning="Discarded footer.*Rowcount: 2")
-test(970, fread("A,B\n1,3\n2,4\n\n\nRowcount: 2\n\n"), data.table(A=1:2,B=3:4), warning="Discarded footer.*Rowcount: 2")
+test(968, fread("A,B\n1,3\n2,4\n\nRowcount: 2\n"), data.table(A=1:2,B=3:4), warning="Discarded single-line footer.*Rowcount: 2")
+test(969, fread("A,B\n1,3\n2,4\n\n\nRowcount: 2"), data.table(A=1:2,B=3:4), warning="Discarded single-line footer.*Rowcount: 2")
+test(970, fread("A,B\n1,3\n2,4\n\n\nRowcount: 2\n\n"), data.table(A=1:2,B=3:4), warning="Discarded single-line footer.*Rowcount: 2")
 
 # fread skip override
 input = "some,bad,data\nA,B,C\n1,3,5\n2,4,6\n"
@@ -2758,9 +2758,9 @@ test(973, fread(input, skip=2), data.table(V1=1:2,V2=3:4,V3=5:6))
 test(974, fread(input, skip=2, header=TRUE), data.table("1"=2L,"3"=4L,"5"=6L))
 test(975, fread(input, skip="B"), data.table(A=1:2,B=3:4,C=5:6))
 input = "\n\nA,B\n1,3\n2,4\n\nC,D\n5,7\n6,8\n\nE,F\n9,11\n10,12\n"   # 3 tables in one file
-test(976, fread(input), error="Line 6 has too few fields. Expecting 2 fields but found 0")
-test(977, fread(input, skip="C"), error="Line 10 has too few fields. Expecting 2 fields but found 0")
-test(978.1, fread(input, skip="D"), error="Line 10")
+test(976, fread(input), data.table(A=1:2, B=3:4), warning="Stopped early on line 6.*First discarded non-empty line: <<C,D>>")
+test(977, fread(input, skip="C"), ans<-data.table(C=5:6, D=7:8), warning="Stopped early on line 10.*First discarded non-empty line: <<E,F>>")
+test(978.1, fread(input, skip="D"), ans, warning="Stopped.*line 10.*<<E,F>>")
 test(978.2, fread(input, skip=",F"), data.table(E=9:10, F=11:12))
 test(978.3, fread(input, skip=9), data.table(E=9:10, F=11:12))
 
@@ -2887,17 +2887,19 @@ DT = data.table(a=c(NA,NA,FALSE,FALSE), b=c(1,1,2,2))
 test(1009, DT[,list(mean(a), sum(a)),by=b], data.table(b=c(1,2),V1=c(NA,0),V2=c(NA_integer_,0L))) # sum(logical()) should be integer, not real
 
 # an fread error shouldn't hold a lock on the file on Windows
-f = tempfile()
-cat('A,B\n1,2\n3\n5,6\n', file=f)
-test(1010.1, fread(f), error="Line 3 has too few fields.*Expecting 2 fields but found 1.*fill.*TRUE")
+cat('A,B\n1,2\n3\n5,6\n', file=(f<-tempfile()))
+test(1010.1, fread(f), ans<-data.table(A=TRUE, B=2L), warning=(txt<-"Stopped early on line 3.*Expected 2 fields but found 1.*fill.*TRUE.*<<3>>"))
+oldw = options(warn=2)   # !!TODO!!: this doesn't seem sufficient in test framework to turn the warning into error.
+test(1010.2, fread(f), ans, warning=txt)
 cat('7\n8,9',file=f,append=TRUE)   # that append works after error
-test(1010.2, fread(f,fill=TRUE), data.table(A=INT(1,3,5,7,8), B=INT(2,NA,6,NA,9)))
-test(1010.3, fread(f), error="Line 3 has too few fields.*Expecting 2 fields but found 1.*fill.*TRUE")
+test(1010.3, fread(f,fill=TRUE), data.table(A=INT(1,3,5,7,8), B=INT(2,NA,6,NA,9)))
+test(1010.4, fread(f), ans, warning=txt)
 cat('A,B\n1,2\n3\n5,6\n', file=f)  # that overwrite works after error
-test(1010.4, fread(f,fill=TRUE), data.table(A=INT(1,3,5), B=INT(2,NA,6)))
-test(1010.5, fread(f), error="Line 3 has too few fields.*Expecting 2 fields but found 1.*fill.*TRUE")
+test(1010.5, fread(f,fill=TRUE), data.table(A=INT(1,3,5), B=INT(2,NA,6)))
+test(1010.6, fread(f), ans, warning=txt)
 unlink(f)                          # that file can be removed after error
-test(1010.6, !file.exists(f))
+test(1010.7, !file.exists(f))
+options(oldw)
 
 # detection of unescaped quotes, quote rule 3
 test(1011, fread('A,B\n"aa",1\n"bb,2\n"cc",3\n'), data.table(A=c('aa', '"bb', 'cc'), B=1:3))
@@ -6189,7 +6191,8 @@ test(1451.8, shallow(DT, character(0)), null.data.table())  # length-0 input wor
 test(1452, fread("notexist.csv"), error="File 'notexist.csv' does not exist; getwd()==")
 
 # Test for #802
-test(1453, fread(testDir("fread_line_error.csv")), error="Line 12 has more than.*24 fields.*Stopped on <<,M,B.Y,Q.B>> at character 61.*<<31,3-0-7 4:1:7.5 HVV,")
+test(1453, fread(testDir("fread_line_error.csv")), fread(testDir("fread_line_error.csv"), nrow=11),
+           warning="Stopped.*line 12. Expected 24 fields but found 47.*First discarded non-empty line: <<31,3-0-7 4:1:7.5 HVV,")
 # TODO: add comment=="#".   Ensure only after last field is observed.
 
 # no-sep-found => sep="\n", use case for this in #738
@@ -7409,9 +7412,9 @@ str2="YYYY MM DD HH mm             19490             40790
 test(1555.14, fread(str1), fread(str2))
 
 # fix for #1330
-test(1556.1, fread(testDir("issue_1330_fread.txt"), nrow=2), data.table(a=1:2, b=1:2))
-test(1556.2, fread(testDir("issue_1330_fread.txt"), nrow=3), error="Line 4 has too few fields")
-test(1556.3, fread(testDir("issue_1330_fread.txt"), nrow=4), error="Line 4 has too few fields")
+test(1556.1, fread(testDir("issue_1330_fread.txt"), nrow=2), ans<-data.table(a=1:2, b=1:2))
+test(1556.2, fread(testDir("issue_1330_fread.txt"), nrow=3), ans, warning=w<-"Stopped early on line 4. Expected 2.*found 0.*First discarded non-empty line: <<3.*3>>")
+test(1556.3, fread(testDir("issue_1330_fread.txt"), nrow=4), ans, warning=w)
 
 # FR #768
 str="1,2\n3,4\n"
@@ -7426,10 +7429,10 @@ f = testDir("issue_773_fread.txt")
 ans = data.table(AAA=INT(c(4,7,rep(1,17),31,21)),
                  BBB=INT(c(5,8,rep(2,17),32,22)),
                  CCC=INT(c(6,9,rep(3,17),33,23)))
-test(1558.1, fread(f), error="Line 23 has too few fields.*Expecting 3 fields but found 2.*<<ZZZ.*YYY>>")
+test(1558.1, fread(f), ans, warning=w<-"Stopped early on line 23. Expected 3 fields but found 2[.].*First discarded non-empty line: <<ZZZ.*YYY>>")
 test(1558.2, fread(f, nrow=21L), ans)
 test(1558.3, fread(f, nrow=21L, fill=TRUE), ans)
-test(1558.4, fread(f, nrow=22L), error="Line 23 has too few fields.*Expecting 3 fields but found 2.*<<ZZZ.*YYY>>")
+test(1558.4, fread(f, nrow=22L), ans, warning=w)
 test(1558.5, fread(f, nrow=22L, fill=TRUE), rbind(ans, list("ZZZ","YYY",NA)))
 
 # FR # 1338 -- check.names argument of setDT
@@ -7600,7 +7603,7 @@ test(1577.3, levels(X$b), character(0))
 input = "Header not 2 columns\n\n1,3\n2,4"
 test(1578.0, fread(input), data.table(V1=1:2, V2=3:4))
 input = "a,b\n\n1,3\n2,4"
-test(1578.1, fread(input), data.table(a=logical(), b=logical()), warning="Found.*discarded.*<<1,3>>")
+test(1578.1, fread(input), data.table(a=logical(), b=logical()), warning="Stopped early on line 2[.].*First discarded.*<<1,3>>")
 test(1578.2, fread(input, blank.lines.skip=TRUE), data.table( a=1:2,  b=3:4))
 input = "a,b\n\n\n1,3\n2,4"
 test(1578.3, fread(input, blank.lines.skip=TRUE), data.table( a=1:2,  b=3:4))
@@ -7608,9 +7611,10 @@ input = "a,b\n\n\n1,3\n\n2,4\n\n"
 test(1578.4, fread(input, blank.lines.skip=TRUE), data.table( a=1:2,  b=3:4))
 
 f = testDir("530_fread.txt")
-test(1578.5, fread(f, skip=47L), data.table(a=logical(), b=logical()), warning="Found.*discarded.*<<1,3>>")
+test(1578.5, fread(f, skip=47L), data.table(a=logical(), b=logical()), warning="Stopped early.*discarded.*<<1,3>>")
 test(1578.6, fread(f, skip=49L), data.table(V1=1:2, V2=3:4))
 test(1578.7, fread(f, skip=47L, blank.lines.skip=TRUE), data.table(a=1:2, b=3:4))
+test(1578.8, fread(f, skip=48L), data.table(V1=1:2, V2=3:4))  # start on blank line 49 and skip="auto" to first data row on line 50
 
 # gforce optimisations
 dt = data.table(x  = sample(letters, 300, TRUE),
@@ -10528,8 +10532,8 @@ if ("package:nanotime" %in% search()) {
 
 # check too many fields error from ,\n line ending highlighted in #2044
 test(1753.1, fread("X,Y\n1,2\n3,4\n5,6"), data.table(X=INT(1,3,5),Y=INT(2,4,6)))
-test(1753.2, fread("X,Y\n1,2\n3,4,\n5,6"), error="Line 3 has more than.*2 fields. Stopped on <<,>> at character 4.*<<3,4,>>")
-test(1753.3, fread("X,Y\n1,2\n3,4,7\n5,6"), error="Line 3 has more than.*2 fields. Stopped on <<,7>> at character 4.*<<3,4,7>>")
+test(1753.2, fread("X,Y\n1,2\n3,4,\n5,6"), ans<-data.table(X=TRUE,Y=2L), warning="Stopped.*line 3. Expected 2 fields but found 3.*discarded.*<<3,4,>>")
+test(1753.3, fread("X,Y\n1,2\n3,4,7\n5,6"), ans,                         warning="Stopped.*line 3. Expected 2 fields but found 3.*discarded.*<<3,4,7>>")
 
 # issue 2051 where a quoted field contains ",  New quote rule detection handles it.
 test(1753.4, fread(testDir("issue_2051.csv"))[2,grep("^Our.*tool$",COLUMN50)], 1L)
@@ -10979,8 +10983,8 @@ test(1808.2, fread("A,B\r1,2\r3,4\r"), data.table(A=c(1L,3L),B=c(2L,4L)))
 cat("A,B\r1,2\r3,4",file=f<-tempfile())
   test(1808.3, fread(f), data.table(A=c(1L,3L),B=c(2L,4L)))
 unlink(f)
-test(1808.4, fread("A,B\r1,3\r\r\r2,4\r"), data.table(A=TRUE, B=3L), warning="Discarded footer: <<2,4>>")
-test(1808.5, fread("A,B\r4,3\r\r \r2,4\r"), data.table(A=4L, B=3L), warning="Discarded footer: <<2,4>>")
+test(1808.4, fread("A,B\r1,3\r\r\r2,4\r"), data.table(A=TRUE, B=3L), warning="Discarded single-line footer: <<2,4>>")
+test(1808.5, fread("A,B\r4,3\r\r \r2,4\r"), data.table(A=4L, B=3L), warning="Discarded single-line footer: <<2,4>>")
 test(1808.6, fread("A,B\r1,3\r\r \r2,4\r", blank.lines.skip=TRUE), data.table(A=1:2, B=3:4))
 test(1808.7, fread("A,B\r1,3\r\r \r2,4\r", fill=TRUE), data.table(A=c(1L,NA,NA,2L), B=c(3L,NA,NA,4L)))
 test(1808.8, fread("A,B\r1,3\r\r \r2,\r", blank.lines.skip=TRUE, fill=TRUE), data.table(A=1:2, B=c(3L,NA)))
@@ -11017,9 +11021,9 @@ test(1818, fread(testDir("session_aborted_fatal_error.txt"))[c(1,.N),c(1,2,250,2
 test(1819, as.ITime("2015-09-29 08:22:00"), structure(30120L, class = "ITime"))
 
 # Issue 2287: the % sign in the error/warning message should not be interpreted as a format string!
-test(1820.1, fread("name,id\nfoo,2\nbar%\n"), data.table(name="foo", id=2L), warning="Discarded footer: <<bar%>>")
-test(1820.2, fread("name,id\nfoo,2\nbar%d"), data.table(name="foo", id=2L), warning="Discarded footer: <<bar%d>>")
-test(1820.3, fread("name,id\nfoo,2\nbar%s"), data.table(name="foo", id=2L), warning="Discarded footer: <<bar%s>>")
+test(1820.1, fread("name,id\nfoo,2\nbar%\n"), data.table(name="foo", id=2L), warning="Discarded single-line footer: <<bar%>>")
+test(1820.2, fread("name,id\nfoo,2\nbar%d"), data.table(name="foo", id=2L), warning="Discarded single-line footer: <<bar%d>>")
+test(1820.3, fread("name,id\nfoo,2\nbar%s"), data.table(name="foo", id=2L), warning="Discarded single-line footer: <<bar%s>>")
 
 # new argument for print.data.table: col.names
 #   issue #1482 / PR #1483
@@ -11046,7 +11050,8 @@ src = paste(c("A,B",
               paste(rep("3,4", 10000), collapse="\n"),
               ""),
             collapse="\n")
-test(1822, fread(src), error="Line 102 has too few.*Expecting 2 fields but found 1.*<<999>>")
+test(1822, fread(src), data.table(A=rep(1L,100L), B=2L), warning="Stopped early on line 102. Expected 2 fields but found 1.*discarded.*<<999>>")
+# NB: The first sample jump uses the first 100 rows and just misses the 999. Since the data is large enough, the other jumps capture the type bump from 1 (bool) to 3 (int).
 
 # Issue 2326: .SD mistakenly includes column being set when get() appears in j
 DT <- data.table(x = seq(1, 10), y = seq(10, 1))
@@ -11174,8 +11179,8 @@ for (i in 0:1000) {
   if (i==502) write("-999,Bad,Line,0.0,0.0,extra\n", f, append=TRUE)
 }
 test(1835, fread(f, verbose=TRUE),
-  output = "Not using sample from jump 50.*could not establish the next true line start.*jumps=[0..2)",
-  error  = "Line 42253 has more than the expected 5 fields.*<<-999,Bad,Line,0.0,0.0,extra>>")
+  output = "A line with too-few or too-many.*jump 50.*Type bumps.*ignored",
+  warning = "Stopped.*line 42253. Expected 5 fields but found 6.*discarded.*<<-999,Bad,Line,0.0,0.0,extra>>")
 unlink(f)
 
 test(1836, fread('1,2,"3,a"\n4,5,"6,b"'), data.table(V1=c(1L,4L), V2=c(2L,5L), V3=c("3,a","6,b")))   # 2196
@@ -11209,7 +11214,7 @@ test(1839.6, fread(txt, sep=""), data.table("DECLARATION OF INDEPENDENCE"=lines[
 txt = 'a,b\n ab,cd,ce\n abcdef\n hjkli \n'  # now auto detected as ncol 1 anyway
 test(1840.1, fread(txt), data.table("a,b" = c("ab,cd,ce","abcdef","hjkli")))
 write('a,b\n ab,cd,ce\nabc,def \n hj,kli  ', f<-tempfile())  # write to file to generate \r\n line ending on Windows, test 1840.6 below
-test(1840.2, fread(f), error="more than the expected")
+test(1840.2, fread(f), data.table(a=logical(), b=logical()), warning="Stopped early on line 2.*discarded.*<<ab,cd,ce>>")
 test(1840.3, fread(f, sep=NA), error="!is.na(sep) is not TRUE")
 test(1840.4, fread(f, sep=NA_character_), error="!is.na(sep) is not TRUE")
 test(1840.5, fread(f, sep=""), ans<-data.table("a,b"=c("ab,cd,ce","abc,def","hj,kli")))
@@ -11370,7 +11375,7 @@ test(1856.2, fread("A,B\n\n"), ans)
 test(1856.3, fread("A,B\n\n\n"), ans)
 test(1856.4, fread("A,B\n3,4\n\n\n"), data.table(A=3L, B=4L))
 test(1856.5, fread("A,B\n3,4\n,\n\n\n"), data.table(A=c(3L,NA), B=c(4L,NA)))
-test(1856.6, fread("A,B\n3,4\n\n5,6\n"), data.table(A=3L, B=4L), warning="Discarded footer: <<5,6>>")
+test(1856.6, fread("A,B\n3,4\n\n5,6\n"), data.table(A=3L, B=4L), warning="Discarded single-line footer: <<5,6>>")
 DTs = list(                                      # passed fread(fwrite(DT))==DT before fix?
   data.table(A=logical(0)),                      # yes
   data.table(A=NA),                              # no
@@ -11430,7 +11435,9 @@ test(1864.2, DT[J("\u516c\u5141\u4ef7\u503c\u53d8\u52a8\u635f\u76ca"), z], 1L)
 data = rep("a,b,c,d,e,f,g", 2100)
 data[111] = "a,b,c,d,e,f,g,"
 cat(data, file=(f<-tempfile()), sep="\n")
-test(1865, fread(f, header=FALSE), error="Line 111.*more than.*7 fields.*Stopped on <<,>> at character 14.*<<a,b,c,d,e,f,g,>>")
+test(1865, fread(f, header=FALSE),
+           data.table(V1=rep("a",110),V2="b",V3="c",V4="d",V5="e",V6="f",V7="g"),
+           warning="Stopped early on line 111. Expected 7.*found 8.*discarded.*<<a,b,c,d,e,f,g,>>")
 unlink(f)
 
 # "Natural" provision of value.name in measure.vars list, #1547 and #2551
@@ -11511,7 +11518,7 @@ test(1869.1, fread("A\r1\r\r\r2\r"), data.table(A=c(1L,NA,NA,2L)))
 test(1869.2, fread("A\r1\r\r\r2\r\r"), data.table(A=c(1L,NA,NA,2L,NA)))
 test(1869.3, fread("A\r1\r\r\r2\r\r\r"), data.table(A=c(1L,NA,NA,2L,NA,NA)))
 test(1869.4, fread("A,B\r2,3\r,\r,\r4,5\r\r"), data.table(A=c(2L,NA,NA,4L), B=c(3L,NA,NA,5L)))
-test(1869.5, fread("A,B\r2,3\r\r,\r2,4\r\r"), error="Line 3 has too few fields. Expecting 2 fields but found 0.")  # two line footer because of the comma. Only 1 line footers are auto discarded.
+test(1869.5, fread("A,B\r2,3\r\r,\r2,4\r\r"), data.table(A=2L, B=3L), warning="Stopped.*line 3. Expected 2 fields but found 0.*First discarded non-empty line: <<,>>")  # two line footer because of the comma
 test(1869.6, fread(testDir("colnames4096.csv")), error="very unusual.*one single line without any.*r.*n at the end.*and.*multiple of 4096")
 test(1869.7, fread(testDir("onecol4096.csv")), error="very unusual.*single column.*multiple of 4096.*ends with 2 or more end-of-line")
 
@@ -11529,7 +11536,7 @@ test(1871.4, fread(txt, skip=2, nrow=3), ans)
 test(1871.5, fread(txt, skip=3), ans <- data.table(V1=INT(4,8), V2=INT(5,9), V3=INT(6,10), V4=INT(7,11)))
 test(1871.6, fread(txt, skip=3, nrow=1), ans[1,])
 test(1871.7, fread(txt, nrows=1), data.table(V1=2L, V2=3L, V3=4L))
-test(1871.8, fread(txt, skip=0), error="Line 3 has more than the expected 3 fields.*<<V4, V5, V6, V7>>")
+test(1871.8, fread(txt, skip=0), data.table(V1=2L, V2=3L, V3=4L), warning="Stopped early.*line 3. Expected 3 fields but found 4.*discarded.*<<V4, V5, V6, V7>>")
 test(1871.9, fread(txt, skip=0, nrows=1), ans<-data.table(V1=2L, V2=3L, V3=4L))
 test(1871.11, fread(txt, skip=0, nrows=1, header=TRUE), ans)
 test(1871.12, fread(txt, skip=0, nrows=1, header=FALSE), data.table(V1="V1", V2="V2", V3="V3"))
diff --git a/src/fread.c b/src/fread.c
index 4c849189e5..cc1b22e7cf 100644
--- a/src/fread.c
+++ b/src/fread.c
@@ -939,11 +939,10 @@ static reader_fun_t fun[NUMTYPE] = {
 
 static int disabled_parsers[NUMTYPE] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0};
 
-static bool detect_types( const char **pch, int8_t type[], int ncol) {
+static int detect_types( const char **pch, int8_t type[], int ncol, bool *bumped) {
   // used in sampling column types and whether column names are present
   // test at most ncol fields. If there are fewer fields, the data read step later
   // will error (if fill==false) when the line number is known, so we don't need to handle that here.
-  bool bumped=false;
   const char *ch = *pch;
   double trash; // double so that this throw-away storage is aligned. char trash[8] would not be aligned.
   void *targets[9] = {NULL, &trash, NULL, NULL, &trash, NULL, NULL, NULL, &trash};
@@ -985,16 +984,16 @@ static bool detect_types( const char **pch, int8_t type[], int ncol) {
         //          quoteRule, quoteRule+1, field+1, jline, j, strlim(fieldStart,200));
         quoteRule++;
       }
-      bumped = true;
+      *bumped = true;
       ch = fieldStart;
     }
     field++;
-    if (*ch!=sep) break;
+    if (*ch!=sep || field==ncol) break;  // field==ncol is needed for 1753.2 where line ends with an extra comma but shouldn't, so shouldn't be moved over
     if (sep==' ') while (ch[1]==' ') ch++;
     ch++;
   }
   *pch = ch;
-  return bumped;
+  return field; // the number of fields so caller knows if ncol were read
 }
 
 
@@ -1557,7 +1556,7 @@ int freadMain(freadMainArgs _args) {
     }
     //lastSampleJumpOk = true;
     bool bumped = false;  // did this jump find any different types; to reduce verbose output to relevant lines
-    bool skipThisJump = false;
+    //bool skipThisJump = false;
     int jumpLine = 0;    // line from this jump point start
 
     while(ch<eof && jumpLine++<jumpLines) {
@@ -1574,10 +1573,12 @@ int freadMain(freadMainArgs _args) {
 
       int8_t previousLastColType = tmpType[ncol-1];  // to revert any bump in last colum due to final field on final row due to finalByte
 
-      if (detect_types(&ch, tmpType, ncol)) bumped=true;
-      // if too few fields are found, then we proceed here as if fill=true. If fill is "warning" or FALSE, we'll delay the message
-      // in the data read loop where we know the line number exactly. (We're jumping here so we don't know the line number yet).
-
+      if ( (detect_types(&ch, tmpType, ncol, &bumped)<ncol && !fill) ||
+           (!eol(&ch) && *ch!='\0') ) {
+        if (verbose && jump>0) DTPRINT("  A line with too-few or too-many fields was found in sample from jump %d. Type bumps from this jump will be ignored.\n", jump);
+        bumped = false;
+        break;
+      }
       if (ch==eof && finalByte && tmpType[ncol-1]!=previousLastColType) {
         // revert bump due to e.g. ,NA<eof> in the last field of last row where finalByte=='A' and N caused bump to character (test 894.0221)
         if (verbose) DTPRINT("  Reverted bump of final column from %d to %d on final field due to finalByte='%c'."
@@ -1585,19 +1586,6 @@ int freadMain(freadMainArgs _args) {
             previousLastColType, tmpType[ncol-1], finalByte);
         tmpType[ncol-1] = previousLastColType;
       }
-      if (!eol(&ch) && *ch!='\0') {
-        if (jump==0) {
-          STOP("Line %d has more than the expected %d fields. Stopped on <<%s>> at character %d. "
-             "Consider setting 'comment.char=' if there is a trailing comment to be ignored. First 500 characters of line: <<%s>>",
-             row1line+jumpLine-1, ncol, strlim(ch-1,10), (int)(ch-lineStart), strlim(lineStart,500));
-        }
-        if (verbose) {
-          DTPRINT("  Not using sample from jump %d. Looks like a complicated file where nextGoodLine could not establish the next true line start.\n", jump);
-          // the nrow estimate will still include the (probably wrong) row widths so far from this sample, but that's ok as it's just an estimate
-        }
-        skipThisJump = true;
-        break;
-      }
       ch += (*ch=='\n' || *ch=='\r');
       lastRowEnd = ch;
       int thisLineLen = (int)(ch-lineStart);  // ch is now on start of next line so this includes line ending already
@@ -1606,10 +1594,18 @@ int freadMain(freadMainArgs _args) {
       sumLenSq += thisLineLen*thisLineLen;
       if (thisLineLen<minLen) minLen=thisLineLen;
       if (thisLineLen>maxLen) maxLen=thisLineLen;
+      if (jump==0 && bumped) {
+        // apply bumps after each line in the first jump from the start in case invalid line stopped early on is in the first 100 lines.
+        // otherwise later jumps must complete fully before their bumps are applied. Invalid lines in those are more likely to be due to bad jump start.
+        memcpy(type, tmpType, (size_t)ncol);
+        bumped = false;  // detect_types() only updates &bumped when it's true. So reset to false here.
+      }
+    }
+    if (bumped) {
+      // when jump>0, apply the bumps (if any) at the end of the successfully completed jump sample
+      ASSERT(jump>0, "jump(%d)>0", jump);
+      memcpy(type, tmpType, (size_t)ncol);
     }
-    if (skipThisJump) continue;
-    // if (jump==nJumps-1) lastSampleJumpOk = true;
-    if (bumped) memcpy(type, tmpType, (size_t)ncol);
     if (verbose && (bumped || jump==0 || jump==nJumps-1)) {
       DTPRINT("  Type codes (jump %03d)    : %s  Quote rule %d\n", jump, typesAsString(ncol), quoteRule);
     }
@@ -1630,7 +1626,8 @@ int freadMain(freadMainArgs _args) {
   ch = pos;
   if (args.header==NA_BOOL8) {
     for (int j=0; j<ncol; j++) tmpType[j]=type0;   // reuse tmpType
-    detect_types(&ch, tmpType, ncol);
+    bool bumped=false;
+    detect_types(&ch, tmpType, ncol, &bumped);
     if (sampleLines>0) for (int j=0; j<ncol; j++) {
       if (tmpType[j]==CT_STRING && type[j]>type0 && type[j]<CT_STRING) {
         // >type0 can only happen if the column is not all blank
@@ -1854,7 +1851,7 @@ int freadMain(freadMainArgs _args) {
   char stopErr[stopErrSize+1]="";  // must be compile time size: the message is generated and we can't free before STOP
   size_t DTi = 0;   // the current row number in DT that we are writing to
   const char *prevJumpEnd = pos;  // the position after the last line the last thread processed (for checking)
-  const char *skippedFooter = NULL;  // if footer is skipped, this is its location to be printed.
+  // const char *skippedFooter = NULL;  // if footer is skipped, this is its location to be printed.
   int buffGrown=0;
   // chunkBytes is the distance between each jump point; it decides the number of jumps
   // We may want each chunk to write to its own page of the final column, hence 1000*maxLen
@@ -1912,7 +1909,7 @@ int freadMain(freadMainArgs _args) {
     const char *thisJumpStart=NULL;  // The first good start-of-line after the jump point
     size_t myNrow = 0; // the number of rows in my chunk
     size_t myBuffRows = initialBuffRows;  // Upon realloc, myBuffRows will increase to grown capacity
-    int myWrongNumberFields = -1;      // -1 means false. If set, it's set to >=0 holding the (wrong) number of fields observed
+    bool myStoppingEarly = false;      // true when an empty or too-short or too-long row is encountered when fill=false
 
     // Allocate thread-private row-major `myBuff`s
     ThreadLocalFreadParsingContext ctx = {
@@ -1940,7 +1937,7 @@ int freadMain(freadMainArgs _args) {
 
     #pragma omp for ordered schedule(dynamic) reduction(+:thNextGoodLine,thRead,thPush)
     for (int jump = jump0; jump < nJumps; jump++) {
-      if (stopTeam) continue;  // must continue and not break. We desire not to depend on (relatively new) omp cancel directive, yet
+      if (stopTeam && !myStoppingEarly) continue;  // must continue and not break. We desire not to depend on (relatively new) omp cancel directive, yet
       double tLast = 0.0;      // thread local wallclock time at last measuring point for verbose mode only.
       if (verbose) tLast = wallclock();
       if (myNrow) {
@@ -1969,10 +1966,11 @@ int freadMain(freadMainArgs _args) {
             progress((int)(100.0*jump/nJumps), ETA);
           }
         }
+        if (myStoppingEarly) continue;
       }
 
       const char *tch = pos + (size_t)jump*chunkBytes;
-      const char *tlineStart = tch;
+      const char *tLineStart = tch;
       const char *nextJump = jump<nJumps-1 ? tch+chunkBytes+1/*\n*/ : eof;
       // +1 is for when nextJump happens to fall exactly on a \n. The
       // next thread will start one line later because nextGoodLine() starts by finding next eol.
@@ -2015,7 +2013,7 @@ int freadMain(freadMainArgs _args) {
           fctx.targets[4] = (void*)((char*)ctx.buff4 + myNrow * rowSize4);
           fctx.targets[1] = (void*)((char*)ctx.buff1 + myNrow * rowSize1);
         }
-        tlineStart = tch;  // for error message
+        tLineStart = tch;  // for error message
         const char *fieldStart = tch;
         int j = 0;
 
@@ -2038,11 +2036,11 @@ int freadMain(freadMainArgs _args) {
             j++;
           }
           //*** END HOT. START TEPID ***//
-          if (tch==tlineStart) {
+          if (tch==tLineStart) {
             skip_white(&tch);
             if (*tch=='\0') break;  // empty last line
             if (eol(&tch) && skipEmptyLines) { tch++; continue; }
-            tch = tlineStart;  // in case white space at the beginning may need to be including in field
+            tch = tLineStart;  // in case white space at the beginning may need to be including in field
           }
           else if (eol(&tch) && j<ncol) {   // j<ncol needed for #2523 (erroneous extra comma after last field)
             int8_t thisSize = size[j];
@@ -2066,9 +2064,10 @@ int freadMain(freadMainArgs _args) {
         // TODO: reduce(slowerBranch++). So we can see in verbose mode if this is happening too much.
 
         if (sep==' ') {
-          while (*tch==' ') tch++;  // multiple sep=' ' at the tlineStart does not mean sep. We're at tLineStart because the fast branch above doesn't run when sep=' '
+          while (*tch==' ') tch++;  // multiple sep=' ' at the tLineStart does not mean sep. We're at tLineStart because the fast branch above doesn't run when sep=' '
           fieldStart = tch;
         }
+        bool checkedNumberOfFields = false;
         if (fill || ncol==1 || (*tch!='\n' && *tch!='\r')) while (j < ncol) {
           fieldStart = tch;
           int8_t joldType = type[j];
@@ -2112,6 +2111,13 @@ int freadMain(freadMainArgs _args) {
 
           if (thisType != joldType             // rare out-of-sample type exception.
               && (!finalByte || finalSep)) {   // don't bump the final field until we've replaced the finalByte (if any) test 894.0221 where final field is NA and finalByte=='A'
+            if (!checkedNumberOfFields && !fill) {
+              // check this line has the correct number of fields. If not, don't apply the bump from this invalid line. Instead fall through to myStoppingEarly below.
+              const char *tt = fieldStart;
+              int fieldsRemaining = countfields(&tt);
+              if (j+fieldsRemaining != ncol) break;
+              checkedNumberOfFields = true;
+            }
             #pragma omp critical
             {
               joldType = type[j];  // fetch shared value again in case another thread bumped it while I was waiting.
@@ -2182,9 +2188,9 @@ int freadMain(freadMainArgs _args) {
         if (j<ncol || (!eol(&tch) && *tch!='\0'))  {
           // Too few or too many columns observed (including empty line). If fill==true, fields should already have been filled
           // above due to continue inside while(j<ncol)
-          // Delay error to the ordered clause so that the first line with error is reported (e.g. if two jumps both
-          // see an error at the same time) with the correct line number too (which needs all preceeding jumps to process first)
-          myWrongNumberFields = j;  // used in error message. It was initialized to -1.
+          // We will push rows read so far and then warn we stopped early.
+          myStoppingEarly = true;
+          tch = tLineStart;
           break;
         }
         if (*tch!='\0') tch++;
@@ -2211,13 +2217,11 @@ int freadMain(freadMainArgs _args) {
         if (ctx.DTi >= allocnrow) {  // a previous thread has already reached the `allocnrow` limit
           stopTeam = true;
           myNrow = 0;
-          myWrongNumberFields = -1;  // forget the error, as it occured after the nrow limit requested by user
         } else if (myNrow + ctx.DTi >= allocnrow) {  // current thread's rows will fill all allocnrow
           if (allocnrow == nrowLimit) {
-            // allocnrow is the same as nrowLimit, no need to reallocate the DT,
-            // just truncate the rows in the current chunk
-            myNrow = nrowLimit - ctx.DTi;
-            myWrongNumberFields = -1;  // e.g. test 1558.2 where the format error is after nrowLimit
+            // the loop above should have stopped when the nrowLimit was reached
+            ASSERT(myNrow == nrowLimit-ctx.DTi, "myNrow[%llu] == nrowLimit[%llu]-ctx.DTi[%llu]", myNrow, nrowLimit, ctx.DTi);
+            ASSERT(nth==1, "nth[%d]==1", nth);
           } else if (myNrow + ctx.DTi > allocnrow) {
             // We reached `allocnrow` limit, but there are more data to read
             // left. In this case we arrange to terminate all threads but
@@ -2230,41 +2234,11 @@ int freadMain(freadMainArgs _args) {
             stopTeam = true;
           }
         }
-        if (myWrongNumberFields>=0) {
-          if (jump==nJumps-1) {  // the last jump; we should be at the end of the file or at the start of the footer
-            const char *tt = tlineStart;
-            while (tt<eof && isspace(*tt)) tt++;
-            if (tt==eof) {
-              // whitespace at the end of the file is always skipped
-              myWrongNumberFields=-1;
-              tch=eof;
-            } else {
-              skippedFooter = tt;
-              // now ensure it's just one line.
-              while (tt<eof && *tt!='\n' && *tt!='\r') tt++;
-              while (tt<eof && isspace(*tt)) tt++;
-              if (tt==eof) {
-                // it's a one-line footer before eof, so that's ok to warn about dropping.
-                myWrongNumberFields=-1;
-                tch = eof;
-              }
-            }
-          }
-          if (myWrongNumberFields>=0) {
-            stopTeam = true;
-            if (myWrongNumberFields<ncol) {
-              snprintf(stopErr, stopErrSize,
-                "Line %llu has too few fields. Expecting %d fields but found %d. Consider fill=TRUE. First 500 characters of line: <<%s>>",
-                (llu)ctx.DTi+myNrow+row1line, ncol, myWrongNumberFields, strlim(tlineStart, 500));
-            } else {
-              snprintf(stopErr, stopErrSize,
-                "Line %llu has more than the expected %d fields. Stopped on <<%s>> at character %d. "
-                "Consider setting 'comment.char=' if there is a trailing comment to be ignored. First 500 characters of line: <<%s>>",
-                (llu)ctx.DTi+myNrow+row1line, ncol, strlim(tch+1,10), (int)(tch-tlineStart+2), strlim(tlineStart,500));
-            }
-          }
+        if (myStoppingEarly) {
+          if (stopTeam || myNrow==0) myStoppingEarly=false;
+          stopTeam=true;
         }
-        // tell next thread (she not me) 2 things :
+        // tell next thread 2 things :
         prevJumpEnd = tch; // i) the \n I finished on so she can check (above) she started exactly on that \n good line start
         DTi += myNrow;     // ii) which row in the final result she should start writing to since now I know myNrow.
         ctx.nRows = myNrow;
@@ -2375,24 +2349,27 @@ int freadMain(freadMainArgs _args) {
   }
   setFinalNrow(DTi);
 
-  if (skippedFooter) {
+  /*if (skippedFooter) {
     DTWARN("Discarded footer: <<%s>>", strlim(skippedFooter,500));
-  }
-  else if (prevJumpEnd<eof && DTi<nrowLimit) {
+  }*/
+  if (prevJumpEnd<eof && DTi<nrowLimit) {
     ch = prevJumpEnd;
     while (ch<eof && isspace(*ch)) ch++;
     if (ch==eof) {
-      // whitespace at the end of the file is always skipped
+      // whitespace at the end of the file is always skipped ok
     } else {
       const char *skippedFooter = ch;
-      // now ensure it's just one line.
+      // detect if it's a single line footer. Commonly the row count from SQL queries.
       while (ch<eof && *ch!='\n' && *ch!='\r') ch++;
       while (ch<eof && isspace(*ch)) ch++;
       if (ch==eof) {
-        DTWARN("Discarded footer: <<%s>>", strlim(skippedFooter,500));
+        DTWARN("Discarded single-line footer: <<%s>>", strlim(skippedFooter,500));
       }
       else {
-        STOP("More than one line: <<%s>>", strlim(skippedFooter,500));
+        ch = prevJumpEnd;
+        int tt = countfields(&ch);
+        DTWARN("Stopped early on line %llu. Expected %d fields but found %d. Consider fill=TRUE and comment.char=. First discarded non-empty line: <<%s>>",
+          DTi+row1line, ncol, tt, strlim(skippedFooter,500));
       }
     }
   }

From 469458cda454fde6fba3eaa979132a648690851e Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Mon, 12 Feb 2018 13:22:37 -0800
Subject: [PATCH 08/14] Passing tests locally

---
 cc.R         | 4 ++--
 man/fread.Rd | 8 ++++----
 src/fread.c  | 3 ++-
 3 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/cc.R b/cc.R
index d0014704b3..541e7b6da8 100644
--- a/cc.R
+++ b/cc.R
@@ -53,9 +53,9 @@ cc = function(test=TRUE, clean=FALSE, debug=FALSE, cc_dir=Sys.getenv("CC_DIR"))
   cat(getwd(),"\n")
   if (clean) system("rm *.o *.so")
   if (debug) {
-    ret = system("MAKEFLAGS='-j CC=gcc-7 PKG_CFLAGS=-fno-openmp CFLAGS=-std=c99\\ -Og\\ -ggdb\\ -pedantic' R CMD SHLIB -d -o data.table.so *.c")
+    ret = system("MAKEFLAGS='-j CC=gcc PKG_CFLAGS=-fno-openmp CFLAGS=-std=c99\\ -O0\\ -ggdb\\ -pedantic' R CMD SHLIB -d -o data.table.so *.c")
   } else {
-    ret = system("MAKEFLAGS='-j CC=gcc-7 CFLAGS=-fopenmp\\ -std=c99\\ -O3\\ -pipe\\ -Wall\\ -pedantic' R CMD SHLIB -o data.table.so *.c")
+    ret = system("MAKEFLAGS='-j CC=gcc CFLAGS=-fopenmp\\ -std=c99\\ -O3\\ -pipe\\ -Wall\\ -pedantic' R CMD SHLIB -o data.table.so *.c")
     # TODO add -Wextra too?
   }
   if (ret) return()
diff --git a/man/fread.Rd b/man/fread.Rd
index a300b97ffb..d6b00cca8e 100644
--- a/man/fread.Rd
+++ b/man/fread.Rd
@@ -11,15 +11,15 @@
 \usage{
 fread(input, file, sep="auto", sep2="auto", dec=".", quote="\"",
 nrows=Inf, header="auto", na.strings="NA",
-stringsAsFactors=FALSE, verbose=getOption("datatable.verbose"), autostart=NA,
-skip=0, select=NULL, drop=NULL, colClasses=NULL,
+stringsAsFactors=FALSE, verbose=getOption("datatable.verbose"),
+skip="__auto__", select=NULL, drop=NULL, colClasses=NULL,
 integer64=getOption("datatable.integer64"),         # default: "integer64"
 col.names,
 check.names=FALSE, encoding="unknown",
 strip.white=TRUE, fill=FALSE, blank.lines.skip=FALSE, key=NULL,
 showProgress=interactive(),
 data.table=getOption("datatable.fread.datatable"),
-nThread=getDTthreads(), logical01=TRUE
+nThread=getDTthreads(), logical01=TRUE, autostart=NA
 )
 }
 \arguments{
@@ -32,7 +32,6 @@ nThread=getDTthreads(), logical01=TRUE
   \item{file}{ File path, useful when we want to ensure that no shell commands will be executed. File path can also be provided to \code{input} argument. }
   \item{stringsAsFactors}{ Convert all character columns to factors? }
   \item{verbose}{ Be chatty and report timings? }
-  \item{autostart}{ Deprecated and ignored with warning. Please use \code{skip} instead. }
   \item{skip}{ If 0 (default) start on the first line and from there finds the first row with a consistent number of columns. This automatically avoids irregular header information before the column names row. \code{skip>0} means ignore the first \code{skip} rows manually. \code{skip="string"} searches for \code{"string"} in the file (e.g. a substring of the column names row) and starts on that line (inspired by read.xls in package gdata). }
   \item{select}{ Vector of column names or numbers to keep, drop the rest. }
   \item{drop}{ Vector of column names or numbers to drop, keep the rest. }
@@ -51,6 +50,7 @@ nThread=getDTthreads(), logical01=TRUE
   \item{data.table}{ TRUE returns a \code{data.table}. FALSE returns a \code{data.frame}. }
   \item{nThread}{The number of threads to use. Experiment to see what works best for your data on your hardware.}
   \item{logical01}{If TRUE a column containing only 0s and 1s will be read as logical, otherwise as integer.}
+  \item{autostart}{ Deprecated and ignored with warning. Please use \code{skip} instead. }
 }
 \details{
 
diff --git a/src/fread.c b/src/fread.c
index cc1b22e7cf..68f6b94efc 100644
--- a/src/fread.c
+++ b/src/fread.c
@@ -992,6 +992,7 @@ static int detect_types( const char **pch, int8_t type[], int ncol, bool *bumped
     if (sep==' ') while (ch[1]==' ') ch++;
     ch++;
   }
+  if (ch==eof && finalByte && finalByte==sep && sep!=' ') field++;  // for test 1776.2
   *pch = ch;
   return field; // the number of fields so caller knows if ncol were read
 }
@@ -2110,7 +2111,7 @@ int freadMain(freadMainArgs _args) {
           }
 
           if (thisType != joldType             // rare out-of-sample type exception.
-              && (!finalByte || finalSep)) {   // don't bump the final field until we've replaced the finalByte (if any) test 894.0221 where final field is NA and finalByte=='A'
+              && (tch<eof || !finalByte || finalSep)) {   // don't bump the final field until we've replaced the finalByte (if any) test 894.0221 where final field is NA and finalByte=='A'
             if (!checkedNumberOfFields && !fill) {
               // check this line has the correct number of fields. If not, don't apply the bump from this invalid line. Instead fall through to myStoppingEarly below.
               const char *tt = fieldStart;

From c0f2558614f3524ac5abe80fe02e00244be98ed9 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Mon, 12 Feb 2018 15:26:13 -0800
Subject: [PATCH 09/14] Added another test from the issue

---
 NEWS.md               | 1 +
 inst/tests/tests.Rraw | 8 ++++----
 src/fread.c           | 2 +-
 3 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index 5c299a2603..9b7c05bac5 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -27,6 +27,7 @@
     * `sep=NULL` or `sep=""` (i.e., no column separator) can now be used to specify single column input reliably like `base::readLines`, [#1616](https://github.com/Rdatatable/data.table/issues/1616). `sep='\\n'` still works (even on Windows where line ending is actually `\\r\\n`) but `NULL` or `""` are now documented and recommended. Thanks to Dmitriy Selivanov for the pull request and many others for comments. As before, `sep=NA` is not valid; use the default `"auto"` for automatic separator detection. `sep='\\n'` may be deprecated in future.
     * Single-column input with blank lines is now valid and the blank lines are significant (meaning an NA in the single column). The blank lines are significant even at the very end, which may be surprising on first glance. The change is so that `fread(fwrite(DT))==DT` for single-column inputs containing NA which are written as blank. There is no change when `ncol>1` (i.e., input stops with detailed warning at the first blank line) because a blank line when `ncol>1` is invalid input due to no separators present instead of `ncol-1` separators.
     * Too few column names are now auto filled with default column names, with warning, [#1625](https://github.com/Rdatatable/data.table/issues/1625). If there is just one missing column name it is guessed to be for the first column (row names or an index), otherwise the column names are filled at the end. Similarly, too many column names now automatically sets `fill=TRUE`, with warning.
+    * `skip=` and `nrow=` are more reliable and no longer affected by invalid lines outside the range specified. Thanks to Ziyad Saeed and Kyle Chung for reporting, [#1267](https://github.com/Rdatatable/data.table/issues/1267). Tests added.
     * Many thanks to @yaakovfeldman, Guillermo Ponce, Arun Srinivasan, Hugh Parsonage, Mark Klik, Pasha Stetsenko, Mahyar K, Tom Crockett, @cnoelke, @qinjs, @etienne-s, Mark Danese, Avraham Adler, @franknarf1, @MichaelChirico, @tdhock, Luke Tierney for testing before release to CRAN: [#2070](https://github.com/Rdatatable/data.table/issues/2070), [#2073](https://github.com/Rdatatable/data.table/issues/2073), [#2087](https://github.com/Rdatatable/data.table/issues/2087), [#2091](https://github.com/Rdatatable/data.table/issues/2091), [#2107](https://github.com/Rdatatable/data.table/issues/2107), [fst#50](https://github.com/fstpackage/fst/issues/50#issuecomment-294287846), [#2118](https://github.com/Rdatatable/data.table/issues/2118), [#2092](https://github.com/Rdatatable/data.table/issues/2092), [#1888](https://github.com/Rdatatable/data.table/issues/1888), [#2123](https://github.com/Rdatatable/data.table/issues/2123), [#2167](https://github.com/Rdatatable/data.table/issues/2167), [#2194](https://github.com/Rdatatable/data.table/issues/2194), [#2238](https://github.com/Rdatatable/data.table/issues/2238), [#2228](https://github.com/Rdatatable/data.table/issues/2228), [#1464](https://github.com/Rdatatable/data.table/issues/1464), [#2201](https://github.com/Rdatatable/data.table/issues/2201), [#2287](https://github.com/Rdatatable/data.table/issues/2287), [#2299](https://github.com/Rdatatable/data.table/issues/2299), [#2285](https://github.com/Rdatatable/data.table/issues/2285), [#2251](https://github.com/Rdatatable/data.table/issues/2251), [#2347](https://github.com/Rdatatable/data.table/issues/2347), [#2222](https://github.com/Rdatatable/data.table/issues/2222), [#2352](https://github.com/Rdatatable/data.table/issues/2352), [#2246](https://github.com/Rdatatable/data.table/issues/2246), [#2370](https://github.com/Rdatatable/data.table/issues/2370), [#2371](https://github.com/Rdatatable/data.table/issues/2371), 
[#2404](https://github.com/Rdatatable/data.table/issues/2404), [#2196](https://github.com/Rdatatable/data.table/issues/2196), [#2322](https://github.com/Rdatatable/data.table/issues/2322), [#2453](https://github.com/Rdatatable/data.table/issues/2453), [#2446](https://github.com/Rdatatable/data.table/issues/2446), [#2464](https://github.com/Rdatatable/data.table/issues/2464), [#2457](https://github.com/Rdatatable/data.table/issues/2457), [#1895](https://github.com/Rdatatable/data.table/issues/1895), [#2481](https://github.com/Rdatatable/data.table/pull/2481), [#2499](https://github.com/Rdatatable/data.table/issues/2499), [#2516](https://github.com/Rdatatable/data.table/issues/2516), [#2520](https://github.com/Rdatatable/data.table/issues/2520), [#2512](https://github.com/Rdatatable/data.table/issues/2512), [#2523](https://github.com/Rdatatable/data.table/issues/2523), [#2542](https://github.com/Rdatatable/data.table/issues/2542), [#2526](https://github.com/Rdatatable/data.table/issues/2526)
 
 2. `fwrite()`:
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 0a6097c7e7..984415f2df 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -11541,10 +11541,10 @@ test(1871.9, fread(txt, skip=0, nrows=1), ans<-data.table(V1=2L, V2=3L, V3=4L))
 test(1871.11, fread(txt, skip=0, nrows=1, header=TRUE), ans)
 test(1871.12, fread(txt, skip=0, nrows=1, header=FALSE), data.table(V1="V1", V2="V2", V3="V3"))
 test(1871.13, fread(txt, skip=0, nrows=2, header=FALSE), data.table(V1=c("V1","2"), V2=c("V2","3"), V3=c("V3","4")))
-# for ( i in 100:1) {
-#   lines <- paste0(paste(rep("1,2,3", i), collapse='\n'), "\n1,2")
-#   fread(lines, nrows=i)
-# }
+for (i in 100:1) {
+  lines <- paste(c(rep("2,3,4",i), "2,3"), collapse='\n')
+  test(1871.2 + i/1000, fread(lines, nrows=i), data.table(V1=rep.int(2L,i), V2=3L, V3=4L))
+}
 
 # miscellaneous missing tests uncovered by CodeCov difference
 #   in the process of PR #2573
diff --git a/src/fread.c b/src/fread.c
index 68f6b94efc..8acd946acc 100644
--- a/src/fread.c
+++ b/src/fread.c
@@ -1707,7 +1707,7 @@ int freadMain(freadMainArgs _args) {
   meanLineLen=0.0; // Average length (in bytes) of a single line in the input file
   bytesRead=0;     // Bytes in the data section (i.e. excluding column names, header and footer, if any)
 
-  if (sampleLines < jumpLines) {
+  if (sampleLines <= jumpLines) {
     if (verbose) DTPRINT("  All rows were sampled since file is small so we know nrow=%llu exactly\n", (llu)sampleLines);
     estnrow = allocnrow = sampleLines;
   } else {

From e1feed34c31b3c084279fa715826913419690569 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Mon, 12 Feb 2018 15:52:21 -0800
Subject: [PATCH 10/14] Tidy

---
 inst/tests/tests.Rraw |  3 +--
 src/fread.c           | 25 ++-----------------------
 2 files changed, 3 insertions(+), 25 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 984415f2df..27c3a548b0 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -2887,9 +2887,9 @@ DT = data.table(a=c(NA,NA,FALSE,FALSE), b=c(1,1,2,2))
 test(1009, DT[,list(mean(a), sum(a)),by=b], data.table(b=c(1,2),V1=c(NA,0),V2=c(NA_integer_,0L))) # sum(logical()) should be integer, not real
 
 # an fread error shouldn't hold a lock on the file on Windows
+# TODO: now that these are warnings and not errors, we need another way to trigger a STOP() inside fread.c. options(warn=2) isn't enough.
 cat('A,B\n1,2\n3\n5,6\n', file=(f<-tempfile()))
 test(1010.1, fread(f), ans<-data.table(A=TRUE, B=2L), warning=(txt<-"Stopped early on line 3.*Expected 2 fields but found 1.*fill.*TRUE.*<<3>>"))
-oldw = options(warn=2)   # !!TODO!!: this doesn't seem sufficient in test framework to turn the warning into error.
 test(1010.2, fread(f), ans, warning=txt)
 cat('7\n8,9',file=f,append=TRUE)   # that append works after error
 test(1010.3, fread(f,fill=TRUE), data.table(A=INT(1,3,5,7,8), B=INT(2,NA,6,NA,9)))
@@ -2899,7 +2899,6 @@ test(1010.5, fread(f,fill=TRUE), data.table(A=INT(1,3,5), B=INT(2,NA,6)))
 test(1010.6, fread(f), ans, warning=txt)
 unlink(f)                          # that file can be removed after error
 test(1010.7, !file.exists(f))
-options(oldw)
 
 # detection of unescaped quotes, quote rule 3
 test(1011, fread('A,B\n"aa",1\n"bb,2\n"cc",3\n'), data.table(A=c('aa', '"bb', 'cc'), B=1:3))
diff --git a/src/fread.c b/src/fread.c
index 8acd946acc..13850f8799 100644
--- a/src/fread.c
+++ b/src/fread.c
@@ -1303,8 +1303,8 @@ int freadMain(freadMainArgs _args) {
     ch = pos;
     skipAuto = false;
   }
-  // Skip the first `skipNrow` lines of input, including 0 to force the first line to be the start
   else if (args.skipNrow >= 0) {
+    // Skip the first `skipNrow` lines of input, including 0 to force the first line to be the start
     while (ch<eof && row1line<=args.skipNrow) row1line+=(*ch++=='\n');
     if (ch>=eof) STOP("skip=%llu but the input only has %llu line%s", (llu)args.skipNrow, (llu)row1line, row1line>1?"s":"");
     pos = ch;
@@ -1488,7 +1488,6 @@ int freadMain(freadMainArgs _args) {
   //*********************************************************************************************
   int nJumps;             // How many jumps to use when pre-scanning the file
   size_t sampleLines;     // How many lines were sampled during the initial pre-scan
-  //const char *lastRowEnd; // Pointer to the end of the data section
   bool autoFirstColName = false; // true when there's one less column name and then it's assumed that the first column is row names or index
   size_t estnrow=1;
   size_t allocnrow=0;     // Number of rows in the allocated DataTable
@@ -1535,7 +1534,6 @@ int freadMain(freadMainArgs _args) {
   int minLen=INT32_MAX, maxLen=-1;   // int_max so the first if(thisLen<minLen) is always true; similarly for max
   const char *lastRowEnd = pos;
   const char *firstRowStart = pos;
-  //bool lastSampleJumpOk = false;   // it won't be ok if its nextGoodLine returns false as testing in test 1768
   for (int jump=0; jump<nJumps; jump++) {
     if (jump==0) {
       ch = pos;
@@ -1552,12 +1550,9 @@ int freadMain(freadMainArgs _args) {
     if (ch>=eof) break;                // The 9th jump could reach the end in the same situation and that's ok. As long as the end is sampled is what we want.
     if (jump>0 && !nextGoodLine(&ch, ncol)) {
       // skip this jump for sampling. Very unusual and in such unusual cases, we don't mind a slightly worse guess.
-      //lastSampleJumpOk = false;
       continue;
     }
-    //lastSampleJumpOk = true;
     bool bumped = false;  // did this jump find any different types; to reduce verbose output to relevant lines
-    //bool skipThisJump = false;
     int jumpLine = 0;    // line from this jump point start
 
     while(ch<eof && jumpLine++<jumpLines) {
@@ -1611,19 +1606,7 @@ int freadMain(freadMainArgs _args) {
       DTPRINT("  Type codes (jump %03d)    : %s  Quote rule %d\n", jump, typesAsString(ncol), quoteRule);
     }
   }
-  /*
-  ch = lastRowEnd;
-  while (ch<eof && isspace(*ch)) ch++;
-  if (ch<eof) {
-    if (lastSampleJumpOk) {
-      DTWARN("Found the last consistent line but text exists afterwards. Consider fill=TRUE and/or blank.lines.skip=TRUE. First 200 characters of discarded line: <<%s>>", strlim(ch,200));
-    } else {
-      // nextGoodLine() was false for the last (extra) jump to check the end
-      // must set lastRowEnd to eof accordingly otherwise it'll be left wherever the last good jump finished
-      lastRowEnd = eof;
-    }
-  }
-*/
+
   ch = pos;
   if (args.header==NA_BOOL8) {
     for (int j=0; j<ncol; j++) tmpType[j]=type0;   // reuse tmpType
@@ -1852,7 +1835,6 @@ int freadMain(freadMainArgs _args) {
   char stopErr[stopErrSize+1]="";  // must be compile time size: the message is generated and we can't free before STOP
   size_t DTi = 0;   // the current row number in DT that we are writing to
   const char *prevJumpEnd = pos;  // the position after the last line the last thread processed (for checking)
-  // const char *skippedFooter = NULL;  // if footer is skipped, this is its location to be printed.
   int buffGrown=0;
   // chunkBytes is the distance between each jump point; it decides the number of jumps
   // We may want each chunk to write to its own page of the final column, hence 1000*maxLen
@@ -2350,9 +2332,6 @@ int freadMain(freadMainArgs _args) {
   }
   setFinalNrow(DTi);
 
-  /*if (skippedFooter) {
-    DTWARN("Discarded footer: <<%s>>", strlim(skippedFooter,500));
-  }*/
   if (prevJumpEnd<eof && DTi<nrowLimit) {
     ch = prevJumpEnd;
     while (ch<eof && isspace(*ch)) ch++;

From 73009280bc0b7caabd91514b79ff6d98c7ac3c55 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Mon, 12 Feb 2018 16:59:42 -0800
Subject: [PATCH 11/14] Added test from #2518

---
 NEWS.md               | 2 +-
 inst/tests/tests.Rraw | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index 9b7c05bac5..67b06119ab 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -28,7 +28,7 @@
     * Single-column input with blank lines is now valid and the blank lines are significant (meaning an NA in the single column). The blank lines are significant even at the very end, which may be surprising on first glance. The change is so that `fread(fwrite(DT))==DT` for single-column inputs containing NA which are written as blank. There is no change when `ncol>1` (i.e., input stops with detailed warning at the first blank line) because a blank line when `ncol>1` is invalid input due to no separators present instead of `ncol-1` separators.
     * Too few column names are now auto filled with default column names, with warning, [#1625](https://github.com/Rdatatable/data.table/issues/1625). If there is just one missing column name it is guessed to be for the first column (row names or an index), otherwise the column names are filled at the end. Similarly, too many column names now automatically sets `fill=TRUE`, with warning.
     * `skip=` and `nrow=` are more reliable and no longer affected by invalid lines outside the range specified. Thanks to Ziyad Saeed and Kyle Chung for reporting, [#1267](https://github.com/Rdatatable/data.table/issues/1267). Tests added.
-    * Many thanks to @yaakovfeldman, Guillermo Ponce, Arun Srinivasan, Hugh Parsonage, Mark Klik, Pasha Stetsenko, Mahyar K, Tom Crockett, @cnoelke, @qinjs, @etienne-s, Mark Danese, Avraham Adler, @franknarf1, @MichaelChirico, @tdhock, Luke Tierney for testing before release to CRAN: [#2070](https://github.com/Rdatatable/data.table/issues/2070), [#2073](https://github.com/Rdatatable/data.table/issues/2073), [#2087](https://github.com/Rdatatable/data.table/issues/2087), [#2091](https://github.com/Rdatatable/data.table/issues/2091), [#2107](https://github.com/Rdatatable/data.table/issues/2107), [fst#50](https://github.com/fstpackage/fst/issues/50#issuecomment-294287846), [#2118](https://github.com/Rdatatable/data.table/issues/2118), [#2092](https://github.com/Rdatatable/data.table/issues/2092), [#1888](https://github.com/Rdatatable/data.table/issues/1888), [#2123](https://github.com/Rdatatable/data.table/issues/2123), [#2167](https://github.com/Rdatatable/data.table/issues/2167), [#2194](https://github.com/Rdatatable/data.table/issues/2194), [#2238](https://github.com/Rdatatable/data.table/issues/2238), [#2228](https://github.com/Rdatatable/data.table/issues/2228), [#1464](https://github.com/Rdatatable/data.table/issues/1464), [#2201](https://github.com/Rdatatable/data.table/issues/2201), [#2287](https://github.com/Rdatatable/data.table/issues/2287), [#2299](https://github.com/Rdatatable/data.table/issues/2299), [#2285](https://github.com/Rdatatable/data.table/issues/2285), [#2251](https://github.com/Rdatatable/data.table/issues/2251), [#2347](https://github.com/Rdatatable/data.table/issues/2347), [#2222](https://github.com/Rdatatable/data.table/issues/2222), [#2352](https://github.com/Rdatatable/data.table/issues/2352), [#2246](https://github.com/Rdatatable/data.table/issues/2246), [#2370](https://github.com/Rdatatable/data.table/issues/2370), [#2371](https://github.com/Rdatatable/data.table/issues/2371), 
[#2404](https://github.com/Rdatatable/data.table/issues/2404), [#2196](https://github.com/Rdatatable/data.table/issues/2196), [#2322](https://github.com/Rdatatable/data.table/issues/2322), [#2453](https://github.com/Rdatatable/data.table/issues/2453), [#2446](https://github.com/Rdatatable/data.table/issues/2446), [#2464](https://github.com/Rdatatable/data.table/issues/2464), [#2457](https://github.com/Rdatatable/data.table/issues/2457), [#1895](https://github.com/Rdatatable/data.table/issues/1895), [#2481](https://github.com/Rdatatable/data.table/pull/2481), [#2499](https://github.com/Rdatatable/data.table/issues/2499), [#2516](https://github.com/Rdatatable/data.table/issues/2516), [#2520](https://github.com/Rdatatable/data.table/issues/2520), [#2512](https://github.com/Rdatatable/data.table/issues/2512), [#2523](https://github.com/Rdatatable/data.table/issues/2523), [#2542](https://github.com/Rdatatable/data.table/issues/2542), [#2526](https://github.com/Rdatatable/data.table/issues/2526)
+    * Many thanks to @yaakovfeldman, Guillermo Ponce, Arun Srinivasan, Hugh Parsonage, Mark Klik, Pasha Stetsenko, Mahyar K, Tom Crockett, @cnoelke, @qinjs, @etienne-s, Mark Danese, Avraham Adler, @franknarf1, @MichaelChirico, @tdhock, Luke Tierney for testing before release to CRAN: [#2070](https://github.com/Rdatatable/data.table/issues/2070), [#2073](https://github.com/Rdatatable/data.table/issues/2073), [#2087](https://github.com/Rdatatable/data.table/issues/2087), [#2091](https://github.com/Rdatatable/data.table/issues/2091), [#2107](https://github.com/Rdatatable/data.table/issues/2107), [fst#50](https://github.com/fstpackage/fst/issues/50#issuecomment-294287846), [#2118](https://github.com/Rdatatable/data.table/issues/2118), [#2092](https://github.com/Rdatatable/data.table/issues/2092), [#1888](https://github.com/Rdatatable/data.table/issues/1888), [#2123](https://github.com/Rdatatable/data.table/issues/2123), [#2167](https://github.com/Rdatatable/data.table/issues/2167), [#2194](https://github.com/Rdatatable/data.table/issues/2194), [#2238](https://github.com/Rdatatable/data.table/issues/2238), [#2228](https://github.com/Rdatatable/data.table/issues/2228), [#1464](https://github.com/Rdatatable/data.table/issues/1464), [#2201](https://github.com/Rdatatable/data.table/issues/2201), [#2287](https://github.com/Rdatatable/data.table/issues/2287), [#2299](https://github.com/Rdatatable/data.table/issues/2299), [#2285](https://github.com/Rdatatable/data.table/issues/2285), [#2251](https://github.com/Rdatatable/data.table/issues/2251), [#2347](https://github.com/Rdatatable/data.table/issues/2347), [#2222](https://github.com/Rdatatable/data.table/issues/2222), [#2352](https://github.com/Rdatatable/data.table/issues/2352), [#2246](https://github.com/Rdatatable/data.table/issues/2246), [#2370](https://github.com/Rdatatable/data.table/issues/2370), [#2371](https://github.com/Rdatatable/data.table/issues/2371), 
[#2404](https://github.com/Rdatatable/data.table/issues/2404), [#2196](https://github.com/Rdatatable/data.table/issues/2196), [#2322](https://github.com/Rdatatable/data.table/issues/2322), [#2453](https://github.com/Rdatatable/data.table/issues/2453), [#2446](https://github.com/Rdatatable/data.table/issues/2446), [#2464](https://github.com/Rdatatable/data.table/issues/2464), [#2457](https://github.com/Rdatatable/data.table/issues/2457), [#1895](https://github.com/Rdatatable/data.table/issues/1895), [#2481](https://github.com/Rdatatable/data.table/pull/2481), [#2499](https://github.com/Rdatatable/data.table/issues/2499), [#2516](https://github.com/Rdatatable/data.table/issues/2516), [#2520](https://github.com/Rdatatable/data.table/issues/2520), [#2512](https://github.com/Rdatatable/data.table/issues/2512), [#2523](https://github.com/Rdatatable/data.table/issues/2523), [#2542](https://github.com/Rdatatable/data.table/issues/2542), [#2526](https://github.com/Rdatatable/data.table/issues/2526), [#2518](https://github.com/Rdatatable/data.table/issues/2518)
 
 2. `fwrite()`:
     * empty strings are now always quoted (`,"",`) to distinguish them from `NA` which by default is still empty (`,,`) but can be changed using `na=` as before. If `na=` is provided and `quote=` is the default `'auto'` then `quote=` is set to `TRUE` so that if the `na=` value occurs in the data, it can be distinguished from `NA`. Thanks to Ethan Welty for the request [#2214](https://github.com/Rdatatable/data.table/issues/2214) and Pasha for the code change and tests, [#2215](https://github.com/Rdatatable/data.table/issues/2215).
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 27c3a548b0..3bccd5fed7 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -11540,6 +11540,7 @@ test(1871.9, fread(txt, skip=0, nrows=1), ans<-data.table(V1=2L, V2=3L, V3=4L))
 test(1871.11, fread(txt, skip=0, nrows=1, header=TRUE), ans)
 test(1871.12, fread(txt, skip=0, nrows=1, header=FALSE), data.table(V1="V1", V2="V2", V3="V3"))
 test(1871.13, fread(txt, skip=0, nrows=2, header=FALSE), data.table(V1=c("V1","2"), V2=c("V2","3"), V3=c("V3","4")))
+test(1871.14, fread("A\n100\n200", verbose=TRUE), data.table(A=c(100L,200L)), output="All rows were sampled since file is small so we know nrow=2 exactly")
 for (i in 100:1) {
   lines <- paste(c(rep("2,3,4",i), "2,3"), collapse='\n')
   test(1871.2 + i/1000, fread(lines, nrows=i), data.table(V1=rep.int(2L,i), V2=3L, V3=4L))

From d3caa5f6332cf07025140d898c784b7b0f1cf086 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Mon, 12 Feb 2018 17:16:15 -0800
Subject: [PATCH 12/14] Added test from #2515

---
 NEWS.md               | 2 +-
 inst/tests/test0.txt  | 1 +
 inst/tests/tests.Rraw | 1 +
 3 files changed, 3 insertions(+), 1 deletion(-)
 create mode 100644 inst/tests/test0.txt

diff --git a/NEWS.md b/NEWS.md
index 67b06119ab..f989f8a963 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -28,7 +28,7 @@
     * Single-column input with blank lines is now valid and the blank lines are significant (meaning an NA in the single column). The blank lines are significant even at the very end, which may be surprising on first glance. The change is so that `fread(fwrite(DT))==DT` for single-column inputs containing NA which are written as blank. There is no change when `ncol>1` (i.e., input stops with detailed warning at the first blank line) because a blank line when `ncol>1` is invalid input due to no separators present instead of `ncol-1` separators.
     * Too few column names are now auto filled with default column names, with warning, [#1625](https://github.com/Rdatatable/data.table/issues/1625). If there is just one missing column name it is guessed to be for the first column (row names or an index), otherwise the column names are filled at the end. Similarly, too many column names now automatically sets `fill=TRUE`, with warning.
     * `skip=` and `nrow=` are more reliable and no longer affected by invalid lines outside the range specified. Thanks to Ziyad Saeed and Kyle Chung for reporting, [#1267](https://github.com/Rdatatable/data.table/issues/1267). Tests added.
-    * Many thanks to @yaakovfeldman, Guillermo Ponce, Arun Srinivasan, Hugh Parsonage, Mark Klik, Pasha Stetsenko, Mahyar K, Tom Crockett, @cnoelke, @qinjs, @etienne-s, Mark Danese, Avraham Adler, @franknarf1, @MichaelChirico, @tdhock, Luke Tierney for testing before release to CRAN: [#2070](https://github.com/Rdatatable/data.table/issues/2070), [#2073](https://github.com/Rdatatable/data.table/issues/2073), [#2087](https://github.com/Rdatatable/data.table/issues/2087), [#2091](https://github.com/Rdatatable/data.table/issues/2091), [#2107](https://github.com/Rdatatable/data.table/issues/2107), [fst#50](https://github.com/fstpackage/fst/issues/50#issuecomment-294287846), [#2118](https://github.com/Rdatatable/data.table/issues/2118), [#2092](https://github.com/Rdatatable/data.table/issues/2092), [#1888](https://github.com/Rdatatable/data.table/issues/1888), [#2123](https://github.com/Rdatatable/data.table/issues/2123), [#2167](https://github.com/Rdatatable/data.table/issues/2167), [#2194](https://github.com/Rdatatable/data.table/issues/2194), [#2238](https://github.com/Rdatatable/data.table/issues/2238), [#2228](https://github.com/Rdatatable/data.table/issues/2228), [#1464](https://github.com/Rdatatable/data.table/issues/1464), [#2201](https://github.com/Rdatatable/data.table/issues/2201), [#2287](https://github.com/Rdatatable/data.table/issues/2287), [#2299](https://github.com/Rdatatable/data.table/issues/2299), [#2285](https://github.com/Rdatatable/data.table/issues/2285), [#2251](https://github.com/Rdatatable/data.table/issues/2251), [#2347](https://github.com/Rdatatable/data.table/issues/2347), [#2222](https://github.com/Rdatatable/data.table/issues/2222), [#2352](https://github.com/Rdatatable/data.table/issues/2352), [#2246](https://github.com/Rdatatable/data.table/issues/2246), [#2370](https://github.com/Rdatatable/data.table/issues/2370), [#2371](https://github.com/Rdatatable/data.table/issues/2371), 
[#2404](https://github.com/Rdatatable/data.table/issues/2404), [#2196](https://github.com/Rdatatable/data.table/issues/2196), [#2322](https://github.com/Rdatatable/data.table/issues/2322), [#2453](https://github.com/Rdatatable/data.table/issues/2453), [#2446](https://github.com/Rdatatable/data.table/issues/2446), [#2464](https://github.com/Rdatatable/data.table/issues/2464), [#2457](https://github.com/Rdatatable/data.table/issues/2457), [#1895](https://github.com/Rdatatable/data.table/issues/1895), [#2481](https://github.com/Rdatatable/data.table/pull/2481), [#2499](https://github.com/Rdatatable/data.table/issues/2499), [#2516](https://github.com/Rdatatable/data.table/issues/2516), [#2520](https://github.com/Rdatatable/data.table/issues/2520), [#2512](https://github.com/Rdatatable/data.table/issues/2512), [#2523](https://github.com/Rdatatable/data.table/issues/2523), [#2542](https://github.com/Rdatatable/data.table/issues/2542), [#2526](https://github.com/Rdatatable/data.table/issues/2526), [#2518](https://github.com/Rdatatable/data.table/issues/2518)
+    * Many thanks to @yaakovfeldman, Guillermo Ponce, Arun Srinivasan, Hugh Parsonage, Mark Klik, Pasha Stetsenko, Mahyar K, Tom Crockett, @cnoelke, @qinjs, @etienne-s, Mark Danese, Avraham Adler, @franknarf1, @MichaelChirico, @tdhock, Luke Tierney for testing before release to CRAN: [#2070](https://github.com/Rdatatable/data.table/issues/2070), [#2073](https://github.com/Rdatatable/data.table/issues/2073), [#2087](https://github.com/Rdatatable/data.table/issues/2087), [#2091](https://github.com/Rdatatable/data.table/issues/2091), [#2107](https://github.com/Rdatatable/data.table/issues/2107), [fst#50](https://github.com/fstpackage/fst/issues/50#issuecomment-294287846), [#2118](https://github.com/Rdatatable/data.table/issues/2118), [#2092](https://github.com/Rdatatable/data.table/issues/2092), [#1888](https://github.com/Rdatatable/data.table/issues/1888), [#2123](https://github.com/Rdatatable/data.table/issues/2123), [#2167](https://github.com/Rdatatable/data.table/issues/2167), [#2194](https://github.com/Rdatatable/data.table/issues/2194), [#2238](https://github.com/Rdatatable/data.table/issues/2238), [#2228](https://github.com/Rdatatable/data.table/issues/2228), [#1464](https://github.com/Rdatatable/data.table/issues/1464), [#2201](https://github.com/Rdatatable/data.table/issues/2201), [#2287](https://github.com/Rdatatable/data.table/issues/2287), [#2299](https://github.com/Rdatatable/data.table/issues/2299), [#2285](https://github.com/Rdatatable/data.table/issues/2285), [#2251](https://github.com/Rdatatable/data.table/issues/2251), [#2347](https://github.com/Rdatatable/data.table/issues/2347), [#2222](https://github.com/Rdatatable/data.table/issues/2222), [#2352](https://github.com/Rdatatable/data.table/issues/2352), [#2246](https://github.com/Rdatatable/data.table/issues/2246), [#2370](https://github.com/Rdatatable/data.table/issues/2370), [#2371](https://github.com/Rdatatable/data.table/issues/2371), 
[#2404](https://github.com/Rdatatable/data.table/issues/2404), [#2196](https://github.com/Rdatatable/data.table/issues/2196), [#2322](https://github.com/Rdatatable/data.table/issues/2322), [#2453](https://github.com/Rdatatable/data.table/issues/2453), [#2446](https://github.com/Rdatatable/data.table/issues/2446), [#2464](https://github.com/Rdatatable/data.table/issues/2464), [#2457](https://github.com/Rdatatable/data.table/issues/2457), [#1895](https://github.com/Rdatatable/data.table/issues/1895), [#2481](https://github.com/Rdatatable/data.table/pull/2481), [#2499](https://github.com/Rdatatable/data.table/issues/2499), [#2516](https://github.com/Rdatatable/data.table/issues/2516), [#2520](https://github.com/Rdatatable/data.table/issues/2520), [#2512](https://github.com/Rdatatable/data.table/issues/2512), [#2523](https://github.com/Rdatatable/data.table/issues/2523), [#2542](https://github.com/Rdatatable/data.table/issues/2542), [#2526](https://github.com/Rdatatable/data.table/issues/2526), [#2518](https://github.com/Rdatatable/data.table/issues/2518), [#2515](https://github.com/Rdatatable/data.table/issues/2515)
 
 2. `fwrite()`:
     * empty strings are now always quoted (`,"",`) to distinguish them from `NA` which by default is still empty (`,,`) but can be changed using `na=` as before. If `na=` is provided and `quote=` is the default `'auto'` then `quote=` is set to `TRUE` so that if the `na=` value occurs in the data, it can be distinguished from `NA`. Thanks to Ethan Welty for the request [#2214](https://github.com/Rdatatable/data.table/issues/2214) and Pasha for the code change and tests, [#2215](https://github.com/Rdatatable/data.table/issues/2215).
diff --git a/inst/tests/test0.txt b/inst/tests/test0.txt
new file mode 100644
index 0000000000..343641777b
--- /dev/null
+++ b/inst/tests/test0.txt
@@ -0,0 +1 @@
+x0656609   701231   733130   603634   355257   598656   368172   154195   328918   541999   378803   322161   321092   791071   150389   419669   180705   188717   274702   800259   100932   113270   509670   423688   412085   767369   790109   351661   959688   142249   593856   430035   881849   883353   932253   727230   319403   970870   769759   557740   283302   121615   609275   458244   53145   645010   919631   286721   63482   397957   360723   102529   81232   930175   666775   227586   87954   931600   314102   819515   474886   307681   555198   256257   935567   584995   887227   631934   434291   895515   795983   76195   307116   482805   486231   705261   785018   288274   628695   484178   34233   400298   489154   493941   744274   575616   704744   830367   289528   94032   101765   224100   791138   90153   11788   977400   421275   298837   805904   969139   70402   640508   927283   148022   424566   270942   923478   555518   248364   286337   974164   248739   32678   546732   24499   813317   981579   545526   955469   972214   940743   644527   622021   785924   23817   870966   940461   590656   675131   823011   205458   190691   291547   414287   713461   613051   126750   930891   779069   761445   638559   410482   250950   534637   191131   749656   870264   639621   966972   73639   580494   190103   331573   115388   480143   240431   302584   880387   396715   466719   202512   762275   30761   289286   324542   962755   74055   596781   557808   858438   167748   551506   313974   398181   700113   91957   814308   560534   894768   234128   275400   154705   451191   82213   846542   140500   801347   105546   881607   320445   434178   272078   918623   789413   221578   336189   893454   288401   738358 
  196732   269610   906540   199487   248174   151170   920877   281825   11999   229927   142709   865073   821730   733870   927047   382099   501732   209365   111025   629165   40013   781585   625607   873297   390798   950249   150346   546167   696964   774745   924600   562044   546639   820513   875737   4613   268007   538964   54024   147940   887098   259041   432766   712760   361465   541462   707152   217728   656004   749406   164417   178438   725605   930454   552495   778309   172520   585342   -6993   135061   388732   169809   62254   -3252   351291   442562   108310   810094   383020   191313   831278   966205   628916   396364   161747   52626   389767   643418   459668   73641   837384   449943   962037   101682   156039   215569   541052   556353   715341   863959   420742   187277   76339   55960   792854   169862   605135   196136   507606   874500   40006   589341   921790   174259   835893   789012   58689   999570   183003   12804   33132   16762   700847   328861   951624   152948   147249   824065   464931   119416   679783   260417   256339   365195   474795   951240   388833   597366   422250   481521   240251   94093   470069   267104   876396   941440   5060   59123   973339   407419   697781   70640   697698   890818   301616   186050   529539   430533   590844   317713   770167   908137   184259   7662   811278   667116   369633   704257   463585   770546   567979   768563   179296   823750   66862   774164   655603   603775   709287   848672   125761   222582   791991   153699   551297   379311   306024   511210   214245   437379   321291   504286   981317   617686   413759   236307   759616   437084   597432   248273   332715   304108   634669   377569   223991   659156   98602   569501   817043   
-3139   34273   343602   62985   978268   501113   676726   172614   734484   459624   536897   577632   993373   468857   118235   285947   273862   557753   77861   208124   53740   813437   196687   780632   456756   737824   73209   193248   814739   82946   620531   937509   534977   750713   792793   154008   719548   63369   50103   789700   705314   598162   957370   939796   482935   136924   769343   401354   960856   372173   72628   562835   261494   443922   252954   808805   244167   993007   357420   319382   716637   131148   491938   10387   225293   215295   601463   15300   99585   955999   899293   692080   430896   939463   130638   249144   732186   706193   298076   625366   211715   466801   539754   260766   939737   175824   386850   526747   97302   345468   360414   313365   193665   933930   223183   566652   485281   221511   559204   134626   447527   616350   963250   67709   561389   223309   481791   184012   192084   495185   176337   225176   211027   620818   795659   626412   693440   854162   21056   295476   667959   583901   288218   374506   333942   580931   91268   436843   798558   951687   388189   71434   445894   977016   293562   868269   104872   665782   402427   704246   555812   112731   705347   320555   381071   513045   961812   424932   750972   723433   797141   963732   583124   534123   528064   755494   765100   945616   319267   816319   544736   939945   858844   724481   127355   89824   809157   323435   796710   982505   55151   281952   266858   103704   278013   326863   625396   286453   37590   691841   180121   324560   242657   942644   313051   18052   273293   899965   478179   806404   894543   627957   468200   179024   832540   984899   688120   916036   882768   314820   
980983   287823   316173   627346   334888   845949   534463   248454   403857   751145   447842   322956   549981   200715   488329   769497   90061   860605   785771   943363   998468   496620   77270   696221   413011   166754   151521   116447   831564   163226   848260   884366   166701   589100   824493   512837   555067   808754   928003   692445   960730   697769   841303   971950   860491   983013   653710   530517   219003   644211   837924   566243   37342   545639   839650   475874   682796   32342   245297   91611   219337   357445   537100   587478   692140   906003   842543   19578   167882   982194   698333   611028   407855   224180   391638   784633   143842   116728   880111   374703   749623   73736   457614   209872   610555   255371   367870   403419   735033   562208   895083   911063   551045   991328   726404   730649   586811   464660   999369   961951   46272   612162   896926   273792   253320   625856   432386   220498   805981   218562   339073   373035   48807   265383   36535   346042   239965   683477   976158   172863   530536   527694   420081   702072   112393   453258   861480   993586   945009   968521   933886   785678   313589   333287   655906   532286   25058   985104   393032   886681   490590   364607   561365   560067   204463   319445   656924   576824   978768   780295   39908   946103   143274   771073   342473   762406   41341   309350   693254   127688   462478   793635   402445   511759   393560   858302   127338   444183   776580   985153   204304   586211   497882   412152   107519   129459   909550   726244   808115   338429   887961   962179   846652   706925   599625   848476   174466   915803   971757   717132   861039   650894   45015   60327   63774   173479   846497   813495   815334 
  544183   831596   617676   428664   655030   63478   201239   927460   760012   629036   204727   932502   212201   684350   878241   578776   158199   447942   655891   308078   709745   900637   548270   578200   54213   832238   181724   900876   613349   606173   523540   820104   869225   574145   172267   904974   238439   970837   927617   933646   339006   810331   208790   28266   4996   298034   797752   638315   341766   298888   924946   711613   490629   606538   126793   610571   239295   289220   785128   248223   647844   861089   406627   599125   410243   312553   107253   697336   35212   228437   541347   551765   576224   627122   751956   664415   417495   -2368   955199   
\ No newline at end of file
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 3bccd5fed7..408399de4d 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -11375,6 +11375,7 @@ test(1856.3, fread("A,B\n\n\n"), ans)
 test(1856.4, fread("A,B\n3,4\n\n\n"), data.table(A=3L, B=4L))
 test(1856.5, fread("A,B\n3,4\n,\n\n\n"), data.table(A=c(3L,NA), B=c(4L,NA)))
 test(1856.6, fread("A,B\n3,4\n\n5,6\n"), data.table(A=3L, B=4L), warning="Discarded single-line footer: <<5,6>>")
+test(1856.7, fread(testDir("test0.txt"))[c(1,997,998,999)], data.table(x0=c(656609L, NA, -2368L, 955199L)))  # issue 2515
 DTs = list(                                      # passed fread(fwrite(DT))==DT before fix?
   data.table(A=logical(0)),                      # yes
   data.table(A=NA),                              # no

From 4ac7e26ec7381e3970ac33c5fce4c45265256cc0 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Mon, 12 Feb 2018 17:36:06 -0800
Subject: [PATCH 13/14] Added test from #1671

---
 NEWS.md               | 2 +-
 inst/tests/tests.Rraw | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/NEWS.md b/NEWS.md
index f989f8a963..8c88319078 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -28,7 +28,7 @@
     * Single-column input with blank lines is now valid and the blank lines are significant (meaning an NA in the single column). The blank lines are significant even at the very end, which may be surprising on first glance. The change is so that `fread(fwrite(DT))==DT` for single-column inputs containing NA which are written as blank. There is no change when `ncol>1` (i.e., input stops with detailed warning at the first blank line) because a blank line when `ncol>1` is invalid input due to no separators present instead of `ncol-1` separators.
     * Too few column names are now auto filled with default column names, with warning, [#1625](https://github.com/Rdatatable/data.table/issues/1625). If there is just one missing column name it is guessed to be for the first column (row names or an index), otherwise the column names are filled at the end. Similarly, too many column names now automatically sets `fill=TRUE`, with warning.
     * `skip=` and `nrow=` are more reliable and no longer affected by invalid lines outside the range specified. Thanks to Ziyad Saeed and Kyle Chung for reporting, [#1267](https://github.com/Rdatatable/data.table/issues/1267). Tests added.
-    * Many thanks to @yaakovfeldman, Guillermo Ponce, Arun Srinivasan, Hugh Parsonage, Mark Klik, Pasha Stetsenko, Mahyar K, Tom Crockett, @cnoelke, @qinjs, @etienne-s, Mark Danese, Avraham Adler, @franknarf1, @MichaelChirico, @tdhock, Luke Tierney for testing before release to CRAN: [#2070](https://github.com/Rdatatable/data.table/issues/2070), [#2073](https://github.com/Rdatatable/data.table/issues/2073), [#2087](https://github.com/Rdatatable/data.table/issues/2087), [#2091](https://github.com/Rdatatable/data.table/issues/2091), [#2107](https://github.com/Rdatatable/data.table/issues/2107), [fst#50](https://github.com/fstpackage/fst/issues/50#issuecomment-294287846), [#2118](https://github.com/Rdatatable/data.table/issues/2118), [#2092](https://github.com/Rdatatable/data.table/issues/2092), [#1888](https://github.com/Rdatatable/data.table/issues/1888), [#2123](https://github.com/Rdatatable/data.table/issues/2123), [#2167](https://github.com/Rdatatable/data.table/issues/2167), [#2194](https://github.com/Rdatatable/data.table/issues/2194), [#2238](https://github.com/Rdatatable/data.table/issues/2238), [#2228](https://github.com/Rdatatable/data.table/issues/2228), [#1464](https://github.com/Rdatatable/data.table/issues/1464), [#2201](https://github.com/Rdatatable/data.table/issues/2201), [#2287](https://github.com/Rdatatable/data.table/issues/2287), [#2299](https://github.com/Rdatatable/data.table/issues/2299), [#2285](https://github.com/Rdatatable/data.table/issues/2285), [#2251](https://github.com/Rdatatable/data.table/issues/2251), [#2347](https://github.com/Rdatatable/data.table/issues/2347), [#2222](https://github.com/Rdatatable/data.table/issues/2222), [#2352](https://github.com/Rdatatable/data.table/issues/2352), [#2246](https://github.com/Rdatatable/data.table/issues/2246), [#2370](https://github.com/Rdatatable/data.table/issues/2370), [#2371](https://github.com/Rdatatable/data.table/issues/2371), 
[#2404](https://github.com/Rdatatable/data.table/issues/2404), [#2196](https://github.com/Rdatatable/data.table/issues/2196), [#2322](https://github.com/Rdatatable/data.table/issues/2322), [#2453](https://github.com/Rdatatable/data.table/issues/2453), [#2446](https://github.com/Rdatatable/data.table/issues/2446), [#2464](https://github.com/Rdatatable/data.table/issues/2464), [#2457](https://github.com/Rdatatable/data.table/issues/2457), [#1895](https://github.com/Rdatatable/data.table/issues/1895), [#2481](https://github.com/Rdatatable/data.table/pull/2481), [#2499](https://github.com/Rdatatable/data.table/issues/2499), [#2516](https://github.com/Rdatatable/data.table/issues/2516), [#2520](https://github.com/Rdatatable/data.table/issues/2520), [#2512](https://github.com/Rdatatable/data.table/issues/2512), [#2523](https://github.com/Rdatatable/data.table/issues/2523), [#2542](https://github.com/Rdatatable/data.table/issues/2542), [#2526](https://github.com/Rdatatable/data.table/issues/2526), [#2518](https://github.com/Rdatatable/data.table/issues/2518), [#2515](https://github.com/Rdatatable/data.table/issues/2515)
+    * Many thanks to @yaakovfeldman, Guillermo Ponce, Arun Srinivasan, Hugh Parsonage, Mark Klik, Pasha Stetsenko, Mahyar K, Tom Crockett, @cnoelke, @qinjs, @etienne-s, Mark Danese, Avraham Adler, @franknarf1, @MichaelChirico, @tdhock, Luke Tierney for testing before release to CRAN: [#2070](https://github.com/Rdatatable/data.table/issues/2070), [#2073](https://github.com/Rdatatable/data.table/issues/2073), [#2087](https://github.com/Rdatatable/data.table/issues/2087), [#2091](https://github.com/Rdatatable/data.table/issues/2091), [#2107](https://github.com/Rdatatable/data.table/issues/2107), [fst#50](https://github.com/fstpackage/fst/issues/50#issuecomment-294287846), [#2118](https://github.com/Rdatatable/data.table/issues/2118), [#2092](https://github.com/Rdatatable/data.table/issues/2092), [#1888](https://github.com/Rdatatable/data.table/issues/1888), [#2123](https://github.com/Rdatatable/data.table/issues/2123), [#2167](https://github.com/Rdatatable/data.table/issues/2167), [#2194](https://github.com/Rdatatable/data.table/issues/2194), [#2238](https://github.com/Rdatatable/data.table/issues/2238), [#2228](https://github.com/Rdatatable/data.table/issues/2228), [#1464](https://github.com/Rdatatable/data.table/issues/1464), [#2201](https://github.com/Rdatatable/data.table/issues/2201), [#2287](https://github.com/Rdatatable/data.table/issues/2287), [#2299](https://github.com/Rdatatable/data.table/issues/2299), [#2285](https://github.com/Rdatatable/data.table/issues/2285), [#2251](https://github.com/Rdatatable/data.table/issues/2251), [#2347](https://github.com/Rdatatable/data.table/issues/2347), [#2222](https://github.com/Rdatatable/data.table/issues/2222), [#2352](https://github.com/Rdatatable/data.table/issues/2352), [#2246](https://github.com/Rdatatable/data.table/issues/2246), [#2370](https://github.com/Rdatatable/data.table/issues/2370), [#2371](https://github.com/Rdatatable/data.table/issues/2371), 
[#2404](https://github.com/Rdatatable/data.table/issues/2404), [#2196](https://github.com/Rdatatable/data.table/issues/2196), [#2322](https://github.com/Rdatatable/data.table/issues/2322), [#2453](https://github.com/Rdatatable/data.table/issues/2453), [#2446](https://github.com/Rdatatable/data.table/issues/2446), [#2464](https://github.com/Rdatatable/data.table/issues/2464), [#2457](https://github.com/Rdatatable/data.table/issues/2457), [#1895](https://github.com/Rdatatable/data.table/issues/1895), [#2481](https://github.com/Rdatatable/data.table/pull/2481), [#2499](https://github.com/Rdatatable/data.table/issues/2499), [#2516](https://github.com/Rdatatable/data.table/issues/2516), [#2520](https://github.com/Rdatatable/data.table/issues/2520), [#2512](https://github.com/Rdatatable/data.table/issues/2512), [#2523](https://github.com/Rdatatable/data.table/issues/2523), [#2542](https://github.com/Rdatatable/data.table/issues/2542), [#2526](https://github.com/Rdatatable/data.table/issues/2526), [#2518](https://github.com/Rdatatable/data.table/issues/2518), [#2515](https://github.com/Rdatatable/data.table/issues/2515), [#1671](https://github.com/Rdatatable/data.table/issues/1671)
 
 2. `fwrite()`:
     * empty strings are now always quoted (`,"",`) to distinguish them from `NA` which by default is still empty (`,,`) but can be changed using `na=` as before. If `na=` is provided and `quote=` is the default `'auto'` then `quote=` is set to `TRUE` so that if the `na=` value occurs in the data, it can be distinguished from `NA`. Thanks to Ethan Welty for the request [#2214](https://github.com/Rdatatable/data.table/issues/2214) and Pasha for the code change and tests, [#2215](https://github.com/Rdatatable/data.table/issues/2215).
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 408399de4d..aec8c35a79 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -11542,6 +11542,7 @@ test(1871.11, fread(txt, skip=0, nrows=1, header=TRUE), ans)
 test(1871.12, fread(txt, skip=0, nrows=1, header=FALSE), data.table(V1="V1", V2="V2", V3="V3"))
 test(1871.13, fread(txt, skip=0, nrows=2, header=FALSE), data.table(V1=c("V1","2"), V2=c("V2","3"), V3=c("V3","4")))
 test(1871.14, fread("A\n100\n200", verbose=TRUE), data.table(A=c(100L,200L)), output="All rows were sampled since file is small so we know nrow=2 exactly")
+test(1871.15, fread("col1, col2, col3\n1, 2, 3\n3, 5, 6\n7, 8, 9\n\nsome text to ignore", nrows = 3L), data.table(col1=INT(1,3,7), col2=INT(2,5,8), col3=INT(3,6,9)))  # from #1671 (no warning expected)
 for (i in 100:1) {
   lines <- paste(c(rep("2,3,4",i), "2,3"), collapse='\n')
   test(1871.2 + i/1000, fread(lines, nrows=i), data.table(V1=rep.int(2L,i), V2=3L, V3=4L))

From 7f48c74989ca0144a5002b1f478b265970229f49 Mon Sep 17 00:00:00 2001
From: Matt Dowle <mattjdowle@gmail.com>
Date: Mon, 12 Feb 2018 18:09:11 -0800
Subject: [PATCH 14/14] Pencilled in test from #2267. Added 'nocov' in C code
 to see if that works.

---
 inst/tests/tests.Rraw |  4 ++++
 src/fread.c           | 10 +++++-----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index aec8c35a79..d98afeef0f 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -11613,6 +11613,10 @@ fwrite(DT,f<-tempfile())
 test(1873, fread(f), DT)
 unlink(f)
 
+# No good line could be found from a jump point, #2267
+# TODO: at 35MB, bad_fill.csv exceeds the CRAN package size limit; reduce its size, then enable the tests below.
+# test(1874.1, fread(testDir("bad_fill.csv")), error="No good line could be found from jump point")
+# test(1874.2, fread(testDir("bad_fill.csv"), fill=TRUE), error="No good line could be found from jump point")
 
 
 ##########################
diff --git a/src/fread.c b/src/fread.c
index 13850f8799..dd19b37bd9 100644
--- a/src/fread.c
+++ b/src/fread.c
@@ -139,10 +139,10 @@ bool freadCleanup(void)
     // may call freadCleanup(), thus resulting in an infinite loop.
     #ifdef WIN32
       if (!UnmapViewOfFile(mmp))
-        DTPRINT("System error %d unmapping view of file\n", GetLastError());
+        DTPRINT("System error %d unmapping view of file\n", GetLastError());      // nocov
     #else
       if (munmap(mmp, fileSize))
-        DTPRINT("System errno %d unmapping file: %s\n", errno, strerror(errno));
+        DTPRINT("System errno %d unmapping file: %s\n", errno, strerror(errno));  // nocov
     #endif
     mmp = NULL;
   }
@@ -206,9 +206,9 @@ static char *typesAsString(int ncol) {
   if (ncol<=100) {
     for (; i<ncol; i++) str[i] = typeLetter[type[i]];
   } else {
-    for (; i<80; i++) str[i] = typeLetter[type[i]];
-    str[i++]='.'; str[i++]='.'; str[i++]='.';
-    for (int j=ncol-10; j<ncol; j++) str[i++] = typeLetter[type[j]];
+    for (; i<80; i++) str[i] = typeLetter[type[i]];                   // nocov
+    str[i++]='.'; str[i++]='.'; str[i++]='.';                         // nocov
+    for (int j=ncol-10; j<ncol; j++) str[i++] = typeLetter[type[j]];  // nocov
   }
   str[i] = '\0';
   return str;