diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 443487c6a..4b35ab027 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -8073,6 +8073,8 @@ test(1578.6, fread(f, skip=47L, verbose=TRUE), data.table(V1=1:2, V2=3:4), outpu test(1578.7, fread(f, skip=49L), data.table(V1=1:2, V2=3:4)) test(1578.8, fread(f, skip=47L, blank.lines.skip=TRUE), data.table(a=1:2, b=3:4)) test(1578.9, fread(f, skip=48L), data.table(V1=1:2, V2=3:4)) # start on blank line 49 and skip="auto" to first data row on line 50 +input = "x y\n\n1 a\n\n2 b\n\n3 c" +test(1578.91, fread(input), data.table(V1=3L, V2="c"), warning="The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n") # test 1579 moved to optimize.Rraw diff --git a/src/fread.c b/src/fread.c index 2902dfc50..2c889097a 100644 --- a/src/fread.c +++ b/src/fread.c @@ -1843,6 +1843,7 @@ int freadMain(freadMainArgs _args) int topNumFields = 1; // how many fields that was, to resolve ties enum quote_rule_t topQuoteRule = -1; // which quote rule that was int topSkip = 0; // how many rows to auto-skip + // #7707 'topSkip' accumulates as blank lines are encountered; it can be used to tell a file whose header and data are separated by a blank line apart from a file where blocks of lines, or every line, are separated by blank lines const char *topStart = NULL; for (quoteRule = quote ? QUOTE_RULE_EMBEDDED_QUOTES_DOUBLED : QUOTE_RULE_IGNORE_QUOTES; quoteRule < QUOTE_RULE_COUNT; quoteRule++) { // #loop_counter_not_local_scope_ok @@ -1946,6 +1947,10 @@ int freadMain(freadMainArgs _args) } } } + if (!prevStart && topSkip > 1 && !skipEmptyLines) + { + DTWARN(_("The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n")); + } if (!firstJumpEnd) { if (verbose) DTPRINT(_(" No sep and quote rule found a block of 2x2 or greater. Single column input.\n")); topNumFields = 1;