From 68dd57e4a543007ccd19c3b67cdff4cabe38811d Mon Sep 17 00:00:00 2001 From: Asa-Henry Date: Mon, 13 Apr 2026 08:17:55 -0700 Subject: [PATCH 1/5] Added a check to detect when blank lines should have been skipped, but it doesn't solve the problem yet. --- src/fread.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/fread.c b/src/fread.c index 3df73156d..647c423fb 100644 --- a/src/fread.c +++ b/src/fread.c @@ -1946,6 +1946,10 @@ int freadMain(freadMainArgs _args) } } } + if (!prevStart && topStart && topSkip > 0) + { + DTWARN(_("The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n")); + } if (!firstJumpEnd) { if (verbose) DTPRINT(_(" No sep and quote rule found a block of 2x2 or greater. Single column input.\n")); topNumFields = 1; From 0a317ae4d817b6b98d1dd3008550ad796b8bfe8e Mon Sep 17 00:00:00 2001 From: Asa-Henry Date: Mon, 13 Apr 2026 17:31:16 -0700 Subject: [PATCH 2/5] Fixed check added to only check if 'topSkip' is greater than 0. 'topSkip' is greater than 0 when blank lines are present, so I also check if blank lines should be skipped so I can throw a warning to let the user know. --- src/fread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fread.c b/src/fread.c index 647c423fb..93aaa3cd6 100644 --- a/src/fread.c +++ b/src/fread.c @@ -1946,7 +1946,7 @@ int freadMain(freadMainArgs _args) } } } - if (!prevStart && topStart && topSkip > 0) + if (topSkip > 0 && !skipEmptyLines) { DTWARN(_("The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. 
If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n")); } From 9164f60265ffab76ac0413ac1e06efca6febea8b Mon Sep 17 00:00:00 2001 From: Asa-Henry Date: Mon, 20 Apr 2026 11:10:23 -0700 Subject: [PATCH 3/5] Updated check for blank lines to ask if 'topSkip' is greater than 1 to accommodate the situation where the header and data are separated by a blank line. --- src/fread.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/fread.c b/src/fread.c index 82d314505..2aca02110 100644 --- a/src/fread.c +++ b/src/fread.c @@ -1843,6 +1843,7 @@ int freadMain(freadMainArgs _args) int topNumFields = 1; // how many fields that was, to resolve ties enum quote_rule_t topQuoteRule = -1; // which quote rule that was int topSkip = 0; // how many rows to auto-skip + // #7707 'topSkip' accumulates as blank lines are encountered; can be used to differentiate between a file where the header and data are separated by a blank line and a file where block(s) of lines or each line is separated by a blank line const char *topStart = NULL; for (quoteRule = quote ? QUOTE_RULE_EMBEDDED_QUOTES_DOUBLED : QUOTE_RULE_IGNORE_QUOTES; quoteRule < QUOTE_RULE_COUNT; quoteRule++) { // #loop_counter_not_local_scope_ok @@ -1946,7 +1947,7 @@ int freadMain(freadMainArgs _args) } } } - if (topSkip > 0 && !skipEmptyLines) + if (topSkip > 1 && !skipEmptyLines) { DTWARN(_("The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n")); } From 8a8056d00555efb6dcfac8c327fc0f9bf1c23baf Mon Sep 17 00:00:00 2001 From: Asa-Henry Date: Mon, 20 Apr 2026 13:11:54 -0700 Subject: [PATCH 4/5] Used the 'prevStart' variable to detect when each line is separated by a blank line. In the case of each line separated by a blank line, 'prevStart' is always NULL because each line could be a possible header. 
--- src/fread.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fread.c b/src/fread.c index 2aca02110..2c889097a 100644 --- a/src/fread.c +++ b/src/fread.c @@ -1947,7 +1947,7 @@ int freadMain(freadMainArgs _args) } } } - if (topSkip > 1 && !skipEmptyLines) + if (!prevStart && topSkip > 1 && !skipEmptyLines) { DTWARN(_("The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n")); } From 03e482d316492ff4c486f0733f6b0221daabe025 Mon Sep 17 00:00:00 2001 From: Asa-Henry Date: Tue, 21 Apr 2026 16:22:16 -0700 Subject: [PATCH 5/5] Added test '1578.10' for initial case which issue #3339 pointed out. Causes an error in test 1578.1? --- inst/tests/tests.Rraw | 2 ++ 1 file changed, 2 insertions(+) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 443487c6a..4b35ab027 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -8073,6 +8073,8 @@ test(1578.6, fread(f, skip=47L, verbose=TRUE), data.table(V1=1:2, V2=3:4), outpu test(1578.7, fread(f, skip=49L), data.table(V1=1:2, V2=3:4)) test(1578.8, fread(f, skip=47L, blank.lines.skip=TRUE), data.table(a=1:2, b=3:4)) test(1578.9, fread(f, skip=48L), data.table(V1=1:2, V2=3:4)) # start on blank line 49 and skip="auto" to first data row on line 50 +input = "x y\n\n1 a\n\n2 b\n\n3 c" +test(1578.10, fread(input), data.table(V1=3L, V2="c"), warning="The rows in this file appear to be separated by blank lines. This resulted in most rows being skipped. If this was not the intended outcome, please consider setting 'blank.lines.skip' to TRUE.\n") # test 1579 moved to optimize.Rraw