diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 1e9ea9c12d..3e408f87d7 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14033,8 +14033,11 @@ for (nNUL in 0:3) { test(2025.08, fread(f, skip=1, header=TRUE), ans) test(2025.09, fread(f), ans) } -writeBin(c(charToRaw("A,B,C\n1,foo,5\n2,"), as.raw(0),charToRaw("bar"), as.raw(0),as.raw(0), charToRaw(",6\n")), con=f) +makeNul = function(str){ tt=charToRaw(str); tt[tt==42L]=as.raw(0); writeBin(tt, con=f)} # "*" (42) represents NUL +makeNul("A,B,C\n1,foo,5\n2,*bar**,6\n") test(2025.10, fread(f), data.table(A=1:2, B=c("foo","bar"), C=5:6)) +makeNul('A,B,C\n1,foo*bar,3\n2,**"**b*az*",4\n') +test(2025.11, fread(f), data.table(A=1:2, B=c("foobar","baz"), C=3:4)) # printing timezone, #2842 DT = data.table(t1 = as.POSIXct("1982-04-26 13:34:56", tz = "Europe/Madrid"),t2 = as.POSIXct("2019-01-01 19:00:01",tz = "UTC")) diff --git a/src/fread.c b/src/fread.c index 4ded050a7d..bcbda32a59 100644 --- a/src/fread.c +++ b/src/fread.c @@ -497,7 +497,7 @@ static void Field(FieldParseContext *ctx) const char *fieldStart=ch; if (*ch!=quote || quoteRule==3) { // Most common case. Unambiguously not quoted. Simply search for sep|eol. If field contains sep|eol then it should have been quoted and we do not try to heal that. - while(!end_of_field(ch)) ch++; // sep, \r, \n or \0 will end + while(!end_of_field(ch)) ch++; // sep, \r, \n or eof will end *(ctx->ch) = ch; int fieldLen = (int)(ch-fieldStart); //if (stripWhite) { // TODO: do this if and the next one together once in bulk afterwards before push @@ -516,7 +516,7 @@ static void Field(FieldParseContext *ctx) fieldStart++; // step over opening quote switch(quoteRule) { case 0: // quoted with embedded quotes doubled; the final unescaped " must be followed by sep|eol - while (*++ch) { + while (*++ch || chch) = ch; } else { *(ctx->ch) = ch; - if (*ch=='\0' && quoteRule!=2) { target->off--; target->len++; } // test 1324 where final field has open quote but not ending quote; include the open quote like quote rule 2 + if (ch==eof && quoteRule!=2) { target->off--; target->len++; } // test 1324 where final field has open quote but not ending quote; include the open quote like quote rule 2 while(target->len>0 && ((ch[-1]==' ' && stripWhite) || ch[-1]=='\0')) { target->len--; ch--; } // test 1551.6; trailing whitespace in field [67,V37] == "\"\"A\"\" ST " } } diff --git a/src/freadR.c b/src/freadR.c index 7d025c81a8..84ed093745 100644 --- a/src/freadR.c +++ b/src/freadR.c @@ -427,10 +427,24 @@ void pushBuffer(ThreadLocalFreadParsingContext *ctx) lenOff *source = buff8_lenoffs + off8; for (int i=0; ilen; - if (strLen) { + if (strLen<=0) { // stringLen == INT_MIN => NA, otherwise not a NAstring was checked inside fread_mean - SET_STRING_ELT(dest, DTi+i, strLen<0 ? NA_STRING : mkCharLenCE(anchor + source->off, strLen, ienc)); - } // else dest was already initialized with R_BlankString by allocVector() + if (strLen<0) SET_STRING_ELT(dest, DTi+i, NA_STRING); // else leave the "" in place that was initialized by allocVector() + } else { + const char *str = anchor + source->off; + int c=0; + while (c1) schedule(auto) num_threads(getDTthreads()) for (R_len_t i=0; i