Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion inst/tests/tests.Rraw
Original file line number Diff line number Diff line change
Expand Up @@ -14033,8 +14033,11 @@ for (nNUL in 0:3) {
test(2025.08, fread(f, skip=1, header=TRUE), ans)
test(2025.09, fread(f), ans)
}
writeBin(c(charToRaw("A,B,C\n1,foo,5\n2,"), as.raw(0),charToRaw("bar"), as.raw(0),as.raw(0), charToRaw(",6\n")), con=f)
makeNul = function(str){ tt=charToRaw(str); tt[tt==42L]=as.raw(0); writeBin(tt, con=f)} # "*" (42) represents NUL
makeNul("A,B,C\n1,foo,5\n2,*bar**,6\n")
test(2025.10, fread(f), data.table(A=1:2, B=c("foo","bar"), C=5:6))
makeNul('A,B,C\n1,foo*bar,3\n2,**"**b*az*",4\n')
test(2025.11, fread(f), data.table(A=1:2, B=c("foobar","baz"), C=3:4))

# printing timezone, #2842
DT = data.table(t1 = as.POSIXct("1982-04-26 13:34:56", tz = "Europe/Madrid"),t2 = as.POSIXct("2019-01-01 19:00:01",tz = "UTC"))
Expand Down
12 changes: 6 additions & 6 deletions src/fread.c
Original file line number Diff line number Diff line change
Expand Up @@ -497,7 +497,7 @@ static void Field(FieldParseContext *ctx)
const char *fieldStart=ch;
if (*ch!=quote || quoteRule==3) {
// Most common case. Unambiguously not quoted. Simply search for sep|eol. If field contains sep|eol then it should have been quoted and we do not try to heal that.
while(!end_of_field(ch)) ch++; // sep, \r, \n or \0 will end
while(!end_of_field(ch)) ch++; // sep, \r, \n or eof will end
*(ctx->ch) = ch;
int fieldLen = (int)(ch-fieldStart);
//if (stripWhite) { // TODO: do this if and the next one together once in bulk afterwards before push
Expand All @@ -516,15 +516,15 @@ static void Field(FieldParseContext *ctx)
fieldStart++; // step over opening quote
switch(quoteRule) {
case 0: // quoted with embedded quotes doubled; the final unescaped " must be followed by sep|eol
while (*++ch) {
while (*++ch || ch<eof) {
if (*ch==quote) {
if (ch[1]==quote) { ch++; continue; }
break; // found undoubled closing quote
}
}
break;
case 1: // quoted with embedded quotes escaped; the final unescaped " must be followed by sep|eol
while (*++ch) {
while (*++ch || ch<eof) {
if (*ch=='\\' && (ch[1]==quote || ch[1]=='\\')) { ch++; continue; }
if (*ch==quote) break;
}
Expand All @@ -538,14 +538,14 @@ static void Field(FieldParseContext *ctx)
// Under this rule, no eol may occur inside fields.
{
const char *ch2 = ch;
while (*++ch && *ch!='\n' && *ch!='\r') {
while ((*++ch || ch<eof) && *ch!='\n' && *ch!='\r') {
if (*ch==quote && end_of_field(ch+1)) {ch2=ch; break;} // (*1) regular ", ending; leave *ch on closing quote
if (*ch==sep) {
// first sep in this field
// if there is a ", afterwards but before the next \n, use that; the field was quoted and it's still case (i) above.
// Otherwise break here at this first sep as it's case (ii) above (the data contains a quote at the start and no sep)
ch2 = ch;
while (*++ch2 && *ch2!='\n' && *ch2!='\r') {
while ((*++ch2 || ch2<eof) && *ch2!='\n' && *ch2!='\r') {
if (*ch2==quote && end_of_field(ch2+1)) {
ch = ch2; // (*2) move on to that first ", -- that's this field's ending
break;
Expand All @@ -568,7 +568,7 @@ static void Field(FieldParseContext *ctx)
*(ctx->ch) = ch;
} else {
*(ctx->ch) = ch;
if (*ch=='\0' && quoteRule!=2) { target->off--; target->len++; } // test 1324 where final field has open quote but not ending quote; include the open quote like quote rule 2
if (ch==eof && quoteRule!=2) { target->off--; target->len++; } // test 1324 where final field has open quote but not ending quote; include the open quote like quote rule 2
while(target->len>0 && ((ch[-1]==' ' && stripWhite) || ch[-1]=='\0')) { target->len--; ch--; } // test 1551.6; trailing whitespace in field [67,V37] == "\"\"A\"\" ST "
}
}
Expand Down
20 changes: 17 additions & 3 deletions src/freadR.c
Original file line number Diff line number Diff line change
Expand Up @@ -427,10 +427,24 @@ void pushBuffer(ThreadLocalFreadParsingContext *ctx)
lenOff *source = buff8_lenoffs + off8;
for (int i=0; i<nRows; i++) {
int strLen = source->len;
if (strLen) {
if (strLen<=0) {
// strLen == INT_MIN => NA; otherwise the field is not an NA string (that was already checked inside fread_mean)
SET_STRING_ELT(dest, DTi+i, strLen<0 ? NA_STRING : mkCharLenCE(anchor + source->off, strLen, ienc));
} // else dest was already initialized with R_BlankString by allocVector()
if (strLen<0) SET_STRING_ELT(dest, DTi+i, NA_STRING); // else leave the "" in place that was initialized by allocVector()
} else {
const char *str = anchor + source->off;
int c=0;
while (c<strLen && str[c]) c++;
if (c<strLen) {
// embedded nul found; any nuls at the beginning or the end of the field should have already been excluded, but this will strip those too if present, just in case
char *last = (char *)str+c; // obtain write access to (const char *)anchor;
while (c<strLen) {
if (str[c]) *last++=str[c]; // cow page write: saves allocation and management of a temp that would need to thread-safe in future.
c++; // This is only thread accessing this region. For non-mmap direct input nul are not possible (R would not have accepted nul earlier).
}
strLen = last-str;
}
SET_STRING_ELT(dest, DTi+i, mkCharLenCE(str, strLen, ienc));
}
source += cnt8;
}
done++; // if just one string col near the start, don't loop over the other 10,000 cols. TODO? start on first too
Expand Down
6 changes: 3 additions & 3 deletions src/nafill.c
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ SEXP colnamesInt(SEXP x, SEXP cols) {
}

void nafillDouble(double *x, uint_fast64_t nx, unsigned int type, double fill, ans_t *ans, bool verbose) {
double tic;
double tic=0.0;
if (verbose) tic = omp_get_wtime();
if (type==0) { // const
for (uint_fast64_t i=0; i<nx; i++) {
Expand All @@ -62,7 +62,7 @@ void nafillDouble(double *x, uint_fast64_t nx, unsigned int type, double fill, a
}

void nafillInteger(int32_t *x, uint_fast64_t nx, unsigned int type, int32_t fill, ans_t *ans, bool verbose) {
double tic;
double tic=0.0;
if (verbose) tic = omp_get_wtime();
if (type==0) { // const
for (uint_fast64_t i=0; i<nx; i++) {
Expand Down Expand Up @@ -161,7 +161,7 @@ SEXP nafillR(SEXP obj, SEXP type, SEXP fill, SEXP inplace, SEXP cols, SEXP verbo
}
}

double tic, toc;
double tic=0.0, toc=0.0;
if (bverbose) tic = omp_get_wtime();
#pragma omp parallel for if (nx>1) schedule(auto) num_threads(getDTthreads())
for (R_len_t i=0; i<nx; i++) {
Expand Down