From f1a9bc64018fbd831c5a1739cccbf11a64266dff Mon Sep 17 00:00:00 2001 From: Philippe Chataignon Date: Sat, 12 Jan 2019 15:04:02 +0100 Subject: [PATCH 1/9] Add gzip support to fwrite Use zlib and gzopen/gzwrite/gzclose functions to write buffer directly in a gzipped csv file. zlib is thread-safe and the gzip compression uses the fwrite threads. Option compress="gzip" is added to fwrite and is automatically set when file ends with ".gz" --- R/fwrite.R | 11 +++++-- src/fwrite.c | 82 ++++++++++++++++++++++++++++++++++++++++----------- src/fwrite.h | 19 +----------- src/fwriteR.c | 6 ++-- 4 files changed, 79 insertions(+), 39 deletions(-) diff --git a/R/fwrite.R b/R/fwrite.R index 9f918c46da..a2d0776ef4 100644 --- a/R/fwrite.R +++ b/R/fwrite.R @@ -7,10 +7,13 @@ fwrite <- function(x, file="", append=FALSE, quote="auto", dateTimeAs = c("ISO","squash","epoch","write.csv"), buffMB=8, nThread=getDTthreads(verbose), showProgress=getOption("datatable.showProgress", interactive()), - verbose=getOption("datatable.verbose", FALSE)) { + compress = c("none", "gzip"), + verbose=getOption("datatable.verbose", FALSE) + ) { isLOGICAL = function(x) isTRUE(x) || identical(FALSE, x) # it seems there is no isFALSE in R? 
na = as.character(na[1L]) # fix for #1725 if (missing(qmethod)) qmethod = qmethod[1L] + if (missing(compress)) compress = compress[1L] if (missing(dateTimeAs)) { dateTimeAs = dateTimeAs[1L] } else if (length(dateTimeAs)>1L) stop("dateTimeAs must be a single string") dateTimeAs = chmatch(dateTimeAs, c("ISO","squash","epoch","write.csv"))-1L @@ -38,6 +41,7 @@ fwrite <- function(x, file="", append=FALSE, quote="auto", dec != sep, # sep2!=dec and sep2!=sep checked at C level when we know if list columns are present is.character(eol) && length(eol)==1L, length(qmethod) == 1L && qmethod %chin% c("double", "escape"), + length(compress) == 1L && compress %chin% c("none", "gzip"), isLOGICAL(col.names), isLOGICAL(append), isLOGICAL(row.names), isLOGICAL(verbose), isLOGICAL(showProgress), isLOGICAL(logical01), length(na) == 1L, #1725, handles NULL or character(0) input @@ -45,6 +49,9 @@ fwrite <- function(x, file="", append=FALSE, quote="auto", length(buffMB)==1L && !is.na(buffMB) && 1L<=buffMB && buffMB<=1024, length(nThread)==1L && !is.na(nThread) && nThread>=1L ) + + is_gzip <- compress == "gzip" || grepl("\\.gz$", file) + file <- path.expand(file) # "~/foo/bar" if (append && missing(col.names) && (file=="" || file.exists(file))) col.names = FALSE # test 1658.16 checks this @@ -71,7 +78,7 @@ fwrite <- function(x, file="", append=FALSE, quote="auto", file <- enc2native(file) # CfwriteR cannot handle UTF-8 if that is not the native encoding, see #3078. 
.Call(CfwriteR, x, file, sep, sep2, eol, na, dec, quote, qmethod=="escape", append, row.names, col.names, logical01, dateTimeAs, buffMB, nThread, - showProgress, verbose) + showProgress, is_gzip, verbose) invisible() } diff --git a/src/fwrite.c b/src/fwrite.c index d6a01c1c30..c42297e512 100644 --- a/src/fwrite.c +++ b/src/fwrite.c @@ -7,6 +7,7 @@ #include // isfinite, isnan #include // abs #include // strlen, strerror + #ifdef WIN32 #include #include @@ -17,6 +18,8 @@ #define WRITE write #define CLOSE close #endif + +#include "zlib.h" // for writing gzip file #include "myomp.h" #include "fwrite.h" @@ -643,11 +646,14 @@ void fwriteMain(fwriteMainArgs args) maxLineLen += eolLen; if (args.verbose) DTPRINT("maxLineLen=%d from sample. Found in %.3fs\n", maxLineLen, 1.0*(wallclock()-t0)); - int f; + int f=0; + gzFile zf=NULL; + int err; if (*args.filename=='\0') { f=-1; // file="" means write to standard output + args.is_gzip = false; // gzip is only for file // eol = "\n"; // We'll use DTPRINT which converts \n to \r\n inside it on Windows - } else { + } else if (!args.is_gzip) { #ifdef WIN32 f = _open(args.filename, _O_WRONLY | _O_BINARY | _O_CREAT | (args.append ? _O_APPEND : _O_TRUNC), _S_IWRITE); // O_BINARY rather than O_TEXT for explicit control and speed since it seems that write() has a branch inside it @@ -655,7 +661,6 @@ void fwriteMain(fwriteMainArgs args) #else f = open(args.filename, O_WRONLY | O_CREAT | (args.append ? O_APPEND : O_TRUNC), 0666); // There is no binary/text mode distinction on Linux and Mac -#endif if (f == -1) { int erropen = errno; STOP(access( args.filename, F_OK ) != -1 ? @@ -663,7 +668,23 @@ void fwriteMain(fwriteMainArgs args) "%s: '%s'. Unable to create new file for writing (it does not exist already). 
Do you have permission to write here, is there space on the disk and does the path exist?", strerror(erropen), args.filename); } + } else { +#endif + zf = gzopen(args.filename, "wb"); + if (zf == NULL) { + int erropen = errno; + STOP(access( args.filename, F_OK ) != -1 ? + "%s: '%s'. Failed to open existing file for writing. Do you have write permission to it? Is this Windows and does another process such as Excel have it open?" : + "%s: '%s'. Unable to create new file for writing (it does not exist already). Do you have permission to write here, is there space on the disk and does the path exist?", + strerror(erropen), args.filename); + } + // alloc gzip buffer : buff + 10% + 16 + size_t buffzSize = (size_t)(1024*1024*buffMB + 1024*1024*buffMB / 10 + 16); + if (gzbuffer(zf, buffzSize)) { + STOP("Error allocate buffer for gzip file"); + } } + t0=wallclock(); if (args.verbose) { @@ -683,32 +704,50 @@ void fwriteMain(fwriteMainArgs args) } for (int j=0; j 1 million bytes long *ch++ = args.sep; // this sep after the last column name won't be written to the file } if (f==-1) { DTPRINT(args.eol); - } else if (WRITE(f, args.eol, eolLen)==-1) { + } else if (!args.is_gzip && WRITE(f, args.eol, eolLen)==-1) { int errwrite=errno; - close(f); + CLOSE(f); free(buff); STOP("%s: '%s'", strerror(errwrite), args.filename); + } else if (args.is_gzip && (!gzwrite(zf, args.eol, eolLen))) { + int errwrite=gzclose(zf); + free(buff); + STOP("Error gzwrite %d: %s", errwrite, args.filename); } + } free(buff); // TODO: also to be free'd in cleanup when there's an error opening file above if (args.verbose) DTPRINT("done in %.3fs\n", 1.0*(wallclock()-t0)); if (args.nrow == 0) { if (args.verbose) DTPRINT("No data rows present (nrow==0)\n"); - if (f!=-1 && CLOSE(f)) STOP("%s: '%s'", strerror(errno), args.filename); + if (args.is_gzip) { + if ( (err = gzclose(zf)) ) STOP("gzclose error %d: '%s'", err, args.filename); + } else { + if (f!=-1 && CLOSE(f)) STOP("%s: '%s'", strerror(errno), 
args.filename); + } return; } @@ -815,8 +854,10 @@ void fwriteMain(fwriteMainArgs args) // by slave threads, even when one-at-a-time. Anyway, made this single-threaded when output to console // to be safe (setDTthreads(1) in fwrite.R) since output to console doesn't need to be fast. } else { - if (WRITE(f, myBuff, (int)(ch-myBuff)) == -1) { + if (!args.is_gzip && WRITE(f, myBuff, (int)(ch-myBuff)) == -1) { failed=errno; + } else if (args.is_gzip && (!gzwrite(zf, myBuff, (int)(ch-myBuff)))) { + gzerror(zf, &failed); } if (myAlloc > buffSize) anyBufferGrown = true; int used = 100*((double)(ch-myBuff))/buffSize; // percentage of original buffMB @@ -873,8 +914,15 @@ void fwriteMain(fwriteMainArgs args) DTPRINT("\n"); } } - if (f!=-1 && CLOSE(f) && !failed) - STOP("%s: '%s'", strerror(errno), args.filename); + + if (!args.is_gzip) { + if (f!=-1 && CLOSE(f) && !failed) + STOP("%s: '%s'", strerror(errno), args.filename); + } else { + if ( (err=gzclose(zf)) ) { + STOP("gzclose error %d: '%s'", err, args.filename); + } + } // quoted '%s' in case of trailing spaces in the filename // If a write failed, the line above tries close() to clean up, but that might fail as well. So the // '&& !failed' is to not report the error as just 'closing file' but the next line for more detail diff --git a/src/fwrite.h b/src/fwrite.h index 2a6933b785..3bc2942c10 100644 --- a/src/fwrite.h +++ b/src/fwrite.h @@ -32,14 +32,10 @@ typedef struct fwriteMainArgs // contains non-ASCII characters, it should be UTF-8 encoded (however fread // will not validate the encoding). 
const char *filename; - int ncol; - int64_t nrow; - // a vector of pointers to all-same-length column vectors void **columns; - writer_fun_t *funs; // a vector of writer_fun_t function pointers // length ncol vector containing which fun[] to use for each column @@ -48,19 +44,12 @@ typedef struct fwriteMainArgs uint8_t *whichFun; void *colNames; // NULL means no header, otherwise ncol strings - bool doRowNames; // optional, likely false - void *rowNames; // if doRowNames is true and rowNames is not NULL then they're used, otherwise row numbers are output. - char sep; - char sep2; - char dec; - const char *eol; - const char *na; // The quote character is always " (ascii 34) and cannot be changed since nobody on Earth uses a different quoting character, surely @@ -69,19 +58,13 @@ typedef struct fwriteMainArgs int8_t doQuote; bool qmethodEscape; // true means escape quotes using backslash, else double-up double quotes. - bool squashDateTime; - bool append; - int buffMB; // [1-1024] default 8MB - int nth; - bool showProgress; - bool verbose; - + bool is_gzip; } fwriteMainArgs; void fwriteMain(fwriteMainArgs args); diff --git a/src/fwriteR.c b/src/fwriteR.c index e3affcc3dc..dcea9fffdc 100644 --- a/src/fwriteR.c +++ b/src/fwriteR.c @@ -1,4 +1,3 @@ - #include #include "data.table.h" #include "fwrite.h" @@ -128,10 +127,13 @@ SEXP fwriteR( SEXP buffMB_Arg, // [1-1024] default 8MB SEXP nThread_Arg, SEXP showProgress_Arg, - SEXP verbose_Arg) + SEXP is_gzip_Arg, + SEXP verbose_Arg + ) { if (!isNewList(DF)) error("fwrite must be passed an object of type list; e.g. 
data.frame, data.table"); fwriteMainArgs args; + args.is_gzip = LOGICAL(is_gzip_Arg)[0]; args.verbose = LOGICAL(verbose_Arg)[0]; args.filename = CHAR(STRING_ELT(filename_Arg, 0)); args.ncol = length(DF); From 6053b7c9d585230075fc8627e12bbf28a0ef6b85 Mon Sep 17 00:00:00 2001 From: Philippe Chataignon Date: Sat, 12 Jan 2019 19:01:23 +0100 Subject: [PATCH 2/9] Add compress= option in fwrite documentation --- man/fwrite.Rd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/man/fwrite.Rd b/man/fwrite.Rd index 8baf0d2c78..ec2322490e 100644 --- a/man/fwrite.Rd +++ b/man/fwrite.Rd @@ -17,6 +17,7 @@ fwrite(x, file = "", append = FALSE, quote = "auto", dateTimeAs = c("ISO","squash","epoch","write.csv"), buffMB = 8L, nThread = getDTthreads(verbose), showProgress = getOption("datatable.showProgress", interactive()), + compress = c("none", "gzip"), verbose = getOption("datatable.verbose", FALSE)) } \arguments{ @@ -52,6 +53,7 @@ fwrite(x, file = "", append = FALSE, quote = "auto", \item{buffMB}{The buffer size (MB) per thread in the range 1 to 1024, default 8MB. Experiment to see what works best for your data on your hardware.} \item{nThread}{The number of threads to use. Experiment to see what works best for your data on your hardware.} \item{showProgress}{ Display a progress meter on the console? Ignored when \code{file==""}. } + \item{compress}{If compress = \code{"gzip"} or if \code{file} ends in \code{.gz}, even if compress = \code{"none"}, then the output format is gzipped csv. Output to console is never gzipped even if compress = \code{"gzip"}. 
By default, compress = \code{"none"}.} \item{verbose}{Be chatty and report timings?} } \details{ From 73947c6de6490ddc105bd61477f82f935636eba3 Mon Sep 17 00:00:00 2001 From: Philippe Chataignon Date: Sat, 12 Jan 2019 22:31:46 +0100 Subject: [PATCH 3/9] Add tests for fwrite with compress="gzip" option --- inst/tests/tests.Rraw | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 0e7a318d11..3d2164d8b4 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -9266,13 +9266,14 @@ test(1658.8, fwrite(data.table(a=c(1:5), b=c(1:5)), quote=TRUE), test(1658.9, fwrite(data.table(a=c(1:3), b=c(1:3)), quote=TRUE), output='"a","b"\n1,1\n2,2\n3,3') -# block size one bigger than number of rows -test(1658.11, fwrite(data.table(a=c(1:3), b=c(1:3)), quote=TRUE), - output='"a","b"\n1,1\n2,2\n3,3') +# fwrite output to console ignore compress +test(1658.11, fwrite(data.table(a=c(1:3), b=c(1:3)), compress="gzip"), + output='a,b\n1,1\n2,2\n3,3') -# block size one less than number of rows -test(1658.12, fwrite(data.table(a=c(1:3), b=c(1:3)), quote=TRUE), - output='"a","b"\n1,1\n2,2\n3,3') +# fwrite gzipped output +f <- tempfile() +test(1658.12, fwrite(data.table(a=c(1:3), b=c(1:3)), compress="gzip"), + output='a,b\n1,1\n2,2\n3,3') # writing a data.frame test(1658.13, fwrite(data.frame(a="foo", b="bar"), quote=TRUE), From 041bb4a044ca39fc08c5b8176f289012e64e4352 Mon Sep 17 00:00:00 2001 From: Philippe Chataignon Date: Sat, 12 Jan 2019 23:38:41 +0100 Subject: [PATCH 4/9] Rewrite test 1658.12 --- inst/tests/tests.Rraw | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 3d2164d8b4..10ded2d750 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -9271,9 +9271,12 @@ test(1658.11, fwrite(data.table(a=c(1:3), b=c(1:3)), compress="gzip"), output='a,b\n1,1\n2,2\n3,3') # fwrite gzipped output -f <- tempfile() -test(1658.12, 
fwrite(data.table(a=c(1:3), b=c(1:3)), compress="gzip"), - output='a,b\n1,1\n2,2\n3,3') +if (.Platform$OS.type=="unix") { + f <- tempfile() + fwrite(data.table(a=c(1:3), b=c(1:3)), file=f, compress="gzip") + test(1658.12, system(paste("zcat", f), intern=T), output='[1] "a,b" "1,1" "2,2" "3,3"') + unlink(f) +} # writing a data.frame test(1658.13, fwrite(data.frame(a="foo", b="bar"), quote=TRUE), From 75af89e496217c37802de1b871cbd2399ec70bc6 Mon Sep 17 00:00:00 2001 From: Philippe Chataignon Date: Sun, 13 Jan 2019 10:19:41 +0100 Subject: [PATCH 5/9] Add default option in compress In fwrite, compress has now 3 options : * default : gzip if file ends with .gz, else csv * none : force csv * gzip : force gzip --- R/fwrite.R | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/R/fwrite.R b/R/fwrite.R index a2d0776ef4..59ecb8f5f7 100644 --- a/R/fwrite.R +++ b/R/fwrite.R @@ -7,7 +7,7 @@ fwrite <- function(x, file="", append=FALSE, quote="auto", dateTimeAs = c("ISO","squash","epoch","write.csv"), buffMB=8, nThread=getDTthreads(verbose), showProgress=getOption("datatable.showProgress", interactive()), - compress = c("none", "gzip"), + compress = c("default", "none", "gzip"), verbose=getOption("datatable.verbose", FALSE) ) { isLOGICAL = function(x) isTRUE(x) || identical(FALSE, x) # it seems there is no isFALSE in R? 
@@ -41,7 +41,7 @@ fwrite <- function(x, file="", append=FALSE, quote="auto", dec != sep, # sep2!=dec and sep2!=sep checked at C level when we know if list columns are present is.character(eol) && length(eol)==1L, length(qmethod) == 1L && qmethod %chin% c("double", "escape"), - length(compress) == 1L && compress %chin% c("none", "gzip"), + length(compress) == 1L && compress %chin% c("default", "none", "gzip"), isLOGICAL(col.names), isLOGICAL(append), isLOGICAL(row.names), isLOGICAL(verbose), isLOGICAL(showProgress), isLOGICAL(logical01), length(na) == 1L, #1725, handles NULL or character(0) input @@ -50,7 +50,7 @@ fwrite <- function(x, file="", append=FALSE, quote="auto", length(nThread)==1L && !is.na(nThread) && nThread>=1L ) - is_gzip <- compress == "gzip" || grepl("\\.gz$", file) + is_gzip <- compress == "gzip" || (compress == "default" && grepl("\\.gz$", file)) file <- path.expand(file) # "~/foo/bar" if (append && missing(col.names) && (file=="" || file.exists(file))) @@ -81,4 +81,3 @@ fwrite <- function(x, file="", append=FALSE, quote="auto", showProgress, is_gzip, verbose) invisible() } - From b3c2ae9fe8d0e8fd5d382bc44f580630e1b3cbb3 Mon Sep 17 00:00:00 2001 From: Philippe Chataignon Date: Sun, 13 Jan 2019 10:39:44 +0100 Subject: [PATCH 6/9] Adapt fwrite compress option documentation --- man/fwrite.Rd | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/man/fwrite.Rd b/man/fwrite.Rd index ec2322490e..853e490656 100644 --- a/man/fwrite.Rd +++ b/man/fwrite.Rd @@ -17,7 +17,7 @@ fwrite(x, file = "", append = FALSE, quote = "auto", dateTimeAs = c("ISO","squash","epoch","write.csv"), buffMB = 8L, nThread = getDTthreads(verbose), showProgress = getOption("datatable.showProgress", interactive()), - compress = c("none", "gzip"), + compress = c("default", "none", "gzip"), verbose = getOption("datatable.verbose", FALSE)) } \arguments{ @@ -53,7 +53,7 @@ fwrite(x, file = "", append = FALSE, quote = "auto", \item{buffMB}{The buffer size (MB) per thread in 
the range 1 to 1024, default 8MB. Experiment to see what works best for your data on your hardware.} \item{nThread}{The number of threads to use. Experiment to see what works best for your data on your hardware.} \item{showProgress}{ Display a progress meter on the console? Ignored when \code{file==""}. } - \item{compress}{If compress = \code{"gzip"} or if \code{file} ends in \code{.gz}, even if compress = \code{"none"}, then the output format is gzipped csv. Output to console is never gzipped even if compress = \code{"gzip"}. By default, compress = \code{"none"}.} + \item{compress}{If \code{compress = "default"} and if \code{file} ends in \code{.gz} then output format is gzipped csv else csv. If \code{compress = "none"}, output format is always csv. If \code{compress = "gzip"} then format is gzipped csv. Output to the console is never gzipped even if \code{compress = "gzip"}. By default, \code{compress = "default"}.} \item{verbose}{Be chatty and report timings?} } \details{ From 3fb7ecf9961b4110e599e745106fc73cdb5891ba Mon Sep 17 00:00:00 2001 From: Philippe Chataignon Date: Sun, 13 Jan 2019 11:37:13 +0100 Subject: [PATCH 7/9] Replace 'default' by 'auto' in fwrite compress option --- R/fwrite.R | 6 +++--- man/fwrite.Rd | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/fwrite.R b/R/fwrite.R index 59ecb8f5f7..c775510935 100644 --- a/R/fwrite.R +++ b/R/fwrite.R @@ -7,7 +7,7 @@ fwrite <- function(x, file="", append=FALSE, quote="auto", dateTimeAs = c("ISO","squash","epoch","write.csv"), buffMB=8, nThread=getDTthreads(verbose), showProgress=getOption("datatable.showProgress", interactive()), - compress = c("default", "none", "gzip"), + compress = c("auto", "none", "gzip"), verbose=getOption("datatable.verbose", FALSE) ) { isLOGICAL = function(x) isTRUE(x) || identical(FALSE, x) # it seems there is no isFALSE in R? 
@@ -41,7 +41,7 @@ fwrite <- function(x, file="", append=FALSE, quote="auto", dec != sep, # sep2!=dec and sep2!=sep checked at C level when we know if list columns are present is.character(eol) && length(eol)==1L, length(qmethod) == 1L && qmethod %chin% c("double", "escape"), - length(compress) == 1L && compress %chin% c("default", "none", "gzip"), + length(compress) == 1L && compress %chin% c("auto", "none", "gzip"), isLOGICAL(col.names), isLOGICAL(append), isLOGICAL(row.names), isLOGICAL(verbose), isLOGICAL(showProgress), isLOGICAL(logical01), length(na) == 1L, #1725, handles NULL or character(0) input @@ -50,7 +50,7 @@ fwrite <- function(x, file="", append=FALSE, quote="auto", length(nThread)==1L && !is.na(nThread) && nThread>=1L ) - is_gzip <- compress == "gzip" || (compress == "default" && grepl("\\.gz$", file)) + is_gzip <- compress == "gzip" || (compress == "auto" && grepl("\\.gz$", file)) file <- path.expand(file) # "~/foo/bar" if (append && missing(col.names) && (file=="" || file.exists(file))) diff --git a/man/fwrite.Rd b/man/fwrite.Rd index 853e490656..59519281a7 100644 --- a/man/fwrite.Rd +++ b/man/fwrite.Rd @@ -53,7 +53,7 @@ fwrite(x, file = "", append = FALSE, quote = "auto", \item{buffMB}{The buffer size (MB) per thread in the range 1 to 1024, default 8MB. Experiment to see what works best for your data on your hardware.} \item{nThread}{The number of threads to use. Experiment to see what works best for your data on your hardware.} \item{showProgress}{ Display a progress meter on the console? Ignored when \code{file==""}. } - \item{compress}{If \code{compress = "default"} and if \code{file} ends in \code{.gz} then output format is gzipped csv else csv. If \code{compress = "none"}, output format is always csv. If \code{compress = "gzip"} then format is gzipped csv. Output to the console is never gzipped even if \code{compress = "gzip"}. 
By default, \code{compress = "default"}.} + \item{compress}{If \code{compress = "auto"} and if \code{file} ends in \code{.gz} then output format is gzipped csv else csv. If \code{compress = "none"}, output format is always csv. If \code{compress = "gzip"} then format is gzipped csv. Output to the console is never gzipped even if \code{compress = "gzip"}. By default, \code{compress = "auto"}.} \item{verbose}{Be chatty and report timings?} } \details{ From da79731b6d4443973cbcc07809b5c9d1119674c8 Mon Sep 17 00:00:00 2001 From: Philippe Chataignon Date: Sun, 13 Jan 2019 11:47:08 +0100 Subject: [PATCH 8/9] Tests for gzip compression in fwrite and restore tests 1658.11,12 --- inst/tests/tests.Rraw | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 10ded2d750..5b9ac5f34d 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -9266,17 +9266,13 @@ test(1658.8, fwrite(data.table(a=c(1:5), b=c(1:5)), quote=TRUE), test(1658.9, fwrite(data.table(a=c(1:3), b=c(1:3)), quote=TRUE), output='"a","b"\n1,1\n2,2\n3,3') -# fwrite output to console ignore compress -test(1658.11, fwrite(data.table(a=c(1:3), b=c(1:3)), compress="gzip"), - output='a,b\n1,1\n2,2\n3,3') +# block size one bigger than number of rows +test(1658.11, fwrite(data.table(a=c(1:3), b=c(1:3)), quote=TRUE), + output='"a","b"\n1,1\n2,2\n3,3') -# fwrite gzipped output -if (.Platform$OS.type=="unix") { - f <- tempfile() - fwrite(data.table(a=c(1:3), b=c(1:3)), file=f, compress="gzip") - test(1658.12, system(paste("zcat", f), intern=T), output='[1] "a,b" "1,1" "2,2" "3,3"') - unlink(f) -} +# block size one less than number of rows +test(1658.12, fwrite(data.table(a=c(1:3), b=c(1:3)), quote=TRUE), + output='"a","b"\n1,1\n2,2\n3,3') # writing a data.frame test(1658.13, fwrite(data.frame(a="foo", b="bar"), quote=TRUE), @@ -9353,6 +9349,27 @@ test(1658.34, fwrite(matrix(1:4, nrow=2, ncol=2), quote = TRUE), 
output = '"V1", test(1658.35, fwrite(matrix(1:3, nrow=3, ncol=1), quote = TRUE), output = '"V1"\n.*1\n2\n3', message = "x being coerced from class: matrix to data.table") test(1658.36, fwrite(matrix(1:4, nrow=2, ncol=2, dimnames = list(c("ra","rb"),c("ca","cb"))), quote = TRUE), output = '"ca","cb"\n.*1,3\n2,4', message = "x being coerced from class: matrix to data.table") +# fwrite output to console ignore compress +test(1658.37, fwrite(data.table(a=c(1:3), b=c(1:3)), compress="gzip"), + output='a,b\n1,1\n2,2\n3,3') + +# fwrite force gzipped output +if (.Platform$OS.type=="unix") { + f <- tempfile() + fwrite(data.table(a=c(1:3), b=c(1:3)), file=f, compress="gzip") + test(1658.38, system(paste("zcat", f), intern=T), output='[1] "a,b" "1,1" "2,2" "3,3"') + unlink(f) +} + + +# fwrite force csv output +if (.Platform$OS.type=="unix") { + f <- tempfile() + fwrite(data.table(a=c(1:3), b=c(1:3)), file=f, compress="none") + test(1658.39, system(paste("cat", f), intern=T), output='[1] "a,b" "1,1" "2,2" "3,3"') + unlink(f) +} + ## End fwrite tests # tests for #679, inrange(), FR #707 From 67a9c36f3ffe975012889783f9401a0fd163c001 Mon Sep 17 00:00:00 2001 From: Philippe Chataignon Date: Sun, 13 Jan 2019 13:09:41 +0100 Subject: [PATCH 9/9] \#endif was in wrong place --- src/fwrite.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/fwrite.c b/src/fwrite.c index c42297e512..cd3e45a450 100644 --- a/src/fwrite.c +++ b/src/fwrite.c @@ -661,6 +661,7 @@ void fwriteMain(fwriteMainArgs args) #else f = open(args.filename, O_WRONLY | O_CREAT | (args.append ? O_APPEND : O_TRUNC), 0666); // There is no binary/text mode distinction on Linux and Mac +#endif if (f == -1) { int erropen = errno; STOP(access( args.filename, F_OK ) != -1 ? @@ -669,7 +670,6 @@ void fwriteMain(fwriteMainArgs args) strerror(erropen), args.filename); } } else { -#endif zf = gzopen(args.filename, "wb"); if (zf == NULL) { int erropen = errno;