diff --git a/NEWS.md b/NEWS.md index e0ea1222ea..df1b96436c 100644 --- a/NEWS.md +++ b/NEWS.md @@ -10,6 +10,8 @@ ## NOTES +1. Compiling from source no longer requires `zlib` header files to be available, [#4844](https://github.com/Rdatatable/data.table/pull/4844). The output suggests installing `zlib` headers, and how (e.g. `zlib1g-dev` on Ubuntu) as before, but now proceeds with `gzip` compression disabled in `fwrite`. Upon calling `fwrite(DT, "file.csv.gz")` at runtime, an error message suggests to reinstall `data.table` with `zlib` headers available. This does not apply to users on Windows or Mac who install the pre-compiled binary package from CRAN. + # data.table [v1.13.6](https://github.com/Rdatatable/data.table/milestone/22?closed=1) (30 Dec 2020) diff --git a/configure b/configure index e29156d61a..f2e98ec312 100755 --- a/configure +++ b/configure @@ -20,6 +20,7 @@ esac # and R-exts note 24 now suggests 'checkbashisms' as we proposed. msg=0 +NOZLIB=1 # if pkg-config is not available then zlib will be disabled for higher chance of compilation success pkg-config --version >/dev/null 2>&1 if [ $? -ne 0 ]; then echo "*** pkg-config is not installed." @@ -30,6 +31,7 @@ else echo "*** pkg-config is installed but 'pkg-config --exists zlib' did not return 0." msg=1 else + NOZLIB=0 lib=`pkg-config --libs zlib` expr -- "$lib" : ".*-lz$" >/dev/null # -- for FreeBSD, #4652 if [ $? -ne 0 ]; then @@ -46,9 +48,9 @@ fi if [ $msg -ne 0 ]; then echo "*** Compilation will now be attempted and if it works you can ignore this message. In" - echo "*** particular, this should be the case on Mac where zlib is built in." - echo "*** However, if compilation fails, try 'locate zlib.h zconf.h' and ensure the zlib" - echo "*** development library is installed :" + echo "*** particular, this should be the case on Mac where zlib is built in or pkg-config" + echo "*** is not installed. However, if compilation fails, try 'locate zlib.h zconf.h' and" + echo "*** ensure the zlib development library is installed :" echo "*** deb: zlib1g-dev (Debian, Ubuntu, ...)" echo "*** rpm: zlib-devel (Fedora, EPEL, ...)" echo "*** There is a zlib in brew for OSX but the built in zlib should work." @@ -109,5 +111,14 @@ fi # retain user supplied PKG_ env variables, #4664. See comments in Makevars.in too. sed -e "s|@PKG_CFLAGS@|$PKG_CFLAGS|" src/Makevars > src/Makevars.tmp && mv src/Makevars.tmp src/Makevars sed -e "s|@PKG_LIBS@|$PKG_LIBS|" src/Makevars > src/Makevars.tmp && mv src/Makevars.tmp src/Makevars +# optional dependency on zlib +if [ "$NOZLIB" = "1" ]; then + echo "*** Compilation without compression support in fwrite" + sed -e "s|@zlib_cflags@|-DNOZLIB|" src/Makevars > src/Makevars.tmp && mv src/Makevars.tmp src/Makevars + sed -e "s|@zlib_libs@||" src/Makevars > src/Makevars.tmp && mv src/Makevars.tmp src/Makevars +else + sed -e "s|@zlib_cflags@||" src/Makevars > src/Makevars.tmp && mv src/Makevars.tmp src/Makevars + sed -e "s|@zlib_libs@|-lz|" src/Makevars > src/Makevars.tmp && mv src/Makevars.tmp src/Makevars +fi exit 0 diff --git a/src/Makevars.in b/src/Makevars.in index 7750c1e8ac..b411786283 100644 --- a/src/Makevars.in +++ b/src/Makevars.in @@ -1,10 +1,11 @@ -PKG_CFLAGS = @PKG_CFLAGS@ @openmp_cflags@ -PKG_LIBS = @PKG_LIBS@ @openmp_cflags@ -lz +PKG_CFLAGS = @PKG_CFLAGS@ @openmp_cflags@ @zlib_cflags@ +PKG_LIBS = @PKG_LIBS@ @openmp_cflags@ @zlib_libs@ # See WRE $1.2.1.1. But retain user supplied PKG_* too, #4664. # WRE states ($1.6) that += isn't portable and that we aren't allowed to use it. # Otherwise we could use the much simpler PKG_LIBS += @openmp_cflags@ -lz. # Can't do PKG_LIBS = $(PKG_LIBS)... either because that's a 'recursive variable reference' error in make # Hence the onerous @...@ substitution. Is it still appropriate in 2020 that we can't use +=? +# Note that -lz is now escaped via @zlib_libs@ when zlib is not installed all: $(SHLIB) @echo PKG_CFLAGS = $(PKG_CFLAGS) diff --git a/src/fwrite.c b/src/fwrite.c index 499d24bec9..b85d513a6f 100644 --- a/src/fwrite.c +++ b/src/fwrite.c @@ -7,7 +7,9 @@ #include // isfinite, isnan #include // abs #include // strlen, strerror +#ifndef NOZLIB #include // for compression to .gz +#endif #ifdef WIN32 #include @@ -552,6 +554,7 @@ void writeCategString(const void *col, int64_t row, char **pch) write_string(getCategString(col, row), pch); } +#ifndef NOZLIB int init_stream(z_stream *stream) { memset(stream, 0, sizeof(z_stream)); // shouldn't be needed, done as part of #4099 to be sure stream->next_in = Z_NULL; @@ -578,6 +581,7 @@ int compressbuff(z_stream *stream, void* dest, size_t *destLen, const void* sour *destLen = stream->total_out; return err == Z_STREAM_END ? Z_OK : err; } +#endif void fwriteMain(fwriteMainArgs args) { @@ -683,6 +687,10 @@ void fwriteMain(fwriteMainArgs args) // # nocov end } } +#ifdef NOZLIB + if (args.is_gzip) + STOP(_("Compression in fwrite uses zlib library. Its header files were not found at the time data.table was compiled. To enable fwrite compression, please reinstall data.table and study the output for further guidance.")); // # nocov +#endif int yamlLen = strlen(args.yaml); if (verbose) { @@ -724,6 +732,7 @@ void fwriteMain(fwriteMainArgs args) } else { int ret1=0, ret2=0; if (args.is_gzip) { +#ifndef NOZLIB z_stream stream = {0}; if(init_stream(&stream)) { free(buff); // # nocov @@ -740,6 +749,7 @@ void fwriteMain(fwriteMainArgs args) if (ret1==Z_OK) ret2 = WRITE(f, zbuff, (int)zbuffUsed); deflateEnd(&stream); free(zbuff); +#endif } else { ret2 = WRITE(f, buff, (int)(ch-buff)); } @@ -785,12 +795,14 @@ void fwriteMain(fwriteMainArgs args) // compute zbuffSize which is the same for each thread size_t zbuffSize = 0; if(args.is_gzip){ +#ifndef NOZLIB z_stream stream = {0}; if(init_stream(&stream)) STOP(_("Can't allocate gzip stream structure")); // # nocov zbuffSize = deflateBound(&stream, buffSize); if (verbose) DTPRINT("zbuffSize=%d returned from deflateBound\n", (int)zbuffSize); deflateEnd(&stream); +#endif } errno=0; @@ -804,6 +816,7 @@ void fwriteMain(fwriteMainArgs args) char *zbuffPool = NULL; if (args.is_gzip) { zbuffPool = malloc(nth*(size_t)zbuffSize); +#ifndef NOZLIB if (!zbuffPool) { // # nocov start free(buffPool); @@ -811,20 +824,21 @@ void fwriteMain(fwriteMainArgs args) (size_t)zbuffSize/(1024^2), nth, errno, strerror(errno)); // # nocov end } +#endif } bool failed = false; // naked (unprotected by atomic) write to bool ok because only ever write true in this special paradigm int failed_compress = 0; // the first thread to fail writes their reason here when they first get to ordered section - char failed_msg[1001] = ""; // to hold zlib's msg; copied out of zlib in ordered section just in case the msg is allocated within zlib int failed_write = 0; // same. could use +ve and -ve in the same code but separate it out to trace Solaris problem, #3931 - if (nth>1) verbose=false; // printing isn't thread safe (there's a temporary print in compressbuff for tracing solaris; #4099) - +#ifndef NOZLIB z_stream thread_streams[nth]; // VLA on stack should be fine for nth structs; in zlib v1.2.11 sizeof(struct)==112 on 64bit // not declared inside the parallel region because solaris appears to move the struct in // memory when the #pragma omp for is entered, which causes zlib's internal self reference // pointer to mismatch, #4099 + char failed_msg[1001] = ""; // to hold zlib's msg; copied out of zlib in ordered section just in case the msg is allocated within zlib +#endif #pragma omp parallel num_threads(nth) { @@ -835,6 +849,7 @@ void fwriteMain(fwriteMainArgs args) void *myzBuff = NULL; size_t myzbuffUsed = 0; +#ifndef NOZLIB z_stream *mystream = &thread_streams[me]; if (args.is_gzip) { myzBuff = zbuffPool + me*zbuffSize; @@ -843,6 +858,7 @@ void fwriteMain(fwriteMainArgs args) my_failed_compress = -998; // # nocov } } +#endif #pragma omp for ordered schedule(dynamic) for(int64_t start=0; startmsg!=NULL) strncpy(failed_msg, mystream->msg, 1000); // copy zlib's msg for safe use after deflateEnd just in case zlib allocated the message +#endif } // else another thread could have failed below while I was working or waiting above; their reason got here first // # nocov end @@ -941,7 +961,9 @@ void fwriteMain(fwriteMainArgs args) // all threads will call this free on their buffer, even if one or more threads had malloc // or realloc fail. If the initial malloc failed, free(NULL) is ok and does nothing. if (args.is_gzip) { +#ifndef NOZLIB deflateEnd(mystream); +#endif } } free(buffPool); @@ -967,11 +989,13 @@ void fwriteMain(fwriteMainArgs args) // from the original error. if (failed) { // # nocov start +#ifndef NOZLIB if (failed_compress) STOP(_("zlib %s (zlib.h %s) deflate() returned error %d with z_stream->msg==\"%s\" Z_FINISH=%d Z_BLOCK=%d. %s"), zlibVersion(), ZLIB_VERSION, failed_compress, failed_msg, Z_FINISH, Z_BLOCK, verbose ? _("Please include the full output above and below this message in your data.table bug report.") : _("Please retry fwrite() with verbose=TRUE and include the full output with your data.table bug report.")); +#endif if (failed_write) STOP("%s: '%s'", strerror(failed_write), args.filename); // # nocov end diff --git a/src/utils.c b/src/utils.c index 8a3598f575..b1cb3b1aed 100644 --- a/src/utils.c +++ b/src/utils.c @@ -374,10 +374,16 @@ SEXP coerceUtf8IfNeeded(SEXP x) { return(ans); } +#ifndef NOZLIB #include +#endif SEXP dt_zlib_version() { char out[51]; +#ifndef NOZLIB snprintf(out, 50, "zlibVersion()==%s ZLIB_VERSION==%s", zlibVersion(), ZLIB_VERSION); +#else + snprintf(out, 50, "zlib header files were not found when data.table was compiled"); +#endif return ScalarString(mkChar(out)); }