Skip to content
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@

## NOTES

1. Compiling from source no longer requires `zlib` header files to be available, [#4844](https://github.com/Rdatatable/data.table/pull/4844). The output suggests installing `zlib` headers, and how (e.g. `zlib1g-dev` on Ubuntu) as before, but now proceeds with `gzip` compression disabled in `fwrite`. Upon calling `fwrite(DT, "file.csv.gz")` at runtime, an error message suggests to reinstall `data.table` with `zlib` headers available. This does not apply to users on Windows or Mac who install the pre-compiled binary package from CRAN.


# data.table [v1.13.6](https://github.com/Rdatatable/data.table/milestone/22?closed=1) (30 Dec 2020)

Expand Down
17 changes: 14 additions & 3 deletions configure
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ esac
# and R-exts note 24 now suggests 'checkbashisms' as we proposed.

msg=0
NOZLIB=1 # if pkg-config is not available then zlib will be disabled for higher chance of compilation success
pkg-config --version >/dev/null 2>&1
if [ $? -ne 0 ]; then
echo "*** pkg-config is not installed."
Expand All @@ -30,6 +31,7 @@ else
echo "*** pkg-config is installed but 'pkg-config --exists zlib' did not return 0."
msg=1
else
NOZLIB=0
lib=`pkg-config --libs zlib`
expr -- "$lib" : ".*-lz$" >/dev/null # -- for FreeBSD, #4652
if [ $? -ne 0 ]; then
Expand All @@ -46,9 +48,9 @@ fi

if [ $msg -ne 0 ]; then
echo "*** Compilation will now be attempted and if it works you can ignore this message. In"
echo "*** particular, this should be the case on Mac where zlib is built in."
echo "*** However, if compilation fails, try 'locate zlib.h zconf.h' and ensure the zlib"
echo "*** development library is installed :"
echo "*** particular, this should be the case on Mac where zlib is built in or pkg-config"
echo "*** is not installed. However, if compilation fails, try 'locate zlib.h zconf.h' and"
echo "*** ensure the zlib development library is installed :"
echo "*** deb: zlib1g-dev (Debian, Ubuntu, ...)"
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

doesn't this turn off zlib support on mac by default

Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If pkg-config is not available on Mac then yes, it turns off. Needs testing anyway.
There is a related comment above:

if pkg-config is not available then zlib will be disabled for higher chance of compilation success

Copy link
Copy Markdown
Member Author

@jangorecki jangorecki Dec 12, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Note that this verbose message branch was hit before when pkg-config was not installed, just the message was not mentioning that.

Copy link
Copy Markdown
Member Author

@jangorecki jangorecki Dec 12, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We could eventually try to check if zlib is available by attempting to compile dummy code, like we do for openmp. Docker image in your report didn't have neither pkg-config and zlib. So then we could still compile with zlib support. For now I don't think it is worth. Messages are clear and points users in the proper direction. Installing pkg-config and/or zlib.

echo "*** rpm: zlib-devel (Fedora, EPEL, ...)"
echo "*** There is a zlib in brew for OSX but the built in zlib should work."
Expand Down Expand Up @@ -109,5 +111,14 @@ fi
# retain user supplied PKG_ env variables, #4664. See comments in Makevars.in too.
sed -e "s|@PKG_CFLAGS@|$PKG_CFLAGS|" src/Makevars > src/Makevars.tmp && mv src/Makevars.tmp src/Makevars
sed -e "s|@PKG_LIBS@|$PKG_LIBS|" src/Makevars > src/Makevars.tmp && mv src/Makevars.tmp src/Makevars
# optional dependency on zlib
if [ "$NOZLIB" = "1" ]; then
echo "*** Compilation without compression support in fwrite"
sed -e "s|@zlib_cflags@|-DNOZLIB|" src/Makevars > src/Makevars.tmp && mv src/Makevars.tmp src/Makevars
sed -e "s|@zlib_libs@||" src/Makevars > src/Makevars.tmp && mv src/Makevars.tmp src/Makevars
else
sed -e "s|@zlib_cflags@||" src/Makevars > src/Makevars.tmp && mv src/Makevars.tmp src/Makevars
sed -e "s|@zlib_libs@|-lz|" src/Makevars > src/Makevars.tmp && mv src/Makevars.tmp src/Makevars
fi

exit 0
5 changes: 3 additions & 2 deletions src/Makevars.in
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
PKG_CFLAGS = @PKG_CFLAGS@ @openmp_cflags@
PKG_LIBS = @PKG_LIBS@ @openmp_cflags@ -lz
PKG_CFLAGS = @PKG_CFLAGS@ @openmp_cflags@ @zlib_cflags@
PKG_LIBS = @PKG_LIBS@ @openmp_cflags@ @zlib_libs@
# See WRE $1.2.1.1. But retain user supplied PKG_* too, #4664.
# WRE states ($1.6) that += isn't portable and that we aren't allowed to use it.
# Otherwise we could use the much simpler PKG_LIBS += @openmp_cflags@ -lz.
# Can't do PKG_LIBS = $(PKG_LIBS)... either because that's a 'recursive variable reference' error in make
# Hence the onerous @...@ substitution. Is it still appropriate in 2020 that we can't use +=?
# Note that -lz is now escaped via @zlib_libs@ when zlib is not installed

all: $(SHLIB)
@echo PKG_CFLAGS = $(PKG_CFLAGS)
Expand Down
30 changes: 27 additions & 3 deletions src/fwrite.c
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
#include <math.h> // isfinite, isnan
#include <stdlib.h> // abs
#include <string.h> // strlen, strerror
#ifndef NOZLIB
#include <zlib.h> // for compression to .gz
#endif

#ifdef WIN32
#include <sys/types.h>
Expand Down Expand Up @@ -552,6 +554,7 @@ void writeCategString(const void *col, int64_t row, char **pch)
write_string(getCategString(col, row), pch);
}

#ifndef NOZLIB
int init_stream(z_stream *stream) {
memset(stream, 0, sizeof(z_stream)); // shouldn't be needed, done as part of #4099 to be sure
stream->next_in = Z_NULL;
Expand All @@ -578,6 +581,7 @@ int compressbuff(z_stream *stream, void* dest, size_t *destLen, const void* sour
*destLen = stream->total_out;
return err == Z_STREAM_END ? Z_OK : err;
}
#endif

void fwriteMain(fwriteMainArgs args)
{
Expand Down Expand Up @@ -683,6 +687,10 @@ void fwriteMain(fwriteMainArgs args)
// # nocov end
}
}
#ifdef NOZLIB
if (args.is_gzip)
STOP(_("Compression in fwrite uses zlib library. Its header files were not found at the time data.table was compiled. To enable fwrite compression, please reinstall data.table and study the output for further guidance.")); // # nocov
#endif

int yamlLen = strlen(args.yaml);
if (verbose) {
Expand Down Expand Up @@ -724,6 +732,7 @@ void fwriteMain(fwriteMainArgs args)
} else {
int ret1=0, ret2=0;
if (args.is_gzip) {
#ifndef NOZLIB
z_stream stream = {0};
if(init_stream(&stream)) {
free(buff); // # nocov
Expand All @@ -740,6 +749,7 @@ void fwriteMain(fwriteMainArgs args)
if (ret1==Z_OK) ret2 = WRITE(f, zbuff, (int)zbuffUsed);
deflateEnd(&stream);
free(zbuff);
#endif
} else {
ret2 = WRITE(f, buff, (int)(ch-buff));
}
Expand Down Expand Up @@ -785,12 +795,14 @@ void fwriteMain(fwriteMainArgs args)
// compute zbuffSize which is the same for each thread
size_t zbuffSize = 0;
if(args.is_gzip){
#ifndef NOZLIB
z_stream stream = {0};
if(init_stream(&stream))
STOP(_("Can't allocate gzip stream structure")); // # nocov
zbuffSize = deflateBound(&stream, buffSize);
if (verbose) DTPRINT("zbuffSize=%d returned from deflateBound\n", (int)zbuffSize);
deflateEnd(&stream);
#endif
}

errno=0;
Expand All @@ -804,27 +816,29 @@ void fwriteMain(fwriteMainArgs args)
char *zbuffPool = NULL;
if (args.is_gzip) {
zbuffPool = malloc(nth*(size_t)zbuffSize);
#ifndef NOZLIB
if (!zbuffPool) {
// # nocov start
free(buffPool);
STOP(_("Unable to allocate %d MB * %d thread compressed buffers; '%d: %s'. Please read ?fwrite for nThread, buffMB and verbose options."),
(size_t)zbuffSize/(1024^2), nth, errno, strerror(errno));
// # nocov end
}
#endif
}

bool failed = false; // naked (unprotected by atomic) write to bool ok because only ever write true in this special paradigm
int failed_compress = 0; // the first thread to fail writes their reason here when they first get to ordered section
char failed_msg[1001] = ""; // to hold zlib's msg; copied out of zlib in ordered section just in case the msg is allocated within zlib
int failed_write = 0; // same. could use +ve and -ve in the same code but separate it out to trace Solaris problem, #3931

if (nth>1) verbose=false; // printing isn't thread safe (there's a temporary print in compressbuff for tracing solaris; #4099)

#ifndef NOZLIB
z_stream thread_streams[nth];
// VLA on stack should be fine for nth structs; in zlib v1.2.11 sizeof(struct)==112 on 64bit
// not declared inside the parallel region because solaris appears to move the struct in
// memory when the #pragma omp for is entered, which causes zlib's internal self reference
// pointer to mismatch, #4099
char failed_msg[1001] = ""; // to hold zlib's msg; copied out of zlib in ordered section just in case the msg is allocated within zlib
#endif

#pragma omp parallel num_threads(nth)
{
Expand All @@ -835,6 +849,7 @@ void fwriteMain(fwriteMainArgs args)

void *myzBuff = NULL;
size_t myzbuffUsed = 0;
#ifndef NOZLIB
z_stream *mystream = &thread_streams[me];
if (args.is_gzip) {
myzBuff = zbuffPool + me*zbuffSize;
Expand All @@ -843,6 +858,7 @@ void fwriteMain(fwriteMainArgs args)
my_failed_compress = -998; // # nocov
}
}
#endif

#pragma omp for ordered schedule(dynamic)
for(int64_t start=0; start<args.nrow; start+=rowsPerBatch) {
Expand Down Expand Up @@ -871,19 +887,23 @@ void fwriteMain(fwriteMainArgs args)
write_chars(args.eol, &ch); // overwrite last sep with eol instead
}
// compress buffer if gzip
#ifndef NOZLIB
if (args.is_gzip && !failed) {
myzbuffUsed = zbuffSize;
int ret = compressbuff(mystream, myzBuff, &myzbuffUsed, myBuff, (size_t)(ch-myBuff));
if (ret) { failed=true; my_failed_compress=ret; }
else deflateReset(mystream);
}
#endif
#pragma omp ordered
{
if (failed) {
// # nocov start
if (failed_compress==0 && my_failed_compress!=0) {
failed_compress = my_failed_compress;
#ifndef NOZLIB
if (mystream->msg!=NULL) strncpy(failed_msg, mystream->msg, 1000); // copy zlib's msg for safe use after deflateEnd just in case zlib allocated the message
#endif
}
// else another thread could have failed below while I was working or waiting above; their reason got here first
// # nocov end
Expand Down Expand Up @@ -941,7 +961,9 @@ void fwriteMain(fwriteMainArgs args)
// all threads will call this free on their buffer, even if one or more threads had malloc
// or realloc fail. If the initial malloc failed, free(NULL) is ok and does nothing.
if (args.is_gzip) {
#ifndef NOZLIB
deflateEnd(mystream);
#endif
}
}
free(buffPool);
Expand All @@ -967,11 +989,13 @@ void fwriteMain(fwriteMainArgs args)
// from the original error.
if (failed) {
// # nocov start
#ifndef NOZLIB
if (failed_compress)
STOP(_("zlib %s (zlib.h %s) deflate() returned error %d with z_stream->msg==\"%s\" Z_FINISH=%d Z_BLOCK=%d. %s"),
zlibVersion(), ZLIB_VERSION, failed_compress, failed_msg, Z_FINISH, Z_BLOCK,
verbose ? _("Please include the full output above and below this message in your data.table bug report.")
: _("Please retry fwrite() with verbose=TRUE and include the full output with your data.table bug report."));
#endif
if (failed_write)
STOP("%s: '%s'", strerror(failed_write), args.filename);
// # nocov end
Expand Down
6 changes: 6 additions & 0 deletions src/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -374,10 +374,16 @@ SEXP coerceUtf8IfNeeded(SEXP x) {
return(ans);
}

#ifndef NOZLIB
#include <zlib.h>
#endif
SEXP dt_zlib_version() {
char out[51];
#ifndef NOZLIB
snprintf(out, 50, "zlibVersion()==%s ZLIB_VERSION==%s", zlibVersion(), ZLIB_VERSION);
#else
snprintf(out, 50, "zlib header files were not found when data.table was compiled");
#endif
return ScalarString(mkChar(out));
}