diff --git a/NEWS.md b/NEWS.md index 4d1c16aebf..05e595a768 100644 --- a/NEWS.md +++ b/NEWS.md @@ -123,6 +123,8 @@ unit = "s") 19. Matrices resulting from logical operators or comparisons on `data.table`s, e.g. in `dta == dtb`, can no longer have their colnames changed by reference later, [#4323](https://github.com/Rdatatable/data.table/issues/4323). Thanks to @eyherabh for reporting and @tlapak for the PR. +20. The environment variable `R_DATATABLE_NUM_THREADS` was being limited by `R_DATATABLE_NUM_PROCS_PERCENT` (by default 50%), [#4514](https://github.com/Rdatatable/data.table/issues/4514). It is now consistent with `setDTthreads()` and only limited by the full number of logical CPUs. For example, on a machine with 8 logical CPUs, `R_DATATABLE_NUM_THREADS=6` now results in 6 threads rather than 4 (50% of 8). + ## NOTES 0. Retrospective license change permission was sought from and granted by 4 contributors who were missed in [PR#2456](https://github.com/Rdatatable/data.table/pull/2456), [#4140](https://github.com/Rdatatable/data.table/pull/4140). We had used [GitHub's contributor page](https://github.com/Rdatatable/data.table/graphs/contributors) which omits 3 of these due to invalid email addresses, unlike GitLab's contributor page which includes the ids. The 4th omission was a PR to a script which should not have been excluded; a script is code too. We are sorry these contributors were not properly credited before. They have now been added to the contributors list as displayed on CRAN. All the contributors of code to data.table hold its copyright jointly; your contributions belong to you. You contributed to data.table when it had a particular license at that time, and you contributed on that basis. This is why in the last license change, all contributors of code were consulted and each had a veto. diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 4b50a3613b..5c607bcb27 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -14195,7 +14195,9 @@ test(1997.06, setDTthreads(percent=NULL), error="but is length 0") test(1997.07, setDTthreads(percent=1:2), error="but is length 2") test(1997.08, setDTthreads(restore_after_fork=21), error="must be TRUE, FALSE, or NULL") old = getDTthreads() # (1) -oldenv = Sys.getenv("R_DATATABLE_NUM_PROCS_PERCENT") +oldenv1 = Sys.getenv("R_DATATABLE_NUM_PROCS_PERCENT") +oldenv2 = Sys.getenv("R_DATATABLE_NUM_THREADS") +Sys.setenv(R_DATATABLE_NUM_THREADS="") # in case user has this set, so we can test PROCS_PERCENT Sys.setenv(R_DATATABLE_NUM_PROCS_PERCENT="3.0") test(1997.09, setDTthreads(), old, warning="Ignoring invalid.*Please remove any.*not a digit") new = getDTthreads() # old above at (1) may not have been default. new now is. @@ -14208,12 +14210,20 @@ test(1997.13, setDTthreads(), new) new = getDTthreads() setDTthreads(percent=75) test(1997.14, getDTthreads(), new) -Sys.setenv(R_DATATABLE_NUM_PROCS_PERCENT=oldenv) -test(1997.15, setDTthreads(old), new) -test(1997.16, getDTthreads(), old) -test(1997.17, setDTthreads(throttle=NA), error="throttle.*must be a single number, non-NA, and >=1") +Sys.setenv(R_DATATABLE_NUM_PROCS_PERCENT="100") +setDTthreads() +allcpu = getDTthreads() +Sys.setenv(R_DATATABLE_NUM_PROCS_PERCENT="75") +Sys.setenv(R_DATATABLE_NUM_THREADS=allcpu) +setDTthreads() +test(1997.15, getDTthreads(), allcpu) +Sys.setenv(R_DATATABLE_NUM_PROCS_PERCENT=oldenv1) +Sys.setenv(R_DATATABLE_NUM_THREADS=oldenv2) +test(1997.16, setDTthreads(old), allcpu) +test(1997.17, getDTthreads(), old) +test(1997.18, setDTthreads(throttle=NA), error="throttle.*must be a single number, non-NA, and >=1") setDTthreads(throttle=65536) -test(1997.18, getDTthreads(TRUE), output="throttle==65536") +test(1997.19, getDTthreads(TRUE), output="throttle==65536") setDTthreads(throttle=1024) # test that a copy is being made and output is printed, #3385 after partial revert of #3281 diff --git a/src/openmp-utils.c b/src/openmp-utils.c index cfd0e3806f..51393f3b7c 100644 --- a/src/openmp-utils.c +++ b/src/openmp-utils.c @@ -20,7 +20,7 @@ static int getIntEnv(const char *name, int def) long int ans = strtol(val, &end, 10); // ignores leading whitespace. If it fully consumed the string, *end=='\0' and isspace('\0')==false while (isspace(*end)) end++; // ignore trailing whitespace if (errno || (size_t)(end-val)!=nchar || ans<1 || ans>INT_MAX) { - warning(_("Ignoring invalid %s==\")%s\". Not an integer >= 1. Please remove any characters that are not a digit [0-9]. See ?data.table::setDTthreads."), name, val); + warning(_("Ignoring invalid %s==\"%s\". Not an integer >= 1. Please remove any characters that are not a digit [0-9]. See ?data.table::setDTthreads."), name, val); return def; } return (int)ans; @@ -33,23 +33,26 @@ void initDTthreads() { // called at package startup from init.c // also called by setDTthreads(threads=NULL) (default) to reread environment variables; see setDTthreads below // No verbosity here in this setter. Verbosity is in getDTthreads(verbose=TRUE) - int ans = omp_get_num_procs(); // starting point is all logical CPUs. This is a hard limit; user cannot achieve more than this. - // ifndef _OPENMP then myomp.h defines this to be 1 - int perc = getIntEnv("R_DATATABLE_NUM_PROCS_PERCENT", 50); // use "NUM_PROCS" to use the same name as the OpenMP function this uses - // 50% of logical CPUs by default; half of 8 is 4 on laptop with 4 cores. Leaves plenty of room for other processes: #3395 & #3298 - if (perc<=1 || perc>100) { - warning(_("Ignoring invalid R_DATATABLE_NUM_PROCS_PERCENT==%d. If used it must be an integer between 2 and 100. Default is 50. See ?setDTtheads."), perc); - // not allowing 1 is to catch attempts to use 1 or 1.0 to represent 100%. - perc = 50; + int ans = getIntEnv("R_DATATABLE_NUM_THREADS", INT_MIN); + if (ans>=1) { + ans = imin(ans, omp_get_num_procs()); // num_procs is a hard limit; user cannot achieve more. ifndef _OPENMP then myomp.h defines this to be 1 + } else { + // Only when R_DATATABLE_NUM_THREADS is unset (or <=0) do we use PROCS_PERCENT; #4514 + int perc = getIntEnv("R_DATATABLE_NUM_PROCS_PERCENT", 50); // use "NUM_PROCS" to use the same name as the OpenMP function this uses + // 50% of logical CPUs by default; half of 8 is 4 on laptop with 4 cores. Leaves plenty of room for other processes: #3395 & #3298 + if (perc<=1 || perc>100) { + warning(_("Ignoring invalid R_DATATABLE_NUM_PROCS_PERCENT==%d. If used it must be an integer between 2 and 100. Default is 50. See ?setDTtheads."), perc); + // not allowing 1 is to catch attempts to use 1 or 1.0 to represent 100%. + perc = 50; + } + ans = imax(omp_get_num_procs()*perc/100, 1); // imax for when formula would result in 0. } - ans = imax(ans*perc/100, 1); ans = imin(ans, omp_get_thread_limit()); // honors OMP_THREAD_LIMIT when OpenMP started; e.g. CRAN sets this to 2. Often INT_MAX meaning unlimited/unset ans = imin(ans, omp_get_max_threads()); // honors OMP_NUM_THREADS when OpenMP started, plus reflects any omp_set_* calls made since - ans = imax(ans, 1); // just in case omp_get_* returned <= 0 for any reason // max_threads() -vs- num_procs(): https://software.intel.com/en-us/forums/intel-visual-fortran-compiler-for-windows/topic/302866 - ans = imin(ans, getIntEnv("R_DATATABLE_NUM_THREADS", INT_MAX)); ans = imin(ans, getIntEnv("OMP_THREAD_LIMIT", INT_MAX)); // user might expect `Sys.setenv(OMP_THREAD_LIMIT=2);setDTthreads()` to work. Satisfy this ans = imin(ans, getIntEnv("OMP_NUM_THREADS", INT_MAX)); // expectation by reading them again now. OpenMP just reads them on startup (quite reasonably) + ans = imax(ans, 1); // just in case omp_get_* returned <=0 for any reason, or the env variables above are set <=0 DTthreads = ans; DTthrottle = imax(1, getIntEnv("R_DATATABLE_THROTTLE", 1024)); // 2nd thread is used only when n>1024, 3rd thread when n>2048, etc }