diff --git a/CRAN_Release.cmd b/CRAN_Release.cmd index b1a6f71a4e..ec21b2b9a5 100644 --- a/CRAN_Release.cmd +++ b/CRAN_Release.cmd @@ -24,7 +24,8 @@ grep -RI --exclude-dir=".git" --exclude="*.md" --exclude="*~" --color='auto' -P grep -RI --exclude-dir=".git" --exclude="*.md" --exclude="*~" --color='auto' -n "[\]u[0-9]" ./ # Ensure no calls to omp_set_num_threads() [to avoid affecting other packages and base R] -grep --exclude="./src/openmp-utils.c" omp_set_num_threads ./src/* +# Only comments referring to it should be in openmp-utils.c +grep omp_set_num_threads ./src/* # Ensure no calls to omp_get_max_threads() also since access should be via getDTthreads() grep --exclude="./src/openmp-utils.c" omp_get_max_threads ./src/* diff --git a/NEWS.md b/NEWS.md index 7470191c12..3b25ec1369 100644 --- a/NEWS.md +++ b/NEWS.md @@ -193,6 +193,8 @@ Thanks to @sritchie73 for reporting and fixing [PR#2631](https://github.com/Rdat 36. `NA` in character columns now display as `` just like base R to distinguish from `""` and `"NA"`. +37. `getDTthreads()` could return INT_MAX (2 billion) after an explicit call to `setDTthreads(0)`, [PR#2708](https://github.com/Rdatatable/data.table/pull/2708). + #### NOTES 0. The license has been changed from GPL to MPL (Mozilla Public License). All contributors were consulted and approved. [PR#2456](https://github.com/Rdatatable/data.table/pull/2456) details the reasons for the change. diff --git a/R/openmp-utils.R b/R/openmp-utils.R index 15d3acc57a..aa4a965064 100644 --- a/R/openmp-utils.R +++ b/R/openmp-utils.R @@ -2,7 +2,7 @@ setDTthreads <- function(threads) { invisible(.Call(CsetDTthreads, as.integer(threads))) } -getDTthreads <- function() { - .Call(CgetDTthreads) +getDTthreads <- function(verbose=getOption("datatable.verbose", FALSE)) { + .Call(CgetDTthreads, verbose) } diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw index 6d80adb312..f7f431e1a1 100644 --- a/inst/tests/tests.Rraw +++ b/inst/tests/tests.Rraw @@ -11544,6 +11544,7 @@ test(1894.11, DT[, sum(z)*..z], 72L) setnames(DT, "z", "..z") test(1894.12, DT[, sum(y)*..z], error="..z in j is looking for z in calling scope, but a column '..z' exists. Column names should not start with ..") +test(1895, getDTthreads(verbose=TRUE), output="omp_get_max_threads.*omp_get_thread_limit.*DTthreads") ################################### # Add new tests above this line # diff --git a/man/openmp-utils.Rd b/man/openmp-utils.Rd index 299afaaf61..3f29985b6f 100644 --- a/man/openmp-utils.Rd +++ b/man/openmp-utils.Rd @@ -4,13 +4,16 @@ \title{ Set or get number of threads that data.table should use } \description{ Set and get number of threads to be used in \code{data.table} functions that are parallelized with OpenMP. Default value 0 means to utilize all CPU available with an appropriate number of threads calculated by OpenMP. \code{getDTthreads()} returns the number of threads that will be used. This affects \code{data.table} only and does not change R itself or other packages using OpenMP. The most common usage expected is \code{setDTthreads(1)} to limit \code{data.table} to one thread for pre-existing explicitly parallel user code; e.g. via packages parallel and foreach. Otherwise, nested parallelism may bite. As \code{data.table} becomes more parallel automatically internally, we expect explicit user parallelism to be needed less often. + +Attempting to \code{setDTthreads()} to more than the number of logical CPUs is intended to be ineffective; i.e., \code{getDTthreads()} will still return the number of logical CPUs in that case. Further, there is a hard coded limit of 1024 threads (with warning when imposed) to prevent accidentally picking up the value of \code{INT_MAX} (2 billion; i.e. unlimited) from \code{omp_get_thread_limit()}. We have followed the advice of section 1.2.1.1 in the R-exts manual: "... or, better, for the regions in your code as part of their specification... num_threads(nthreads).. That way you only control your own code and not that of other OpenMP users." All the parallel region in data.table contain this directive. This is mandated by a \code{grep} in the package's quality control release procedure script. } \usage{ setDTthreads(threads) -getDTthreads() +getDTthreads(verbose = getOption("datatable.verbose", FALSE)) } \arguments{ \item{threads}{ An integer >= 0. Default 0 means use all CPU available and leave the operating system to multi task. } + \item{verbose}{ Display the value returned by some OpenMP function calls. } } \value{ A length 1 \code{integer}. The old value is returned by \code{setDTthreads} so you can store that value and pass it to \code{setDTthreads} again after the section of your code where you, probably, limited to one thread. diff --git a/src/myomp.h b/src/myomp.h index 798d071e3c..9841e761d3 100644 --- a/src/myomp.h +++ b/src/myomp.h @@ -5,5 +5,6 @@ #define omp_get_num_threads() 1 #define omp_get_thread_num() 0 #define omp_get_max_threads() 1 + #define omp_get_thread_limit() 1 #endif diff --git a/src/openmp-utils.c b/src/openmp-utils.c index cd85ee9e01..c09cdb99be 100644 --- a/src/openmp-utils.c +++ b/src/openmp-utils.c @@ -22,14 +22,33 @@ static int DTthreads = 0; int getDTthreads() { #ifdef _OPENMP - int ans = DTthreads == 0 ? omp_get_max_threads() : MIN(DTthreads, omp_get_max_threads()); - return MAX(1, ans); + int ans = omp_get_max_threads(); + if (ans>1024) { + warning("omp_get_max_threads() has returned %d. This is unreasonably large. Applying hard limit of 1024. Please check OpenMP environment variables and other packages using OpenMP to see where this very large number has come from. Try getDTthreads(verbose=TRUE).", ans); + // to catch INT_MAX for example, which may be the case if user or another package has called omp_set_num_threads(omp_get_thread_limit()) + // 1024 is a reasonable hard limit based on a comfortable margin above the most number of CPUs in one server I have heard about + ans=1024; + } + if (DTthreads>0 && DTthreads omp_get_max_threads()) { - omp_set_num_threads( MIN(DTthreads, omp_get_thread_limit()) ); - } - } -#endif + + // Do not call omp_set_num_threads() here, and particularly not to omp_get_thread_limit() + // which is likely INT_MAX (unlimited). Any calls to omp_set_num_threads() affect other + // packages and R itself too which has some OpenMP usage. Instead we set our own DTthreads + // static global variable, read that from getDTthreads() and ensure num_threads(getDTthreads()) + // directive is always present via the grep in CRAN_Release.cmd. + return ScalarInteger(old); }