From 7cf58a2c6554cabf528556405b32d54863186fd2 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 20 Oct 2022 12:01:18 +0800 Subject: [PATCH 001/126] Make 1.7.0rc1. (#8365) --- cmake/Python_version.in | 2 +- jvm-packages/pom.xml | 2 +- jvm-packages/xgboost4j-example/pom.xml | 8 ++++---- jvm-packages/xgboost4j-flink/pom.xml | 6 +++--- jvm-packages/xgboost4j-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark/pom.xml | 4 ++-- jvm-packages/xgboost4j/pom.xml | 4 ++-- python-package/xgboost/VERSION | 2 +- python-package/xgboost/core.py | 1 + 10 files changed, 19 insertions(+), 18 deletions(-) diff --git a/cmake/Python_version.in b/cmake/Python_version.in index 67b48af46bf1..ef2754d2d860 100644 --- a/cmake/Python_version.in +++ b/cmake/Python_version.in @@ -1 +1 @@ -@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@-dev \ No newline at end of file +@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@rc1 \ No newline at end of file diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index e07b987830a5..77343820fc4f 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0-SNAPSHOT + 1.7.0-RC1 pom XGBoost JVM Package JVM Package for XGBoost diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index a6857364dcb2..443358b34220 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0-SNAPSHOT + 1.7.0-RC1 xgboost4j-example_2.12 - 1.7.0-SNAPSHOT + 1.7.0-RC1 jar @@ -26,7 +26,7 @@ ml.dmlc xgboost4j-spark_${scala.binary.version} - 1.7.0-SNAPSHOT + 1.7.0-RC1 org.apache.spark @@ -37,7 +37,7 @@ ml.dmlc xgboost4j-flink_${scala.binary.version} - 1.7.0-SNAPSHOT + 1.7.0-RC1 org.apache.commons diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index f6ba1747d3ef..b2ed002a0a45 
100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0-SNAPSHOT + 1.7.0-RC1 xgboost4j-flink_2.12 - 1.7.0-SNAPSHOT + 1.7.0-RC1 @@ -26,7 +26,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.0-SNAPSHOT + 1.7.0-RC1 org.apache.commons diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index 6e4651a4c39f..ee6c46b962ca 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0-SNAPSHOT + 1.7.0-RC1 xgboost4j-gpu_2.12 - 1.7.0-SNAPSHOT + 1.7.0-RC1 jar diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index bac014776f20..a6bbda378985 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0-SNAPSHOT + 1.7.0-RC1 xgboost4j-spark-gpu_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j-gpu_${scala.binary.version} - 1.7.0-SNAPSHOT + 1.7.0-RC1 org.apache.spark diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index 5e5e0becb189..440a6be9417d 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0-SNAPSHOT + 1.7.0-RC1 xgboost4j-spark_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.0-SNAPSHOT + 1.7.0-RC1 org.apache.spark diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index 9551ffcd97a5..adf60b92bd34 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0-SNAPSHOT + 1.7.0-RC1 xgboost4j_2.12 - 1.7.0-SNAPSHOT + 1.7.0-RC1 jar diff --git a/python-package/xgboost/VERSION b/python-package/xgboost/VERSION index de023c91b16b..d88d8e71e7f8 100644 --- a/python-package/xgboost/VERSION +++ 
b/python-package/xgboost/VERSION @@ -1 +1 @@ -1.7.0-dev +1.7.0rc1 diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index cd028ba0bedf..633cce452ca4 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -237,6 +237,7 @@ def parse(ver: str) -> Tuple[int, int, int]: """Avoid dependency on packaging (PEP 440).""" # 2.0.0-dev or 2.0.0 major, minor, patch = ver.split("-")[0].split(".") + patch = patch.split("rc")[0] # 2.0.0rc1 return int(major), int(minor), int(patch) libver = _lib_version(lib) From 463313d9be3a0aa6da65da852cfb2c1e2a026cd2 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 20 Oct 2022 14:22:13 +0800 Subject: [PATCH 002/126] Remove cleanup script in R package. (#8370) --- Makefile | 1 + 1 file changed, 1 insertion(+) diff --git a/Makefile b/Makefile index c4411de3cdc0..1dc909ef26f7 100644 --- a/Makefile +++ b/Makefile @@ -126,6 +126,7 @@ Rpack: clean_all cat R-package/src/Makevars.in|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.in cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.win rm -f xgboost/src/Makevars.win-e # OSX sed create this extra file; remove it + rm -f xgboost/cleanup bash R-package/remove_warning_suppression_pragma.sh bash xgboost/remove_warning_suppression_pragma.sh rm xgboost/remove_warning_suppression_pragma.sh From 153d995b581a2c75d28213a9beddb777f0d93e38 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Wed, 26 Oct 2022 00:43:10 -0700 Subject: [PATCH 003/126] Fix building XGBoost with libomp 15 (#8384) (#8387) --- CMakeLists.txt | 20 ++++++++++++++++++-- R-package/configure | 11 +++++++++-- R-package/configure.ac | 11 +++++++++-- 3 files changed, 36 insertions(+), 6 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 64aeae29c737..d2746176f185 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -171,8 +171,24 @@ if (USE_OPENMP) # Require CMake 3.16+ on Mac OSX, as previous versions of CMake had trouble locating # OpenMP on Mac. 
See https://github.com/dmlc/xgboost/pull/5146#issuecomment-568312706 cmake_minimum_required(VERSION 3.16) - endif (APPLE) - find_package(OpenMP REQUIRED) + find_package(OpenMP) + if (NOT OpenMP_FOUND) + # Try again with extra path info; required for libomp 15+ from Homebrew + execute_process(COMMAND brew --prefix libomp + OUTPUT_VARIABLE HOMEBREW_LIBOMP_PREFIX + OUTPUT_STRIP_TRAILING_WHITESPACE) + set(OpenMP_C_FLAGS + "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include") + set(OpenMP_CXX_FLAGS + "-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include") + set(OpenMP_C_LIB_NAMES omp) + set(OpenMP_CXX_LIB_NAMES omp) + set(OpenMP_omp_LIBRARY ${HOMEBREW_LIBOMP_PREFIX}/lib/libomp.dylib) + find_package(OpenMP REQUIRED) + endif () + else () + find_package(OpenMP REQUIRED) + endif () endif (USE_OPENMP) #Add for IBM i if (${CMAKE_SYSTEM_NAME} MATCHES "OS400") diff --git a/R-package/configure b/R-package/configure index 154ed08fa047..5092cbdf62b9 100755 --- a/R-package/configure +++ b/R-package/configure @@ -2709,8 +2709,15 @@ fi if test `uname -s` = "Darwin" then - OPENMP_CXXFLAGS='-Xclang -fopenmp' - OPENMP_LIB='-lomp' + if command -v brew &> /dev/null + then + HOMEBREW_LIBOMP_PREFIX=`brew --prefix libomp` + else + # Homebrew not found + HOMEBREW_LIBOMP_PREFIX='' + fi + OPENMP_CXXFLAGS="-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include" + OPENMP_LIB="-lomp -L${HOMEBREW_LIBOMP_PREFIX}/lib" ac_pkg_openmp=no { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether OpenMP will work in a package" >&5 $as_echo_n "checking whether OpenMP will work in a package... 
" >&6; } diff --git a/R-package/configure.ac b/R-package/configure.ac index 8be57fee79f3..fc36207b3d98 100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -28,8 +28,15 @@ fi if test `uname -s` = "Darwin" then - OPENMP_CXXFLAGS='-Xclang -fopenmp' - OPENMP_LIB='-lomp' + if command -v brew &> /dev/null + then + HOMEBREW_LIBOMP_PREFIX=`brew --prefix libomp` + else + # Homebrew not found + HOMEBREW_LIBOMP_PREFIX='' + fi + OPENMP_CXXFLAGS="-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include" + OPENMP_LIB="-lomp -L${HOMEBREW_LIBOMP_PREFIX}/lib" ac_pkg_openmp=no AC_MSG_CHECKING([whether OpenMP will work in a package]) AC_LANG_CONFTEST([AC_LANG_PROGRAM([[#include ]], [[ return (omp_get_max_threads() <= 1); ]])]) From aa30ce10dabd0bc1d7362d46639e4c24e5254d93 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 26 Oct 2022 19:31:34 +0800 Subject: [PATCH 004/126] [backport][pyspark] Improve tutorial on enabling GPU support. (#8385) [skip ci] (#8391) - Quote the databricks doc on how to manage dependencies. - Some wording changes. Co-authored-by: Philip Hyunsu Cho --- doc/tutorials/spark_estimator.rst | 95 +++++++++++++++++++------------ 1 file changed, 60 insertions(+), 35 deletions(-) diff --git a/doc/tutorials/spark_estimator.rst b/doc/tutorials/spark_estimator.rst index acacada0b3eb..44e7a957513b 100644 --- a/doc/tutorials/spark_estimator.rst +++ b/doc/tutorials/spark_estimator.rst @@ -83,17 +83,52 @@ generate result dataset with 3 new columns: XGBoost PySpark GPU support *************************** -XGBoost PySpark supports GPU training and prediction. To enable GPU support, first you -need to install the XGBoost and the `cuDF `_ -package. Then you can set `use_gpu` parameter to `True`. +XGBoost PySpark fully supports GPU acceleration. Users are not only able to enable +efficient training but also utilize their GPUs for the whole PySpark pipeline including +ETL and inference. 
In the sections below, we will walk through an example of training on a +PySpark standalone GPU cluster. To get started, first we need to install some additional +packages, then we can set the `use_gpu` parameter to `True`. -Below tutorial demonstrates how to train a model with XGBoost PySpark GPU on Spark -standalone cluster. +Prepare the necessary packages +============================== + +Aside from the PySpark and XGBoost modules, we also need the `cuDF +`_ package for handling Spark dataframes. We +recommend using either Conda or Virtualenv to manage python dependencies for PySpark +jobs. Please refer to `How to Manage Python Dependencies in PySpark +`_ +for more details on PySpark dependency management. + +In short, to create a Python environment that can be sent to a remote cluster using +virtualenv and pip: + +.. code-block:: bash + + python -m venv xgboost_env + source xgboost_env/bin/activate + pip install pyarrow pandas venv-pack xgboost + # https://rapids.ai/pip.html#install + pip install cudf-cu11 --extra-index-url=https://pypi.ngc.nvidia.com + venv-pack -o xgboost_env.tar.gz + +With Conda: + +.. code-block:: bash + + conda create -y -n xgboost_env -c conda-forge conda-pack python=3.9 + conda activate xgboost_env + # use conda when the supported version of xgboost (1.7) is released on conda-forge + pip install xgboost + conda install cudf pyarrow pandas -c rapids -c nvidia -c conda-forge + conda pack -f -o xgboost_env.tar.gz Write your PySpark application ============================== +The snippet below is a small example of training an XGBoost model with PySpark. Notice that we are +using a list of feature names and the additional parameter ``use_gpu``: + ..
code-block:: python from xgboost.spark import SparkXGBRegressor @@ -127,26 +162,11 @@ Write your PySpark application predict_df = model.transform(test_df) predict_df.show() -Prepare the necessary packages -============================== - -We recommend using Conda or Virtualenv to manage python dependencies -in PySpark. Please refer to -`How to Manage Python Dependencies in PySpark `_. - -.. code-block:: bash - - conda create -y -n xgboost-env -c conda-forge conda-pack python=3.9 - conda activate xgboost-env - pip install xgboost - conda install cudf -c rapids -c nvidia -c conda-forge - conda pack -f -o xgboost-env.tar.gz - Submit the PySpark application ============================== -Assuming you have configured your Spark cluster with GPU support, if not yet, please +This assumes you have configured your Spark cluster with GPU support. Otherwise, please refer to `spark standalone configuration with GPU support `_. .. code-block:: bash @@ -158,10 +178,13 @@ refer to `spark standalone configuration with GPU support :7077 \ --conf spark.executor.resource.gpu.amount=1 \ --conf spark.task.resource.gpu.amount=1 \ - --archives xgboost-env.tar.gz#environment \ + --archives xgboost_env.tar.gz#environment \ xgboost_app.py +The submit command sends the Python environment created by pip or conda along with the +specification of GPU allocation. We will revisit this command later on. + Model Persistence ================= @@ -186,26 +209,27 @@ To export the underlying booster model used by XGBoost: # the same booster object returned by xgboost.train booster: xgb.Booster = model.get_booster() booster.predict(...) - booster.save_model("model.json") + booster.save_model("model.json") # or model.ubj, depending on your choice of format. -This booster is shared by other Python interfaces and can be used by other language -bindings like the C and R packages.
Lastly, one can extract a booster file directly from -saved spark estimator without going through the getter: +This booster is not only shared by other Python interfaces but also used by all the +XGBoost bindings including the C, Java, and R packages. Lastly, one can extract the +booster file directly from a saved spark estimator without going through the getter: .. code-block:: python import xgboost as xgb bst = xgb.Booster() + # Loading the model saved in previous snippet bst.load_model("/tmp/xgboost-pyspark-model/model/part-00000") -Accelerate the whole pipeline of xgboost pyspark -================================================ -With `RAPIDS Accelerator for Apache Spark `_, -you can accelerate the whole pipeline (ETL, Train, Transform) for xgboost pyspark -without any code change by leveraging GPU. +Accelerate the whole pipeline for xgboost pyspark +================================================= -Below is a simple example submit command for enabling GPU acceleration: +With `RAPIDS Accelerator for Apache Spark `_, you +can leverage GPUs to accelerate the whole pipeline (ETL, Train, Transform) for xgboost +pyspark without any Python code change. An example submit command is shown below with +additional spark configurations and dependencies: .. code-block:: bash @@ -219,8 +243,9 @@ Below is a simple example submit command for enabling GPU acceleration: --packages com.nvidia:rapids-4-spark_2.12:22.08.0 \ --conf spark.plugins=com.nvidia.spark.SQLPlugin \ --conf spark.sql.execution.arrow.maxRecordsPerBatch=1000000 \ - --archives xgboost-env.tar.gz#environment \ + --archives xgboost_env.tar.gz#environment \ xgboost_app.py -When rapids plugin is enabled, both of the JVM rapids plugin and the cuDF Python are -required for the acceleration. +When the rapids plugin is enabled, both the JVM rapids plugin and the cuDF Python package +are required. More configuration options can be found in the RAPIDS link above along with +details on the plugin.
From e17f7010bfca6d1d6f9386dabc95af1a07543335 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 26 Oct 2022 19:49:00 +0800 Subject: [PATCH 005/126] [backport][doc] Cleanup outdated documents for GPU. [skip ci] (#8378) (#8393) --- doc/gpu/index.rst | 150 +++------------------------------------------- 1 file changed, 8 insertions(+), 142 deletions(-) diff --git a/doc/gpu/index.rst b/doc/gpu/index.rst index 82309523f4cf..4187030c28fa 100644 --- a/doc/gpu/index.rst +++ b/doc/gpu/index.rst @@ -4,36 +4,21 @@ XGBoost GPU Support This page contains information about GPU algorithms supported in XGBoost. -.. note:: CUDA 10.1, Compute Capability 3.5 required - - The GPU algorithms in XGBoost require a graphics card with compute capability 3.5 or higher, with - CUDA toolkits 10.1 or later. - (See `this list `_ to look up compute capability of your GPU card.) +.. note:: CUDA 11.0, Compute Capability 5.0 required (See `this list `_ to look up compute capability of your GPU card.) ********************************************* CUDA Accelerated Tree Construction Algorithms ********************************************* -Tree construction (training) and prediction can be accelerated with CUDA-capable GPUs. + +Most of the algorithms in XGBoost including training, prediction and evaluation can be accelerated with CUDA-capable GPUs. Usage ===== -Specify the ``tree_method`` parameter as one of the following algorithms. - -Algorithms ----------- - -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ -| tree_method | Description | -+=======================+=======================================================================================================================================================================+ -| gpu_hist | Equivalent to the XGBoost fast histogram algorithm. Much faster and uses considerably less memory. 
NOTE: May run very slowly on GPUs older than Pascal architecture. | -+-----------------------+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------+ +Specify the ``tree_method`` parameter as ``gpu_hist``. For details around the ``tree_method`` parameter, see :doc:`tree method `. Supported parameters -------------------- -.. |tick| unicode:: U+2714 -.. |cross| unicode:: U+2718 - GPU accelerated prediction is enabled by default for the above mentioned ``tree_method`` parameters but can be switched to CPU prediction by setting ``predictor`` to ``cpu_predictor``. This could be useful if you want to conserve GPU memory. Likewise when using CPU algorithms, GPU accelerated prediction can be enabled by setting ``predictor`` to ``gpu_predictor``. The device ordinal (which GPU to use if you have many of them) can be selected using the @@ -69,128 +54,9 @@ See examples `here Multi-node Multi-GPU Training ============================= -XGBoost supports fully distributed GPU training using `Dask `_. For -getting started see our tutorial :doc:`/tutorials/dask` and worked examples `here -`__, also Python documentation -:ref:`dask_api` for complete reference. - - -Objective functions -=================== -Most of the objective functions implemented in XGBoost can be run on GPU. Following table shows current support status. 
- -+----------------------+-------------+ -| Objectives | GPU support | -+----------------------+-------------+ -| reg:squarederror | |tick| | -+----------------------+-------------+ -| reg:squaredlogerror | |tick| | -+----------------------+-------------+ -| reg:logistic | |tick| | -+----------------------+-------------+ -| reg:pseudohubererror | |tick| | -+----------------------+-------------+ -| binary:logistic | |tick| | -+----------------------+-------------+ -| binary:logitraw | |tick| | -+----------------------+-------------+ -| binary:hinge | |tick| | -+----------------------+-------------+ -| count:poisson | |tick| | -+----------------------+-------------+ -| reg:gamma | |tick| | -+----------------------+-------------+ -| reg:tweedie | |tick| | -+----------------------+-------------+ -| multi:softmax | |tick| | -+----------------------+-------------+ -| multi:softprob | |tick| | -+----------------------+-------------+ -| survival:cox | |cross| | -+----------------------+-------------+ -| survival:aft | |tick| | -+----------------------+-------------+ -| rank:pairwise | |tick| | -+----------------------+-------------+ -| rank:ndcg | |tick| | -+----------------------+-------------+ -| rank:map | |tick| | -+----------------------+-------------+ - -Objective will run on GPU if GPU updater (``gpu_hist``), otherwise they will run on CPU by -default. For unsupported objectives XGBoost will fall back to using CPU implementation by -default. Note that when using GPU ranking objective, the result is not deterministic due -to the non-associative aspect of floating point summation. - -Metric functions -=================== -Following table shows current support status for evaluation metrics on the GPU. 
- -+------------------------------+-------------+ -| Metric | GPU Support | -+==============================+=============+ -| rmse | |tick| | -+------------------------------+-------------+ -| rmsle | |tick| | -+------------------------------+-------------+ -| mae | |tick| | -+------------------------------+-------------+ -| mape | |tick| | -+------------------------------+-------------+ -| mphe | |tick| | -+------------------------------+-------------+ -| logloss | |tick| | -+------------------------------+-------------+ -| error | |tick| | -+------------------------------+-------------+ -| merror | |tick| | -+------------------------------+-------------+ -| mlogloss | |tick| | -+------------------------------+-------------+ -| auc | |tick| | -+------------------------------+-------------+ -| aucpr | |tick| | -+------------------------------+-------------+ -| ndcg | |tick| | -+------------------------------+-------------+ -| map | |tick| | -+------------------------------+-------------+ -| poisson-nloglik | |tick| | -+------------------------------+-------------+ -| gamma-nloglik | |tick| | -+------------------------------+-------------+ -| cox-nloglik | |cross| | -+------------------------------+-------------+ -| aft-nloglik | |tick| | -+------------------------------+-------------+ -| interval-regression-accuracy | |tick| | -+------------------------------+-------------+ -| gamma-deviance | |tick| | -+------------------------------+-------------+ -| tweedie-nloglik | |tick| | -+------------------------------+-------------+ - -Similar to objective functions, default device for metrics is selected based on tree -updater and predictor (which is selected based on tree updater). - -Benchmarks -========== -You can run benchmarks on synthetic data for binary classification: - -.. 
code-block:: bash - - python tests/benchmark/benchmark_tree.py --tree_method=gpu_hist - python tests/benchmark/benchmark_tree.py --tree_method=hist - -Training time on 1,000,000 rows x 50 columns of random data with 500 boosting iterations and 0.25/0.75 test/train split with AMD Ryzen 7 2700 8 core @3.20GHz and NVIDIA 1080ti yields the following results: - -+--------------+----------+ -| tree_method | Time (s) | -+==============+==========+ -| gpu_hist | 12.57 | -+--------------+----------+ -| hist | 36.01 | -+--------------+----------+ + +XGBoost supports fully distributed GPU training using `Dask `_, ``Spark`` and ``PySpark``. For getting started with Dask see our tutorial :doc:`/tutorials/dask` and worked examples `here `__, also Python documentation :ref:`dask_api` for complete reference. For usage with ``Spark`` using Scala see :doc:`/jvm/xgboost4j_spark_gpu_tutorial`. Lastly for distributed GPU training with ``PySpark``, see :doc:`/tutorials/spark_estimator`. + Memory usage ============ @@ -202,7 +68,7 @@ The dataset itself is stored on device in a compressed ELLPACK format. The ELLPA Working memory is allocated inside the algorithm proportional to the number of rows to keep track of gradients, tree positions and other per row statistics. Memory is allocated for histogram bins proportional to the number of bins, number of features and nodes in the tree. For performance reasons we keep histograms in memory from previous nodes in the tree, when a certain threshold of memory usage is passed we stop doing this to conserve memory at some performance loss. -If you are getting out-of-memory errors on a big dataset, try the or :py:class:`xgboost.DeviceQuantileDMatrix` or :doc:`external memory version `. +If you are getting out-of-memory errors on a big dataset, try the :py:class:`xgboost.QuantileDMatrix` or :doc:`external memory version `. Note that when ``external memory`` is used for GPU hist, it's best to employ gradient based sampling as well.
Last but not least, ``inplace_predict`` can be preferred over ``predict`` when data is already on GPU. Both ``QuantileDMatrix`` and ``inplace_predict`` are automatically enabled if you are using the scikit-learn interface. Developer notes =============== From 3f92970a39033ad0d46956706402c2cbba428240 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 26 Oct 2022 20:46:38 +0800 Subject: [PATCH 006/126] [backport] Fix CUDA async stream. (#8380) (#8392) --- src/objective/adaptive.cu | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/objective/adaptive.cu b/src/objective/adaptive.cu index aa01370b05fa..f77f4a4a858b 100644 --- a/src/objective/adaptive.cu +++ b/src/objective/adaptive.cu @@ -67,6 +67,10 @@ void EncodeTreeLeafDevice(Context const* ctx, common::Span pos auto pinned = pinned_pool.GetSpan(sizeof(size_t) + sizeof(bst_node_t)); dh::CUDAStream copy_stream; size_t* h_num_runs = reinterpret_cast(pinned.subspan(0, sizeof(size_t)).data()); + + dh::CUDAEvent e; + e.Record(dh::DefaultStream()); + copy_stream.View().Wait(e); // flag for whether there's ignored position bst_node_t* h_first_unique = reinterpret_cast(pinned.subspan(sizeof(size_t), sizeof(bst_node_t)).data()); From e43cd60c0ea298edc87ffeb96f9e638a0a8fda03 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 26 Oct 2022 20:47:16 +0800 Subject: [PATCH 007/126] [backport] Type fix for WebAssembly. 
(#8369) (#8394) Co-authored-by: Yizhi Liu --- python-package/xgboost/core.py | 2 +- python-package/xgboost/data.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index 633cce452ca4..aa5014c29d11 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -2308,7 +2308,7 @@ def inplace_predict( _array_interface(csr.indptr), _array_interface(csr.indices), _array_interface(csr.data), - ctypes.c_size_t(csr.shape[1]), + c_bst_ulong(csr.shape[1]), from_pystr_to_cstr(json.dumps(args)), p_handle, ctypes.byref(shape), diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py index e486fb57a5f5..775eedd5776f 100644 --- a/python-package/xgboost/data.py +++ b/python-package/xgboost/data.py @@ -103,7 +103,7 @@ def _from_scipy_csr( _array_interface(data.indptr), _array_interface(data.indices), _array_interface(data.data), - ctypes.c_size_t(data.shape[1]), + c_bst_ulong(data.shape[1]), config, ctypes.byref(handle), ) From 4bc59ef7c33061d17820137253d617b051a72d65 Mon Sep 17 00:00:00 2001 From: Hyunsu Cho Date: Mon, 31 Oct 2022 10:53:07 -0700 Subject: [PATCH 008/126] Release 1.7 --- cmake/Python_version.in | 2 +- jvm-packages/pom.xml | 2 +- jvm-packages/xgboost4j-example/pom.xml | 8 ++++---- jvm-packages/xgboost4j-flink/pom.xml | 6 +++--- jvm-packages/xgboost4j-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark/pom.xml | 4 ++-- jvm-packages/xgboost4j/pom.xml | 4 ++-- python-package/xgboost/VERSION | 2 +- 9 files changed, 18 insertions(+), 18 deletions(-) diff --git a/cmake/Python_version.in b/cmake/Python_version.in index ef2754d2d860..c55458ec2c28 100644 --- a/cmake/Python_version.in +++ b/cmake/Python_version.in @@ -1 +1 @@ -@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@rc1 \ No newline at end of file +@xgboost_VERSION_MAJOR@.@xgboost_VERSION_MINOR@.@xgboost_VERSION_PATCH@ diff 
--git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index 77343820fc4f..f205dedb6aa6 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0-RC1 + 1.7.0 pom XGBoost JVM Package JVM Package for XGBoost diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index 443358b34220..2a1a4c05da23 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0-RC1 + 1.7.0 xgboost4j-example_2.12 - 1.7.0-RC1 + 1.7.0 jar @@ -26,7 +26,7 @@ ml.dmlc xgboost4j-spark_${scala.binary.version} - 1.7.0-RC1 + 1.7.0 org.apache.spark @@ -37,7 +37,7 @@ ml.dmlc xgboost4j-flink_${scala.binary.version} - 1.7.0-RC1 + 1.7.0 org.apache.commons diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index b2ed002a0a45..5fea708dad34 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0-RC1 + 1.7.0 xgboost4j-flink_2.12 - 1.7.0-RC1 + 1.7.0 @@ -26,7 +26,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.0-RC1 + 1.7.0 org.apache.commons diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index ee6c46b962ca..13f2b0f779ca 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0-RC1 + 1.7.0 xgboost4j-gpu_2.12 - 1.7.0-RC1 + 1.7.0 jar diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index a6bbda378985..744b28d34f07 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0-RC1 + 1.7.0 xgboost4j-spark-gpu_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j-gpu_${scala.binary.version} - 1.7.0-RC1 + 1.7.0 org.apache.spark diff --git a/jvm-packages/xgboost4j-spark/pom.xml 
b/jvm-packages/xgboost4j-spark/pom.xml index 440a6be9417d..fbd69fbcead4 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0-RC1 + 1.7.0 xgboost4j-spark_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.0-RC1 + 1.7.0 org.apache.spark diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index adf60b92bd34..711f30de4929 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0-RC1 + 1.7.0 xgboost4j_2.12 - 1.7.0-RC1 + 1.7.0 jar diff --git a/python-package/xgboost/VERSION b/python-package/xgboost/VERSION index d88d8e71e7f8..bd8bf882d061 100644 --- a/python-package/xgboost/VERSION +++ b/python-package/xgboost/VERSION @@ -1 +1 @@ -1.7.0rc1 +1.7.0 From 5b76acccffb42eb058934a095b34a1022ed96dd8 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Wed, 2 Nov 2022 07:56:55 -0700 Subject: [PATCH 009/126] Add back xgboost.rabit for backwards compatibility (#8408) (#8411) --- python-package/xgboost/__init__.py | 2 +- python-package/xgboost/rabit.py | 168 +++++++++++++++++++++++++++++ tests/python/test_collective.py | 31 ++++++ 3 files changed, 200 insertions(+), 1 deletion(-) create mode 100644 python-package/xgboost/rabit.py diff --git a/python-package/xgboost/__init__.py b/python-package/xgboost/__init__.py index 220093b47c4c..f17ac23ba61c 100644 --- a/python-package/xgboost/__init__.py +++ b/python-package/xgboost/__init__.py @@ -4,7 +4,7 @@ """ from . import tracker # noqa -from . import collective, dask +from . 
import collective, dask, rabit from .core import ( Booster, DataIter, diff --git a/python-package/xgboost/rabit.py b/python-package/xgboost/rabit.py new file mode 100644 index 000000000000..258ec4b6da4e --- /dev/null +++ b/python-package/xgboost/rabit.py @@ -0,0 +1,168 @@ +"""Compatibility shim for xgboost.rabit; to be removed in 2.0""" +import logging +import warnings +from enum import IntEnum, unique +from typing import Any, TypeVar, Callable, Optional, List + +import numpy as np + +from . import collective + +LOGGER = logging.getLogger("[xgboost.rabit]") + + +def _deprecation_warning() -> str: + return ( + "The xgboost.rabit submodule is marked as deprecated in 1.7 and will be removed " + "in 2.0. Please use xgboost.collective instead." + ) + + +def init(args: Optional[List[bytes]] = None) -> None: + """Initialize the rabit library with arguments""" + warnings.warn(_deprecation_warning(), FutureWarning) + parsed = {} + if args: + for arg in args: + kv = arg.decode().split('=') + if len(kv) == 2: + parsed[kv[0]] = kv[1] + collective.init(**parsed) + + +def finalize() -> None: + """Finalize the process, notify tracker everything is done.""" + collective.finalize() + + +def get_rank() -> int: + """Get rank of current process. + Returns + ------- + rank : int + Rank of current process. + """ + return collective.get_rank() + + +def get_world_size() -> int: + """Get total number workers. + Returns + ------- + n : int + Total number of process. + """ + return collective.get_world_size() + + +def is_distributed() -> int: + """If rabit is distributed.""" + return collective.is_distributed() + + +def tracker_print(msg: Any) -> None: + """Print message to the tracker. + This function can be used to communicate the information of + the progress to the tracker + Parameters + ---------- + msg : str + The message to be printed to tracker. + """ + collective.communicator_print(msg) + + +def get_processor_name() -> bytes: + """Get the processor name. 
+ Returns + ------- + name : str + the name of processor(host) + """ + return collective.get_processor_name().encode() + + +T = TypeVar("T") # pylint:disable=invalid-name + + +def broadcast(data: T, root: int) -> T: + """Broadcast object from one node to all other nodes. + Parameters + ---------- + data : any type that can be pickled + Input data, if current rank does not equal root, this can be None + root : int + Rank of the node to broadcast data from. + Returns + ------- + object : int + the result of broadcast. + """ + return collective.broadcast(data, root) + + +@unique +class Op(IntEnum): + """Supported operations for rabit.""" + MAX = 0 + MIN = 1 + SUM = 2 + OR = 3 + + +def allreduce( # pylint:disable=invalid-name + data: np.ndarray, op: Op, prepare_fun: Optional[Callable[[np.ndarray], None]] = None +) -> np.ndarray: + """Perform allreduce, return the result. + Parameters + ---------- + data : + Input data. + op : + Reduction operators, can be MIN, MAX, SUM, BITOR + prepare_fun : + Lazy preprocessing function, if it is not None, prepare_fun(data) + will be called by the function before performing allreduce, to initialize the data + If the result of Allreduce can be recovered directly, + then prepare_fun will NOT be called + Returns + ------- + result : + The result of allreduce, have same shape as data + Notes + ----- + This function is not thread-safe. + """ + if prepare_fun is None: + return collective.allreduce(data, collective.Op(op)) + raise Exception("preprocessing function is no longer supported") + + +def version_number() -> int: + """Returns version number of current stored model. + This means how many calls to CheckPoint we made so far. 
+ Returns + ------- + version : int + Version number of currently stored model + """ + return 0 + + +class RabitContext: + """A context controlling rabit initialization and finalization.""" + + def __init__(self, args: List[bytes] = None) -> None: + if args is None: + args = [] + self.args = args + + def __enter__(self) -> None: + init(self.args) + assert is_distributed() + LOGGER.warning(_deprecation_warning()) + LOGGER.debug("-------------- rabit say hello ------------------") + + def __exit__(self, *args: List) -> None: + finalize() + LOGGER.debug("--------------- rabit say bye ------------------") diff --git a/tests/python/test_collective.py b/tests/python/test_collective.py index f7de0400d21f..32b0a67a76e3 100644 --- a/tests/python/test_collective.py +++ b/tests/python/test_collective.py @@ -39,6 +39,37 @@ def test_rabit_communicator(): assert worker.exitcode == 0 +# TODO(rongou): remove this once we remove the rabit api. +def run_rabit_api_worker(rabit_env, world_size): + with xgb.rabit.RabitContext(rabit_env): + assert xgb.rabit.get_world_size() == world_size + assert xgb.rabit.is_distributed() + assert xgb.rabit.get_processor_name().decode() == socket.gethostname() + ret = xgb.rabit.broadcast('test1234', 0) + assert str(ret) == 'test1234' + ret = xgb.rabit.allreduce(np.asarray([1, 2, 3]), xgb.rabit.Op.SUM) + assert np.array_equal(ret, np.asarray([2, 4, 6])) + + +# TODO(rongou): remove this once we remove the rabit api. 
+def test_rabit_api(): + world_size = 2 + tracker = RabitTracker(host_ip='127.0.0.1', n_workers=world_size) + tracker.start(world_size) + rabit_env = [] + for k, v in tracker.worker_envs().items(): + rabit_env.append(f"{k}={v}".encode()) + workers = [] + for _ in range(world_size): + worker = multiprocessing.Process(target=run_rabit_api_worker, + args=(rabit_env, world_size)) + workers.append(worker) + worker.start() + for worker in workers: + worker.join() + assert worker.exitcode == 0 + + def run_federated_worker(port, world_size, rank): with xgb.collective.CommunicatorContext(xgboost_communicator='federated', federated_server_address=f'localhost:{port}', From 534c940a7ea50ab3b8a827546ac9908f859379f2 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Thu, 3 Nov 2022 15:37:54 -0700 Subject: [PATCH 010/126] Release 1.7.1 (#8413) * Release 1.7.1 * Review comment --- CMakeLists.txt | 2 +- R-package/DESCRIPTION | 4 ++-- R-package/configure | 18 +++++++++--------- R-package/configure.ac | 2 +- jvm-packages/pom.xml | 2 +- jvm-packages/xgboost4j-example/pom.xml | 8 ++++---- jvm-packages/xgboost4j-flink/pom.xml | 6 +++--- jvm-packages/xgboost4j-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark/pom.xml | 4 ++-- jvm-packages/xgboost4j/pom.xml | 4 ++-- python-package/xgboost/VERSION | 2 +- 12 files changed, 30 insertions(+), 30 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d2746176f185..6a004fbe3b82 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR) -project(xgboost LANGUAGES CXX C VERSION 1.7.0) +project(xgboost LANGUAGES CXX C VERSION 1.7.1) include(cmake/Utils.cmake) list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") cmake_policy(SET CMP0022 NEW) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 2f77f9734ad0..6fd158a1c3aa 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -1,8 +1,8 @@ 
Package: xgboost Type: Package Title: Extreme Gradient Boosting -Version: 1.7.0.1 -Date: 2022-10-18 +Version: 1.7.1.1 +Date: 2022-11-03 Authors@R: c( person("Tianqi", "Chen", role = c("aut"), email = "tianqi.tchen@gmail.com"), diff --git a/R-package/configure b/R-package/configure index 5092cbdf62b9..395510ad5812 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for xgboost 1.7.0. +# Generated by GNU Autoconf 2.69 for xgboost 1.7.1. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -576,8 +576,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='xgboost' PACKAGE_TARNAME='xgboost' -PACKAGE_VERSION='1.7.0' -PACKAGE_STRING='xgboost 1.7.0' +PACKAGE_VERSION='1.7.1' +PACKAGE_STRING='xgboost 1.7.1' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1195,7 +1195,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures xgboost 1.7.0 to adapt to many kinds of systems. +\`configure' configures xgboost 1.7.1 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1257,7 +1257,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of xgboost 1.7.0:";; + short | recursive ) echo "Configuration of xgboost 1.7.1:";; esac cat <<\_ACEOF @@ -1336,7 +1336,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -xgboost configure 1.7.0 +xgboost configure 1.7.1 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -1479,7 +1479,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. 
-It was created by xgboost $as_me 1.7.0, which was +It was created by xgboost $as_me 1.7.1, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -3294,7 +3294,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by xgboost $as_me 1.7.0, which was +This file was extended by xgboost $as_me 1.7.1, which was generated by GNU Autoconf 2.69. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -3347,7 +3347,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -xgboost config.status 1.7.0 +xgboost config.status 1.7.1 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/R-package/configure.ac b/R-package/configure.ac index fc36207b3d98..0215ec634df9 100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -2,7 +2,7 @@ AC_PREREQ(2.69) -AC_INIT([xgboost],[1.7.0],[],[xgboost],[]) +AC_INIT([xgboost],[1.7.1],[],[xgboost],[]) # Use this line to set CC variable to a C compiler AC_PROG_CC diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index f205dedb6aa6..f39b5dcfa47f 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0 + 1.7.1 pom XGBoost JVM Package JVM Package for XGBoost diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index 2a1a4c05da23..2f27ae924242 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0 + 1.7.1 xgboost4j-example_2.12 - 1.7.0 + 1.7.1 jar @@ -26,7 +26,7 @@ ml.dmlc xgboost4j-spark_${scala.binary.version} - 1.7.0 + 1.7.1 org.apache.spark @@ -37,7 +37,7 @@ ml.dmlc xgboost4j-flink_${scala.binary.version} - 1.7.0 + 1.7.1 
org.apache.commons diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index 5fea708dad34..e90b74d27e95 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0 + 1.7.1 xgboost4j-flink_2.12 - 1.7.0 + 1.7.1 @@ -26,7 +26,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.0 + 1.7.1 org.apache.commons diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index 13f2b0f779ca..5d97e8681d39 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0 + 1.7.1 xgboost4j-gpu_2.12 - 1.7.0 + 1.7.1 jar diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index 744b28d34f07..f5a3587fe1e6 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0 + 1.7.1 xgboost4j-spark-gpu_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j-gpu_${scala.binary.version} - 1.7.0 + 1.7.1 org.apache.spark diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index fbd69fbcead4..66f9859926a7 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0 + 1.7.1 xgboost4j-spark_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.0 + 1.7.1 org.apache.spark diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index 711f30de4929..bc9654f36dd0 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.0 + 1.7.1 xgboost4j_2.12 - 1.7.0 + 1.7.1 jar diff --git a/python-package/xgboost/VERSION b/python-package/xgboost/VERSION index bd8bf882d061..943f9cbc4ec7 100644 --- a/python-package/xgboost/VERSION +++ 
b/python-package/xgboost/VERSION @@ -1 +1 @@ -1.7.0 +1.7.1 From 9ff0c0832a22ee318b85dbe24139f11414710d86 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sun, 6 Nov 2022 03:19:54 +0800 Subject: [PATCH 011/126] Fix 1.7.1 version file. (#8427) --- include/xgboost/version_config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/xgboost/version_config.h b/include/xgboost/version_config.h index b8b4517f0468..b91107f068b5 100644 --- a/include/xgboost/version_config.h +++ b/include/xgboost/version_config.h @@ -6,6 +6,6 @@ #define XGBOOST_VER_MAJOR 1 #define XGBOOST_VER_MINOR 7 -#define XGBOOST_VER_PATCH 0 +#define XGBOOST_VER_PATCH 1 #endif // XGBOOST_VERSION_CONFIG_H_ From a347cd512b45cc9293f8e8496fc6b8e33959339d Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 9 Nov 2022 07:12:46 +0800 Subject: [PATCH 012/126] [backport] [R] Fix CRAN test notes. (#8428) (#8440) - Limit the number of used CPU cores in examples. - Add a note for the constraint. - Bring back the cleanup script. 
--- .github/workflows/r_tests.yml | 6 ++++++ R-package/DESCRIPTION | 2 +- R-package/R/callbacks.R | 12 +++++++----- R-package/R/xgb.DMatrix.R | 14 +++++++------- R-package/R/xgb.DMatrix.save.R | 2 +- R-package/R/xgb.create.features.R | 14 +++++++++----- R-package/R/xgb.cv.R | 6 +++--- R-package/R/xgb.train.R | 4 ++-- R-package/man/cb.gblinear.history.Rd | 12 +++++++----- R-package/man/dim.xgb.DMatrix.Rd | 2 +- R-package/man/dimnames.xgb.DMatrix.Rd | 2 +- R-package/man/getinfo.Rd | 2 +- R-package/man/print.xgb.DMatrix.Rd | 2 +- R-package/man/setinfo.Rd | 2 +- R-package/man/slice.xgb.DMatrix.Rd | 2 +- R-package/man/xgb.DMatrix.Rd | 2 +- R-package/man/xgb.DMatrix.save.Rd | 2 +- R-package/man/xgb.create.features.Rd | 12 ++++++++---- R-package/man/xgb.cv.Rd | 4 ++-- R-package/man/xgb.train.Rd | 4 ++-- doc/contrib/release.rst | 20 +++++++++++++++++--- doc/tutorials/dask.rst | 2 +- 22 files changed, 81 insertions(+), 49 deletions(-) diff --git a/.github/workflows/r_tests.yml b/.github/workflows/r_tests.yml index 6bc95941d250..6174ea92046d 100644 --- a/.github/workflows/r_tests.yml +++ b/.github/workflows/r_tests.yml @@ -5,6 +5,7 @@ on: [push, pull_request] env: R_PACKAGES: c('XML', 'data.table', 'ggplot2', 'DiagrammeR', 'Ckmeans.1d.dp', 'vcd', 'testthat', 'lintr', 'knitr', 'rmarkdown', 'e1071', 'cplm', 'devtools', 'float', 'titanic') GITHUB_PAT: ${{ secrets.GITHUB_TOKEN }} + _R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_: 2.5 permissions: contents: read # to fetch code (actions/checkout) @@ -68,6 +69,7 @@ jobs: - {os: windows-latest, r: 'release', compiler: 'mingw', build: 'cmake'} env: R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + _R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_: 2.5 RSPM: ${{ matrix.config.rspm }} steps: @@ -121,6 +123,10 @@ jobs: config: - {r: 'release'} + env: + _R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_: 2.5 + MAKE: "make -j$(nproc)" + steps: - uses: actions/checkout@v2 with: diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 
6fd158a1c3aa..d4e7375c64bb 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -66,5 +66,5 @@ Imports: methods, data.table (>= 1.9.6), jsonlite (>= 1.0), -RoxygenNote: 7.1.1 +RoxygenNote: 7.2.1 SystemRequirements: GNU make, C++14 diff --git a/R-package/R/callbacks.R b/R-package/R/callbacks.R index d5c69f52d3a8..fa947346937d 100644 --- a/R-package/R/callbacks.R +++ b/R-package/R/callbacks.R @@ -544,9 +544,11 @@ cb.cv.predict <- function(save_models = FALSE) { #' #' @return #' Results are stored in the \code{coefs} element of the closure. -#' The \code{\link{xgb.gblinear.history}} convenience function provides an easy way to access it. +#' The \code{\link{xgb.gblinear.history}} convenience function provides an easy +#' way to access it. #' With \code{xgb.train}, it is either a dense of a sparse matrix. -#' While with \code{xgb.cv}, it is a list (an element per each fold) of such matrices. +#' While with \code{xgb.cv}, it is a list (an element per each fold) of such +#' matrices. #' #' @seealso #' \code{\link{callbacks}}, \code{\link{xgb.gblinear.history}}. 
@@ -558,7 +560,7 @@ cb.cv.predict <- function(save_models = FALSE) { #' # without considering the 2nd order interactions: #' x <- model.matrix(Species ~ .^2, iris)[,-1] #' colnames(x) -#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor")) +#' dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2) #' param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc", #' lambda = 0.0003, alpha = 0.0003, nthread = 2) #' # For 'shotgun', which is a default linear updater, using high eta values may result in @@ -583,14 +585,14 @@ cb.cv.predict <- function(save_models = FALSE) { #' #' # For xgb.cv: #' bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8, -#' callbacks = list(cb.gblinear.history())) +#' callbacks = list(cb.gblinear.history())) #' # coefficients in the CV fold #3 #' matplot(xgb.gblinear.history(bst)[[3]], type = 'l') #' #' #' #### Multiclass classification: #' # -#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1) +#' dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 2) #' param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3, #' lambda = 0.0003, alpha = 0.0003, nthread = 2) #' # For the default linear updater 'shotgun' it sometimes is helpful diff --git a/R-package/R/xgb.DMatrix.R b/R-package/R/xgb.DMatrix.R index 3aade2396d0f..b843d651833a 100644 --- a/R-package/R/xgb.DMatrix.R +++ b/R-package/R/xgb.DMatrix.R @@ -18,7 +18,7 @@ #' #' @examples #' data(agaricus.train, package='xgboost') -#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) +#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) #' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data') #' dtrain <- xgb.DMatrix('xgb.DMatrix.data') #' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data') @@ -110,7 +110,7 @@ xgb.get.DMatrix <- function(data, label = NULL, missing = NA, weight = NULL, nth #' 
@examples #' data(agaricus.train, package='xgboost') #' train <- agaricus.train -#' dtrain <- xgb.DMatrix(train$data, label=train$label) +#' dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2) #' #' stopifnot(nrow(dtrain) == nrow(train$data)) #' stopifnot(ncol(dtrain) == ncol(train$data)) @@ -138,7 +138,7 @@ dim.xgb.DMatrix <- function(x) { #' @examples #' data(agaricus.train, package='xgboost') #' train <- agaricus.train -#' dtrain <- xgb.DMatrix(train$data, label=train$label) +#' dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2) #' dimnames(dtrain) #' colnames(dtrain) #' colnames(dtrain) <- make.names(1:ncol(train$data)) @@ -193,7 +193,7 @@ dimnames.xgb.DMatrix <- function(x) { #' #' @examples #' data(agaricus.train, package='xgboost') -#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) +#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) #' #' labels <- getinfo(dtrain, 'label') #' setinfo(dtrain, 'label', 1-labels) @@ -249,7 +249,7 @@ getinfo.xgb.DMatrix <- function(object, name, ...) { #' #' @examples #' data(agaricus.train, package='xgboost') -#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) +#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) #' #' labels <- getinfo(dtrain, 'label') #' setinfo(dtrain, 'label', 1-labels) @@ -345,7 +345,7 @@ setinfo.xgb.DMatrix <- function(object, name, info, ...) { #' #' @examples #' data(agaricus.train, package='xgboost') -#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) +#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) #' #' dsub <- slice(dtrain, 1:42) #' labels1 <- getinfo(dsub, 'label') @@ -401,7 +401,7 @@ slice.xgb.DMatrix <- function(object, idxset, ...) 
{ #' #' @examples #' data(agaricus.train, package='xgboost') -#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) +#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) #' #' dtrain #' print(dtrain, verbose=TRUE) diff --git a/R-package/R/xgb.DMatrix.save.R b/R-package/R/xgb.DMatrix.save.R index e11b793b7d4b..91edb32d0c94 100644 --- a/R-package/R/xgb.DMatrix.save.R +++ b/R-package/R/xgb.DMatrix.save.R @@ -7,7 +7,7 @@ #' #' @examples #' data(agaricus.train, package='xgboost') -#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) +#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) #' xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data') #' dtrain <- xgb.DMatrix('xgb.DMatrix.data') #' if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data') diff --git a/R-package/R/xgb.create.features.R b/R-package/R/xgb.create.features.R index f00ac9314ce8..98b03ea8a984 100644 --- a/R-package/R/xgb.create.features.R +++ b/R-package/R/xgb.create.features.R @@ -48,8 +48,8 @@ #' @examples #' data(agaricus.train, package='xgboost') #' data(agaricus.test, package='xgboost') -#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) -#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label)) +#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) +#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2)) #' #' param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic') #' nrounds = 4 @@ -65,8 +65,12 @@ #' new.features.test <- xgb.create.features(model = bst, agaricus.test$data) #' #' # learning with new features -#' new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label) -#' new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label) +#' new.dtrain <- xgb.DMatrix( +#' data = new.features.train, label = agaricus.train$label, nthread = 2 +#' ) +#' new.dtest <- xgb.DMatrix( +#' data 
= new.features.test, label = agaricus.test$label, nthread = 2 +#' ) #' watchlist <- list(train = new.dtrain) #' bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2) #' @@ -79,7 +83,7 @@ #' accuracy.after, "!\n")) #' #' @export -xgb.create.features <- function(model, data, ...){ +xgb.create.features <- function(model, data, ...) { check.deprecation(...) pred_with_leaf <- predict(model, data, predleaf = TRUE) cols <- lapply(as.data.frame(pred_with_leaf), factor) diff --git a/R-package/R/xgb.cv.R b/R-package/R/xgb.cv.R index c07d452c1522..a7c009db6329 100644 --- a/R-package/R/xgb.cv.R +++ b/R-package/R/xgb.cv.R @@ -110,9 +110,9 @@ #' #' @examples #' data(agaricus.train, package='xgboost') -#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) +#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) #' cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"), -#' max_depth = 3, eta = 1, objective = "binary:logistic") +#' max_depth = 3, eta = 1, objective = "binary:logistic") #' print(cv) #' print(cv, verbose=TRUE) #' @@ -192,7 +192,7 @@ xgb.cv <- function(params=list(), data, nrounds, nfold, label = NULL, missing = # create the booster-folds # train_folds - dall <- xgb.get.DMatrix(data, label, missing) + dall <- xgb.get.DMatrix(data, label, missing, nthread = params$nthread) bst_folds <- lapply(seq_along(folds), function(k) { dtest <- slice(dall, folds[[k]]) # code originally contributed by @RolandASc on stackoverflow diff --git a/R-package/R/xgb.train.R b/R-package/R/xgb.train.R index 580449791a62..f23700511684 100644 --- a/R-package/R/xgb.train.R +++ b/R-package/R/xgb.train.R @@ -192,8 +192,8 @@ #' data(agaricus.train, package='xgboost') #' data(agaricus.test, package='xgboost') #' -#' dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) -#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label)) +#' dtrain <- with(agaricus.train, 
xgb.DMatrix(data, label = label, nthread = 2)) +#' dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2)) #' watchlist <- list(train = dtrain, eval = dtest) #' #' ## A simple xgb.train example: diff --git a/R-package/man/cb.gblinear.history.Rd b/R-package/man/cb.gblinear.history.Rd index b111bd41dda1..fbbb56dbefc6 100644 --- a/R-package/man/cb.gblinear.history.Rd +++ b/R-package/man/cb.gblinear.history.Rd @@ -15,9 +15,11 @@ selected per iteration.} } \value{ Results are stored in the \code{coefs} element of the closure. -The \code{\link{xgb.gblinear.history}} convenience function provides an easy way to access it. +The \code{\link{xgb.gblinear.history}} convenience function provides an easy +way to access it. With \code{xgb.train}, it is either a dense of a sparse matrix. -While with \code{xgb.cv}, it is a list (an element per each fold) of such matrices. +While with \code{xgb.cv}, it is a list (an element per each fold) of such +matrices. } \description{ Callback closure for collecting the model coefficients history of a gblinear booster @@ -38,7 +40,7 @@ Callback function expects the following values to be set in its calling frame: # without considering the 2nd order interactions: x <- model.matrix(Species ~ .^2, iris)[,-1] colnames(x) -dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor")) +dtrain <- xgb.DMatrix(scale(x), label = 1*(iris$Species == "versicolor"), nthread = 2) param <- list(booster = "gblinear", objective = "reg:logistic", eval_metric = "auc", lambda = 0.0003, alpha = 0.0003, nthread = 2) # For 'shotgun', which is a default linear updater, using high eta values may result in @@ -63,14 +65,14 @@ matplot(xgb.gblinear.history(bst), type = 'l') # For xgb.cv: bst <- xgb.cv(param, dtrain, nfold = 5, nrounds = 100, eta = 0.8, - callbacks = list(cb.gblinear.history())) + callbacks = list(cb.gblinear.history())) # coefficients in the CV fold #3 matplot(xgb.gblinear.history(bst)[[3]], type = 'l') #### Multiclass 
classification: # -dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1) +dtrain <- xgb.DMatrix(scale(x), label = as.numeric(iris$Species) - 1, nthread = 2) param <- list(booster = "gblinear", objective = "multi:softprob", num_class = 3, lambda = 0.0003, alpha = 0.0003, nthread = 2) # For the default linear updater 'shotgun' it sometimes is helpful diff --git a/R-package/man/dim.xgb.DMatrix.Rd b/R-package/man/dim.xgb.DMatrix.Rd index 76c53dec2614..c9ea79177da3 100644 --- a/R-package/man/dim.xgb.DMatrix.Rd +++ b/R-package/man/dim.xgb.DMatrix.Rd @@ -19,7 +19,7 @@ be directly used with an \code{xgb.DMatrix} object. \examples{ data(agaricus.train, package='xgboost') train <- agaricus.train -dtrain <- xgb.DMatrix(train$data, label=train$label) +dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2) stopifnot(nrow(dtrain) == nrow(train$data)) stopifnot(ncol(dtrain) == ncol(train$data)) diff --git a/R-package/man/dimnames.xgb.DMatrix.Rd b/R-package/man/dimnames.xgb.DMatrix.Rd index 032cb95a55dd..9f2a07d8d592 100644 --- a/R-package/man/dimnames.xgb.DMatrix.Rd +++ b/R-package/man/dimnames.xgb.DMatrix.Rd @@ -26,7 +26,7 @@ Since row names are irrelevant, it is recommended to use \code{colnames} directl \examples{ data(agaricus.train, package='xgboost') train <- agaricus.train -dtrain <- xgb.DMatrix(train$data, label=train$label) +dtrain <- xgb.DMatrix(train$data, label=train$label, nthread = 2) dimnames(dtrain) colnames(dtrain) colnames(dtrain) <- make.names(1:ncol(train$data)) diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd index 246860bbd3f4..63222f3412c4 100644 --- a/R-package/man/getinfo.Rd +++ b/R-package/man/getinfo.Rd @@ -34,7 +34,7 @@ The \code{name} field can be one of the following: } \examples{ data(agaricus.train, package='xgboost') -dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) +dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) labels <- getinfo(dtrain, 'label') 
setinfo(dtrain, 'label', 1-labels) diff --git a/R-package/man/print.xgb.DMatrix.Rd b/R-package/man/print.xgb.DMatrix.Rd index f6cb2f62bea6..72c3d35e9043 100644 --- a/R-package/man/print.xgb.DMatrix.Rd +++ b/R-package/man/print.xgb.DMatrix.Rd @@ -19,7 +19,7 @@ Currently it displays dimensions and presence of info-fields and colnames. } \examples{ data(agaricus.train, package='xgboost') -dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) +dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) dtrain print(dtrain, verbose=TRUE) diff --git a/R-package/man/setinfo.Rd b/R-package/man/setinfo.Rd index 696a04c2ddea..c29fcfa7e976 100644 --- a/R-package/man/setinfo.Rd +++ b/R-package/man/setinfo.Rd @@ -33,7 +33,7 @@ The \code{name} field can be one of the following: } \examples{ data(agaricus.train, package='xgboost') -dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) +dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) labels <- getinfo(dtrain, 'label') setinfo(dtrain, 'label', 1-labels) diff --git a/R-package/man/slice.xgb.DMatrix.Rd b/R-package/man/slice.xgb.DMatrix.Rd index 5fffc203410a..cb65083e2aa2 100644 --- a/R-package/man/slice.xgb.DMatrix.Rd +++ b/R-package/man/slice.xgb.DMatrix.Rd @@ -28,7 +28,7 @@ original xgb.DMatrix object } \examples{ data(agaricus.train, package='xgboost') -dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) +dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) dsub <- slice(dtrain, 1:42) labels1 <- getinfo(dsub, 'label') diff --git a/R-package/man/xgb.DMatrix.Rd b/R-package/man/xgb.DMatrix.Rd index 52a31cfd1435..742073faddc4 100644 --- a/R-package/man/xgb.DMatrix.Rd +++ b/R-package/man/xgb.DMatrix.Rd @@ -38,7 +38,7 @@ Supported input file formats are either a LIBSVM text file or a binary file that } \examples{ data(agaricus.train, package='xgboost') -dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) +dtrain <- 
with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data') dtrain <- xgb.DMatrix('xgb.DMatrix.data') if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data') diff --git a/R-package/man/xgb.DMatrix.save.Rd b/R-package/man/xgb.DMatrix.save.Rd index 63e6930a874b..40151605954f 100644 --- a/R-package/man/xgb.DMatrix.save.Rd +++ b/R-package/man/xgb.DMatrix.save.Rd @@ -16,7 +16,7 @@ Save xgb.DMatrix object to binary file } \examples{ data(agaricus.train, package='xgboost') -dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) +dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) xgb.DMatrix.save(dtrain, 'xgb.DMatrix.data') dtrain <- xgb.DMatrix('xgb.DMatrix.data') if (file.exists('xgb.DMatrix.data')) file.remove('xgb.DMatrix.data') diff --git a/R-package/man/xgb.create.features.Rd b/R-package/man/xgb.create.features.Rd index 52ac052cb71d..64d4af158ef3 100644 --- a/R-package/man/xgb.create.features.Rd +++ b/R-package/man/xgb.create.features.Rd @@ -59,8 +59,8 @@ a rule on certain features." 
\examples{ data(agaricus.train, package='xgboost') data(agaricus.test, package='xgboost') -dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) -dtest <- with(agaricus.test, xgb.DMatrix(data, label = label)) +dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) +dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2)) param <- list(max_depth=2, eta=1, silent=1, objective='binary:logistic') nrounds = 4 @@ -76,8 +76,12 @@ new.features.train <- xgb.create.features(model = bst, agaricus.train$data) new.features.test <- xgb.create.features(model = bst, agaricus.test$data) # learning with new features -new.dtrain <- xgb.DMatrix(data = new.features.train, label = agaricus.train$label) -new.dtest <- xgb.DMatrix(data = new.features.test, label = agaricus.test$label) +new.dtrain <- xgb.DMatrix( + data = new.features.train, label = agaricus.train$label, nthread = 2 +) +new.dtest <- xgb.DMatrix( + data = new.features.test, label = agaricus.test$label, nthread = 2 +) watchlist <- list(train = new.dtrain) bst <- xgb.train(params = param, data = new.dtrain, nrounds = nrounds, nthread = 2) diff --git a/R-package/man/xgb.cv.Rd b/R-package/man/xgb.cv.Rd index 09f5091bd84f..8a962423de5e 100644 --- a/R-package/man/xgb.cv.Rd +++ b/R-package/man/xgb.cv.Rd @@ -158,9 +158,9 @@ Adapted from \url{https://en.wikipedia.org/wiki/Cross-validation_\%28statistics\ } \examples{ data(agaricus.train, package='xgboost') -dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) +dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) cv <- xgb.cv(data = dtrain, nrounds = 3, nthread = 2, nfold = 5, metrics = list("rmse","auc"), - max_depth = 3, eta = 1, objective = "binary:logistic") + max_depth = 3, eta = 1, objective = "binary:logistic") print(cv) print(cv, verbose=TRUE) diff --git a/R-package/man/xgb.train.Rd b/R-package/man/xgb.train.Rd index 5313bb15eea4..b2c9f9edc6d4 100644 --- a/R-package/man/xgb.train.Rd +++ 
b/R-package/man/xgb.train.Rd @@ -241,8 +241,8 @@ The following callbacks are automatically created when certain parameters are se data(agaricus.train, package='xgboost') data(agaricus.test, package='xgboost') -dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label)) -dtest <- with(agaricus.test, xgb.DMatrix(data, label = label)) +dtrain <- with(agaricus.train, xgb.DMatrix(data, label = label, nthread = 2)) +dtest <- with(agaricus.test, xgb.DMatrix(data, label = label, nthread = 2)) watchlist <- list(train = dtrain, eval = dtest) ## A simple xgb.train example: diff --git a/doc/contrib/release.rst b/doc/contrib/release.rst index c457c6560a64..86dda38fee8b 100644 --- a/doc/contrib/release.rst +++ b/doc/contrib/release.rst @@ -4,7 +4,7 @@ XGBoost Release Policy ======================= Versioning Policy ---------------------------- +----------------- Starting from XGBoost 1.0.0, each XGBoost release will be versioned as [MAJOR].[FEATURE].[MAINTENANCE] @@ -34,6 +34,20 @@ Making a Release + The CRAN package is maintained by `Tong He `_ and `Jiaming Yuan `__. - Before submitting a release, one should test the package on `R-hub `__ and `win-builder `__ first. Please note that the R-hub Windows instance doesn't have the exact same environment as the one hosted on win-builder. - + The Maven package is maintained by `Nan Zhu `_ and `Hyunsu Cho `_. + + +R CRAN Package +-------------- +Before submitting a release, one should test the package on `R-hub `__ and `win-builder `__ first. Please note that the R-hub Windows instance doesn't have the exact same environment as the one hosted on win-builder. + +According to the `CRAN policy `__: + + If running a package uses multiple threads/cores it must never use more than two simultaneously: the check farm is a shared resource and will typically be running many checks simultaneously. + +We need to check the number of CPUs used in examples. 
Export ``_R_CHECK_EXAMPLE_TIMING_CPU_TO_ELAPSED_THRESHOLD_=2.5`` before running ``R CMD check --as-cran`` `[1] <#references>`__ and make sure the machine you are using has enough CPU cores to reveal any potential policy violation. + +References +---------- + +[1] https://stat.ethz.ch/pipermail/r-package-devel/2022q4/008610.html diff --git a/doc/tutorials/dask.rst b/doc/tutorials/dask.rst index e7fd85066646..fdc4dd20b11a 100644 --- a/doc/tutorials/dask.rst +++ b/doc/tutorials/dask.rst @@ -564,7 +564,7 @@ Here are some pratices on reducing memory usage with dask and xgboost. nice summary. - When using GPU input, like dataframe loaded by ``dask_cudf``, you can try - :py:class:`xgboost.dask.DaskDeviceQuantileDMatrix` as a drop in replacement for ``DaskDMatrix`` + :py:class:`xgboost.dask.DaskQuantileDMatrix` as a drop in replacement for ``DaskDMatrix`` to reduce overall memory usage. See :ref:`sphx_glr_python_dask-examples_gpu_training.py` for an example. From 1136a7e0c3d0015f58c7f661b198e60b71b649ab Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 9 Nov 2022 14:22:54 +0800 Subject: [PATCH 013/126] Fix CRAN note on cleanup. 
(#8447) --- Makefile | 1 - 1 file changed, 1 deletion(-) diff --git a/Makefile b/Makefile index 1dc909ef26f7..c4411de3cdc0 100644 --- a/Makefile +++ b/Makefile @@ -126,7 +126,6 @@ Rpack: clean_all cat R-package/src/Makevars.in|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.in cat R-package/src/Makevars.win|sed '2s/.*/PKGROOT=./' > xgboost/src/Makevars.win rm -f xgboost/src/Makevars.win-e # OSX sed create this extra file; remove it - rm -f xgboost/cleanup bash R-package/remove_warning_suppression_pragma.sh bash xgboost/remove_warning_suppression_pragma.sh rm xgboost/remove_warning_suppression_pragma.sh From 9372370dda37f2118ddd97c286d93ed247890ee0 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Thu, 10 Nov 2022 12:15:25 -0500 Subject: [PATCH 014/126] Work with newer thrust and libcudacxx (#8432) * Thrust 1.17 removes the experimental/pinned_allocator. When xgboost is brought into a large project it can be compiled against Thrust 1.17+ which don't offer this experimental allocator. To ensure that going forward xgboost works in all environments we provide a xgboost namespaced version of the pinned_allocator that previously was in Thrust. 
* Update gputreeshap to work with libcudacxx 1.9 --- gputreeshap | 2 +- .../xgboost4j-gpu/src/native/xgboost4j-gpu.cu | 4 +- src/common/cuda_pinned_allocator.h | 91 +++++++++++++++++++ src/tree/gpu_hist/evaluate_splits.cuh | 4 +- 4 files changed, 96 insertions(+), 5 deletions(-) create mode 100644 src/common/cuda_pinned_allocator.h diff --git a/gputreeshap b/gputreeshap index acb5be3c17e9..787259b412c1 160000 --- a/gputreeshap +++ b/gputreeshap @@ -1 +1 @@ -Subproject commit acb5be3c17e9adae34ac0b176da6ea8e197cb17e +Subproject commit 787259b412c18ab8d5f24bf2b8bd6a59ff8208f3 diff --git a/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu b/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu index 4ecf8b0f1da1..bf3f6a0db316 100644 --- a/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu +++ b/jvm-packages/xgboost4j-gpu/src/native/xgboost4j-gpu.cu @@ -1,7 +1,7 @@ #include -#include #include "../../../../src/common/device_helpers.cuh" +#include "../../../../src/common/cuda_pinned_allocator.h" #include "../../../../src/data/array_interface.h" #include "jvm_utils.h" #include @@ -131,7 +131,7 @@ class DataIteratorProxy { bool cache_on_host_{true}; // TODO(Bobby): Make this optional. template - using Alloc = thrust::system::cuda::experimental::pinned_allocator; + using Alloc = xgboost::common::cuda::pinned_allocator; template using HostVector = std::vector>; diff --git a/src/common/cuda_pinned_allocator.h b/src/common/cuda_pinned_allocator.h new file mode 100644 index 000000000000..d11851d99d37 --- /dev/null +++ b/src/common/cuda_pinned_allocator.h @@ -0,0 +1,91 @@ +/*! + * Copyright 2022 by XGBoost Contributors + * \file common.h + * \brief cuda pinned allocator for usage with thrust containers + */ + +#pragma once + +#include +#include + +#include "common.h" + +namespace xgboost { +namespace common { +namespace cuda { + +// \p pinned_allocator is a CUDA-specific host memory allocator +// that employs \c cudaMallocHost for allocation. 
+// +// This implementation is ported from the experimental/pinned_allocator +// that Thrust used to provide. +// +// \see https://en.cppreference.com/w/cpp/memory/allocator +template +class pinned_allocator; + +template <> +class pinned_allocator { + public: + using value_type = void; // NOLINT: The type of the elements in the allocator + using pointer = void*; // NOLINT: The type returned by address() / allocate() + using const_pointer = const void*; // NOLINT: The type returned by address() + using size_type = std::size_t; // NOLINT: The type used for the size of the allocation + using difference_type = std::ptrdiff_t; // NOLINT: The type of the distance between two pointers + + template + struct rebind { // NOLINT + using other = pinned_allocator; // NOLINT: The rebound type + }; +}; + + +template +class pinned_allocator { + public: + using value_type = T; // NOLINT: The type of the elements in the allocator + using pointer = T*; // NOLINT: The type returned by address() / allocate() + using const_pointer = const T*; // NOLINT: The type returned by address() + using reference = T&; // NOLINT: The parameter type for address() + using const_reference = const T&; // NOLINT: The parameter type for address() + using size_type = std::size_t; // NOLINT: The type used for the size of the allocation + using difference_type = std::ptrdiff_t; // NOLINT: The type of the distance between two pointers + + template + struct rebind { // NOLINT + using other = pinned_allocator; // NOLINT: The rebound type + }; + + XGBOOST_DEVICE inline pinned_allocator() {}; // NOLINT: host/device markup ignored on defaulted functions + XGBOOST_DEVICE inline ~pinned_allocator() {} // NOLINT: host/device markup ignored on defaulted functions + XGBOOST_DEVICE inline pinned_allocator(pinned_allocator const&) {} // NOLINT: host/device markup ignored on defaulted functions + + + template + XGBOOST_DEVICE inline pinned_allocator(pinned_allocator const&) {} // NOLINT + + XGBOOST_DEVICE inline pointer 
address(reference r) { return &r; } // NOLINT + XGBOOST_DEVICE inline const_pointer address(const_reference r) { return &r; } // NOLINT + + inline pointer allocate(size_type cnt, const_pointer = nullptr) { // NOLINT + if (cnt > this->max_size()) { throw std::bad_alloc(); } // end if + + pointer result(nullptr); + dh::safe_cuda(cudaMallocHost(reinterpret_cast(&result), cnt * sizeof(value_type))); + return result; + } + + inline void deallocate(pointer p, size_type) { dh::safe_cuda(cudaFreeHost(p)); } // NOLINT + + inline size_type max_size() const { return (std::numeric_limits::max)() / sizeof(T); } // NOLINT + + XGBOOST_DEVICE inline bool operator==(pinned_allocator const& x) const { return true; } + + XGBOOST_DEVICE inline bool operator!=(pinned_allocator const& x) const { + return !operator==(x); + } +}; +} // namespace cuda +} // namespace common +} // namespace xgboost diff --git a/src/tree/gpu_hist/evaluate_splits.cuh b/src/tree/gpu_hist/evaluate_splits.cuh index 2da207e79104..d3174c4df651 100644 --- a/src/tree/gpu_hist/evaluate_splits.cuh +++ b/src/tree/gpu_hist/evaluate_splits.cuh @@ -3,10 +3,10 @@ */ #ifndef EVALUATE_SPLITS_CUH_ #define EVALUATE_SPLITS_CUH_ -#include #include #include "../../common/categorical.h" +#include "../../common/cuda_pinned_allocator.h" #include "../split_evaluator.h" #include "../updater_gpu_common.cuh" #include "expand_entry.cuh" @@ -57,7 +57,7 @@ struct CatAccessor { class GPUHistEvaluator { using CatST = common::CatBitField::value_type; // categorical storage type // use pinned memory to stage the categories, used for sort based splits. - using Alloc = thrust::system::cuda::experimental::pinned_allocator; + using Alloc = xgboost::common::cuda::pinned_allocator; private: TreeEvaluator tree_evaluator_; From db14e3feb77f97a689209b8ac60b9d85d2433f70 Mon Sep 17 00:00:00 2001 From: Philip Hyunsu Cho Date: Wed, 30 Nov 2022 11:44:54 -0800 Subject: [PATCH 015/126] Support null value in CUDA array interface. 
(#8486) (#8499) --- src/data/array_interface.h | 26 ++++++++++++++++---------- tests/cpp/data/test_array_interface.cc | 14 +++++++++++--- 2 files changed, 27 insertions(+), 13 deletions(-) diff --git a/src/data/array_interface.h b/src/data/array_interface.h index 0d7b4681accb..e755108069dc 100644 --- a/src/data/array_interface.h +++ b/src/data/array_interface.h @@ -101,7 +101,7 @@ class ArrayInterfaceHandler { template static PtrType GetPtrFromArrayData(Object::Map const &obj) { auto data_it = obj.find("data"); - if (data_it == obj.cend()) { + if (data_it == obj.cend() || IsA(data_it->second)) { LOG(FATAL) << "Empty data passed in."; } auto p_data = reinterpret_cast( @@ -111,7 +111,7 @@ class ArrayInterfaceHandler { static void Validate(Object::Map const &array) { auto version_it = array.find("version"); - if (version_it == array.cend()) { + if (version_it == array.cend() || IsA(version_it->second)) { LOG(FATAL) << "Missing `version' field for array interface"; } if (get(version_it->second) > 3) { @@ -119,17 +119,19 @@ class ArrayInterfaceHandler { } auto typestr_it = array.find("typestr"); - if (typestr_it == array.cend()) { + if (typestr_it == array.cend() || IsA(typestr_it->second)) { LOG(FATAL) << "Missing `typestr' field for array interface"; } auto typestr = get(typestr_it->second); CHECK(typestr.size() == 3 || typestr.size() == 4) << ArrayInterfaceErrors::TypestrFormat(); - if (array.find("shape") == array.cend()) { + auto shape_it = array.find("shape"); + if (shape_it == array.cend() || IsA(shape_it->second)) { LOG(FATAL) << "Missing `shape' field for array interface"; } - if (array.find("data") == array.cend()) { + auto data_it = array.find("data"); + if (data_it == array.cend() || IsA(data_it->second)) { LOG(FATAL) << "Missing `data' field for array interface"; } } @@ -139,8 +141,9 @@ class ArrayInterfaceHandler { static size_t ExtractMask(Object::Map const &column, common::Span *p_out) { auto &s_mask = *p_out; - if (column.find("mask") != column.cend()) 
{ - auto const &j_mask = get(column.at("mask")); + auto const &mask_it = column.find("mask"); + if (mask_it != column.cend() && !IsA(mask_it->second)) { + auto const &j_mask = get(mask_it->second); Validate(j_mask); auto p_mask = GetPtrFromArrayData(j_mask); @@ -173,8 +176,9 @@ class ArrayInterfaceHandler { // assume 1 byte alignment. size_t const span_size = RBitField8::ComputeStorageSize(n_bits); - if (j_mask.find("strides") != j_mask.cend()) { - auto strides = get(column.at("strides")); + auto strides_it = j_mask.find("strides"); + if (strides_it != j_mask.cend() && !IsA(strides_it->second)) { + auto strides = get(strides_it->second); CHECK_EQ(strides.size(), 1) << ArrayInterfaceErrors::Dimension(1); CHECK_EQ(get(strides.at(0)), type_length) << ArrayInterfaceErrors::Contiguous(); } @@ -401,7 +405,9 @@ class ArrayInterface { << "XGBoost doesn't support internal broadcasting."; } } else { - CHECK(array.find("mask") == array.cend()) << "Masked array is not yet supported."; + auto mask_it = array.find("mask"); + CHECK(mask_it == array.cend() || IsA(mask_it->second)) + << "Masked array is not yet supported."; } auto stream_it = array.find("stream"); diff --git a/tests/cpp/data/test_array_interface.cc b/tests/cpp/data/test_array_interface.cc index 3c2e0e38d5c3..5bd771ff08e2 100644 --- a/tests/cpp/data/test_array_interface.cc +++ b/tests/cpp/data/test_array_interface.cc @@ -33,9 +33,8 @@ TEST(ArrayInterface, Error) { Json column { Object() }; std::vector j_shape {Json(Integer(static_cast(kRows)))}; column["shape"] = Array(j_shape); - std::vector j_data { - Json(Integer(reinterpret_cast(nullptr))), - Json(Boolean(false))}; + std::vector j_data{Json(Integer(reinterpret_cast(nullptr))), + Json(Boolean(false))}; auto const& column_obj = get(column); std::string typestr{" s_mask; + EXPECT_THROW(ArrayInterfaceHandler::ExtractMask(column_obj, &s_mask), dmlc::Error); } TEST(ArrayInterface, GetElement) { From 67b657dad02fc2ceea9583a1136d4e29871a6433 Mon Sep 17 00:00:00 2001 
From: Philip Hyunsu Cho Date: Wed, 30 Nov 2022 11:47:59 -0800 Subject: [PATCH 016/126] SO_DOMAIN do not support on IBM i, using getsockname instead (#8437) (#8500) --- include/xgboost/collective/socket.h | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/include/xgboost/collective/socket.h b/include/xgboost/collective/socket.h index e058e58a843f..33d14fe8ca19 100644 --- a/include/xgboost/collective/socket.h +++ b/include/xgboost/collective/socket.h @@ -287,11 +287,22 @@ class TCPSocket { #elif defined(__APPLE__) return domain_; #elif defined(__unix__) +#ifndef __PASE__ std::int32_t domain; socklen_t len = sizeof(domain); xgboost_CHECK_SYS_CALL( getsockopt(handle_, SOL_SOCKET, SO_DOMAIN, reinterpret_cast(&domain), &len), 0); return ret_iafamily(domain); +#else + struct sockaddr sa; + socklen_t sizeofsa = sizeof(sa); + xgboost_CHECK_SYS_CALL( + getsockname(handle_, &sa, &sizeofsa), 0); + if (sizeofsa < sizeof(uchar_t)*2) { + return ret_iafamily(AF_INET); + } + return ret_iafamily(sa.sa_family); +#endif // __PASE__ #else LOG(FATAL) << "Unknown platform."; return ret_iafamily(AF_INET); From 850b53100f5d3e294310888255a54d840e4ea6b1 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 6 Dec 2022 18:21:14 +0800 Subject: [PATCH 017/126] [backport] [doc] Fix outdated document [skip ci] (#8527) (#8553) * [doc] Fix document around categorical parameters. [skip ci] * note on validate parameter [skip ci] * Fix dask doc as well [skip ci] --- doc/parameter.rst | 12 ++++-------- doc/python/python_api.rst | 5 +---- 2 files changed, 5 insertions(+), 12 deletions(-) diff --git a/doc/parameter.rst b/doc/parameter.rst index 837a7d1b0b7a..361847501357 100644 --- a/doc/parameter.rst +++ b/doc/parameter.rst @@ -44,8 +44,7 @@ General Parameters * ``validate_parameters`` [default to ``false``, except for Python, R and CLI interface] - When set to True, XGBoost will perform validation of input parameters to check whether - a parameter is used or not. 
The feature is still experimental. It's expected to have - some false positives. + a parameter is used or not. * ``nthread`` [default to maximum number of threads available if not set] @@ -233,24 +232,21 @@ Parameters for Categorical Feature These parameters are only used for training with categorical data. See :doc:`/tutorials/categorical` for more information. +.. note:: These parameters are experimental. ``exact`` tree method is not yet supported. + + * ``max_cat_to_onehot`` .. versionadded:: 1.6.0 - .. note:: This parameter is experimental. ``exact`` tree method is not yet supported. - - A threshold for deciding whether XGBoost should use one-hot encoding based split for categorical data. When number of categories is lesser than the threshold then one-hot encoding is chosen, otherwise the categories will be partitioned into children nodes. - Only relevant for regression and binary classification. Also, ``exact`` tree method is - not supported * ``max_cat_threshold`` .. versionadded:: 1.7.0 - .. note:: This parameter is experimental. ``exact`` tree method is not yet supported. - - Maximum number of categories considered for each split. Used only by partition-based splits for preventing over-fitting. diff --git a/doc/python/python_api.rst b/doc/python/python_api.rst index a5ddd7831006..03b431c77d41 100644 --- a/doc/python/python_api.rst +++ b/doc/python/python_api.rst @@ -25,9 +25,6 @@ Core Data Structure .. autoclass:: xgboost.QuantileDMatrix :show-inheritance: -.. autoclass:: xgboost.DeviceQuantileDMatrix - :show-inheritance: - .. autoclass:: xgboost.Booster :members: :show-inheritance: @@ -115,7 +112,7 @@ Dask API :inherited-members: :show-inheritance: -.. autoclass:: xgboost.dask.DaskDeviceQuantileDMatrix +.. 
autoclass:: xgboost.dask.DaskQuantileDMatrix :members: :inherited-members: :show-inheritance: From 58bc22565737f73d63720acd08b8f822c8759a1d Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Tue, 6 Dec 2022 21:35:26 +0800 Subject: [PATCH 018/126] [backport] [CI] Fix github action mismatched glibcxx. (#8551) (#8552) Split up the Linux test to use the toolchain from conda forge. --- .github/workflows/main.yml | 14 +-- .github/workflows/python_tests.yml | 108 +++++++++++++++--------- tests/ci_build/conda_env/cpp_test.yml | 11 +++ tests/ci_build/conda_env/sdist_test.yml | 13 +++ 4 files changed, 100 insertions(+), 46 deletions(-) create mode 100644 tests/ci_build/conda_env/cpp_test.yml create mode 100644 tests/ci_build/conda_env/sdist_test.yml diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index abe4b2b71dc8..7b17f9b8c66c 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -75,19 +75,18 @@ jobs: - uses: actions/checkout@v2 with: submodules: 'true' - - name: Install system packages - run: | - sudo apt-get install -y --no-install-recommends ninja-build - - uses: conda-incubator/setup-miniconda@v2 + - uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14 with: - auto-update-conda: true - python-version: ${{ matrix.python-version }} - activate-environment: test + cache-downloads: true + cache-env: true + environment-name: cpp_test + environment-file: tests/ci_build/conda_env/cpp_test.yml - name: Display Conda env shell: bash -l {0} run: | conda info conda list + - name: Build and install XGBoost static library shell: bash -l {0} run: | @@ -109,6 +108,7 @@ jobs: cd .. 
rm -rf ./build popd + - name: Build and install XGBoost shared library shell: bash -l {0} run: | diff --git a/.github/workflows/python_tests.yml b/.github/workflows/python_tests.yml index 7ad742045223..cd88c25b7e2a 100644 --- a/.github/workflows/python_tests.yml +++ b/.github/workflows/python_tests.yml @@ -41,12 +41,46 @@ jobs: run: | python tests/ci_build/lint_python.py --format=0 --type-check=0 --pylint=1 + python-sdist-test-on-Linux: + # Mismatched glibcxx version between system and conda forge. + runs-on: ${{ matrix.os }} + name: Test installing XGBoost Python source package on ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest] + steps: + - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0 + with: + submodules: 'true' + - uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14 + with: + cache-downloads: true + cache-env: false + environment-name: sdist_test + environment-file: tests/ci_build/conda_env/sdist_test.yml + - name: Display Conda env + shell: bash -l {0} + run: | + conda info + conda list + - name: Build and install XGBoost + shell: bash -l {0} + run: | + cd python-package + python --version + python setup.py sdist + pip install -v ./dist/xgboost-*.tar.gz + cd .. + python -c 'import xgboost' + python-sdist-test: + # Use system toolchain instead of conda toolchain for macos and windows. 
+ # MacOS has linker error if clang++ from conda-forge is used runs-on: ${{ matrix.os }} name: Test installing XGBoost Python source package on ${{ matrix.os }} strategy: matrix: - os: [ubuntu-latest, macos-11, windows-latest] + os: [macos-11, windows-latest] python-version: ["3.8"] steps: - uses: actions/checkout@v2 @@ -56,11 +90,7 @@ jobs: if: matrix.os == 'macos-11' run: | brew install ninja libomp - - name: Install Ubuntu system dependencies - if: matrix.os == 'ubuntu-latest' - run: | - sudo apt-get install -y --no-install-recommends ninja-build - - uses: conda-incubator/setup-miniconda@v2 + - uses: conda-incubator/setup-miniconda@35d1405e78aa3f784fe3ce9a2eb378d5eeb62169 # v2.1.1 with: auto-update-conda: true python-version: ${{ matrix.python-version }} @@ -80,25 +110,26 @@ jobs: cd .. python -c 'import xgboost' - python-tests-on-win: + python-tests-on-macos: name: Test XGBoost Python package on ${{ matrix.config.os }} runs-on: ${{ matrix.config.os }} + timeout-minutes: 60 strategy: matrix: config: - - {os: windows-latest, python-version: '3.8'} + - {os: macos-11} steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@e2f20e631ae6d7dd3b768f56a5d2af784dd54791 # v2.5.0 with: submodules: 'true' - - uses: conda-incubator/setup-miniconda@v2 + - uses: mamba-org/provision-with-micromamba@f347426e5745fe3dfc13ec5baf20496990d0281f # v14 with: - auto-update-conda: true - python-version: ${{ matrix.config.python-version }} - activate-environment: win64_env - environment-file: tests/ci_build/conda_env/win64_cpu_test.yml + cache-downloads: true + cache-env: false + environment-name: macos_test + environment-file: tests/ci_build/conda_env/macos_cpu_test.yml - name: Display Conda env shell: bash -l {0} @@ -106,35 +137,38 @@ jobs: conda info conda list - - name: Build XGBoost on Windows + - name: Build XGBoost on macos shell: bash -l {0} run: | - mkdir build_msvc - cd build_msvc - cmake .. 
-G"Visual Studio 17 2022" -DCMAKE_CONFIGURATION_TYPES="Release" -A x64 -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON - cmake --build . --config Release --parallel $(nproc) + brew install ninja + + mkdir build + cd build + # Set prefix, to use OpenMP library from Conda env + # See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228 + # to learn why we don't use libomp from Homebrew. + cmake .. -GNinja -DCMAKE_PREFIX_PATH=$CONDA_PREFIX + ninja - name: Install Python package shell: bash -l {0} run: | cd python-package python --version - python setup.py bdist_wheel --universal - pip install ./dist/*.whl + python setup.py install - name: Test Python package shell: bash -l {0} run: | - pytest -s -v ./tests/python + pytest -s -v -rxXs --durations=0 ./tests/python - python-tests-on-macos: + python-tests-on-win: name: Test XGBoost Python package on ${{ matrix.config.os }} runs-on: ${{ matrix.config.os }} - timeout-minutes: 90 strategy: matrix: config: - - {os: macos-11, python-version "3.8" } + - {os: windows-latest, python-version: '3.8'} steps: - uses: actions/checkout@v2 @@ -145,8 +179,8 @@ jobs: with: auto-update-conda: true python-version: ${{ matrix.config.python-version }} - activate-environment: macos_test - environment-file: tests/ci_build/conda_env/macos_cpu_test.yml + activate-environment: win64_env + environment-file: tests/ci_build/conda_env/win64_cpu_test.yml - name: Display Conda env shell: bash -l {0} @@ -154,27 +188,23 @@ jobs: conda info conda list - - name: Build XGBoost on macos + - name: Build XGBoost on Windows shell: bash -l {0} run: | - brew install ninja - - mkdir build - cd build - # Set prefix, to use OpenMP library from Conda env - # See https://github.com/dmlc/xgboost/issues/7039#issuecomment-1025038228 - # to learn why we don't use libomp from Homebrew. - cmake .. -GNinja -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_PREFIX_PATH=$CONDA_PREFIX - ninja + mkdir build_msvc + cd build_msvc + cmake .. 
-G"Visual Studio 17 2022" -DCMAKE_CONFIGURATION_TYPES="Release" -A x64 -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON + cmake --build . --config Release --parallel $(nproc) - name: Install Python package shell: bash -l {0} run: | cd python-package python --version - python setup.py install + python setup.py bdist_wheel --universal + pip install ./dist/*.whl - name: Test Python package shell: bash -l {0} run: | - pytest -s -v ./tests/python + pytest -s -v -rxXs --durations=0 ./tests/python diff --git a/tests/ci_build/conda_env/cpp_test.yml b/tests/ci_build/conda_env/cpp_test.yml new file mode 100644 index 000000000000..80ce65cc1cea --- /dev/null +++ b/tests/ci_build/conda_env/cpp_test.yml @@ -0,0 +1,11 @@ +# conda environment for CPP test on Linux distributions +name: cpp_test +channels: +- defaults +- conda-forge +dependencies: +- cmake +- ninja +- c-compiler +- cxx-compiler +- gtest diff --git a/tests/ci_build/conda_env/sdist_test.yml b/tests/ci_build/conda_env/sdist_test.yml new file mode 100644 index 000000000000..fc1e710973b2 --- /dev/null +++ b/tests/ci_build/conda_env/sdist_test.yml @@ -0,0 +1,13 @@ +# conda environment for source distribution test. 
+name: sdist_test +channels: +- defaults +- conda-forge +dependencies: +- python=3.8 +- pip +- wheel +- cmake +- ninja +- c-compiler +- cxx-compiler From 60a8c8ebba7d1d4b08aa19358fc19df08aa83c82 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 7 Dec 2022 02:07:37 +0800 Subject: [PATCH 019/126] [pyspark] sort qid for SparkRanker (#8497) (#8555) * [pyspark] sort qid for SparkRandker * resolve comments Co-authored-by: Bobby Wang --- python-package/xgboost/spark/core.py | 6 +- tests/python/test_spark/test_spark_local.py | 91 ++++++++++++++------- 2 files changed, 65 insertions(+), 32 deletions(-) diff --git a/python-package/xgboost/spark/core.py b/python-package/xgboost/spark/core.py index 1e544c34f228..eb1f4e7dfff8 100644 --- a/python-package/xgboost/spark/core.py +++ b/python-package/xgboost/spark/core.py @@ -1,7 +1,7 @@ # type: ignore """Xgboost pyspark integration submodule for core code.""" # pylint: disable=fixme, too-many-ancestors, protected-access, no-member, invalid-name -# pylint: disable=too-few-public-methods, too-many-lines +# pylint: disable=too-few-public-methods, too-many-lines, too-many-branches import json from typing import Iterator, Optional, Tuple @@ -728,6 +728,10 @@ def _fit(self, dataset): else: dataset = dataset.repartition(num_workers) + if self.isDefined(self.qid_col) and self.getOrDefault(self.qid_col): + # XGBoost requires qid to be sorted for each partition + dataset = dataset.sortWithinPartitions(alias.qid, ascending=True) + train_params = self._get_distributed_train_params(dataset) booster_params, train_call_kwargs_params = self._get_xgb_train_call_args( train_params diff --git a/tests/python/test_spark/test_spark_local.py b/tests/python/test_spark/test_spark_local.py index 03981d955040..6757d2e34e5c 100644 --- a/tests/python/test_spark/test_spark_local.py +++ b/tests/python/test_spark/test_spark_local.py @@ -390,28 +390,6 @@ def setUp(self): "expected_prediction_with_base_margin", ], ) - self.ranker_df_train = 
self.session.createDataFrame( - [ - (Vectors.dense(1.0, 2.0, 3.0), 0, 0), - (Vectors.dense(4.0, 5.0, 6.0), 1, 0), - (Vectors.dense(9.0, 4.0, 8.0), 2, 0), - (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 1), - (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1), - (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 1), - ], - ["features", "label", "qid"], - ) - self.ranker_df_test = self.session.createDataFrame( - [ - (Vectors.dense(1.5, 2.0, 3.0), 0, -1.87988), - (Vectors.dense(4.5, 5.0, 6.0), 0, 0.29556), - (Vectors.dense(9.0, 4.5, 8.0), 0, 2.36570), - (Vectors.sparse(3, {1: 1.0, 2: 6.0}), 1, -1.87988), - (Vectors.sparse(3, {1: 6.0, 2: 7.0}), 1, -0.30612), - (Vectors.sparse(3, {1: 8.0, 2: 10.5}), 1, 2.44826), - ], - ["features", "qid", "expected_prediction"], - ) self.reg_df_sparse_train = self.session.createDataFrame( [ @@ -1039,15 +1017,6 @@ def test_classifier_with_sparse_optim(self): for row1, row2 in zip(pred_result, pred_result2): self.assertTrue(np.allclose(row1.probability, row2.probability, rtol=1e-3)) - def test_ranker(self): - ranker = SparkXGBRanker(qid_col="qid") - assert ranker.getOrDefault(ranker.objective) == "rank:pairwise" - model = ranker.fit(self.ranker_df_train) - pred_result = model.transform(self.ranker_df_test).collect() - - for row in pred_result: - assert np.isclose(row.prediction, row.expected_prediction, rtol=1e-3) - def test_empty_validation_data(self) -> None: for tree_method in [ "hist", @@ -1130,3 +1099,63 @@ def test_early_stop_param_validation(self): def test_unsupported_params(self): with pytest.raises(ValueError, match="evals_result"): SparkXGBClassifier(evals_result={}) + + +class XgboostRankerLocalTest(SparkTestCase): + def setUp(self): + self.session.conf.set("spark.sql.execution.arrow.maxRecordsPerBatch", "8") + self.ranker_df_train = self.session.createDataFrame( + [ + (Vectors.dense(1.0, 2.0, 3.0), 0, 0), + (Vectors.dense(4.0, 5.0, 6.0), 1, 0), + (Vectors.dense(9.0, 4.0, 8.0), 2, 0), + (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 1), + 
(Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 1), + (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 1), + ], + ["features", "label", "qid"], + ) + self.ranker_df_test = self.session.createDataFrame( + [ + (Vectors.dense(1.5, 2.0, 3.0), 0, -1.87988), + (Vectors.dense(4.5, 5.0, 6.0), 0, 0.29556), + (Vectors.dense(9.0, 4.5, 8.0), 0, 2.36570), + (Vectors.sparse(3, {1: 1.0, 2: 6.0}), 1, -1.87988), + (Vectors.sparse(3, {1: 6.0, 2: 7.0}), 1, -0.30612), + (Vectors.sparse(3, {1: 8.0, 2: 10.5}), 1, 2.44826), + ], + ["features", "qid", "expected_prediction"], + ) + self.ranker_df_train_1 = self.session.createDataFrame( + [ + (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 9), + (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 9), + (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 9), + (Vectors.dense(1.0, 2.0, 3.0), 0, 8), + (Vectors.dense(4.0, 5.0, 6.0), 1, 8), + (Vectors.dense(9.0, 4.0, 8.0), 2, 8), + (Vectors.sparse(3, {1: 1.0, 2: 5.5}), 0, 7), + (Vectors.sparse(3, {1: 6.0, 2: 7.5}), 1, 7), + (Vectors.sparse(3, {1: 8.0, 2: 9.5}), 2, 7), + (Vectors.dense(1.0, 2.0, 3.0), 0, 6), + (Vectors.dense(4.0, 5.0, 6.0), 1, 6), + (Vectors.dense(9.0, 4.0, 8.0), 2, 6), + ] + * 4, + ["features", "label", "qid"], + ) + + def test_ranker(self): + ranker = SparkXGBRanker(qid_col="qid") + assert ranker.getOrDefault(ranker.objective) == "rank:pairwise" + model = ranker.fit(self.ranker_df_train) + pred_result = model.transform(self.ranker_df_test).collect() + + for row in pred_result: + assert np.isclose(row.prediction, row.expected_prediction, rtol=1e-3) + + def test_ranker_qid_sorted(self): + ranker = SparkXGBRanker(qid_col="qid", num_workers=4) + assert ranker.getOrDefault(ranker.objective) == "rank:pairwise" + model = ranker.fit(self.ranker_df_train_1) + model.transform(self.ranker_df_test).collect() From 59c54e361b5795b84efab161c5c37abbdb8c5aab Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 7 Dec 2022 03:19:35 +0800 Subject: [PATCH 020/126] [pyspark] Make QDM optional based on cuDF check (#8471) (#8556) 
Co-authored-by: WeichenXu --- python-package/xgboost/compat.py | 17 +++++++++++++++++ python-package/xgboost/spark/core.py | 13 ++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/python-package/xgboost/compat.py b/python-package/xgboost/compat.py index 275b6621064d..fab734a01361 100644 --- a/python-package/xgboost/compat.py +++ b/python-package/xgboost/compat.py @@ -43,6 +43,7 @@ def lazy_isinstance(instance: Any, module: str, name: str) -> bool: pandas_concat = None PANDAS_INSTALLED = False + # sklearn try: from sklearn.base import BaseEstimator as XGBModelBase @@ -72,6 +73,22 @@ def lazy_isinstance(instance: Any, module: str, name: str) -> bool: XGBStratifiedKFold = None +_logger = logging.getLogger(__name__) + + +def is_cudf_available() -> bool: + """Check cuDF package available or not""" + if importlib.util.find_spec("cudf") is None: + return False + try: + import cudf + + return True + except ImportError: + _logger.exception("Importing cuDF failed, use DMatrix instead of QDM") + return False + + class XGBoostLabelEncoder(LabelEncoder): """Label encoder with JSON serialization methods.""" diff --git a/python-package/xgboost/spark/core.py b/python-package/xgboost/spark/core.py index eb1f4e7dfff8..caa6e3cd0931 100644 --- a/python-package/xgboost/spark/core.py +++ b/python-package/xgboost/spark/core.py @@ -32,6 +32,7 @@ ShortType, ) from scipy.special import expit, softmax # pylint: disable=no-name-in-module +from xgboost.compat import is_cudf_available from xgboost.core import Booster from xgboost.training import train as worker_train @@ -759,7 +760,8 @@ def _fit(self, dataset): k: v for k, v in train_call_kwargs_params.items() if v is not None } dmatrix_kwargs = {k: v for k, v in dmatrix_kwargs.items() if v is not None} - use_qdm = booster_params.get("tree_method", None) in ("hist", "gpu_hist") + + use_hist = booster_params.get("tree_method", None) in ("hist", "gpu_hist") def _train_booster(pandas_df_iter): """Takes in an RDD partition 
and outputs a booster for that partition after @@ -773,6 +775,15 @@ def _train_booster(pandas_df_iter): gpu_id = None + # If cuDF is not installed, then using DMatrix instead of QDM, + # because without cuDF, DMatrix performs better than QDM. + # Note: Checking `is_cudf_available` in spark worker side because + # spark worker might has different python environment with driver side. + if use_gpu: + use_qdm = use_hist and is_cudf_available() + else: + use_qdm = use_hist + if use_qdm and (booster_params.get("max_bin", None) is not None): dmatrix_kwargs["max_bin"] = booster_params["max_bin"] From a980e10744e5539ced1d7da650b680a204868a1f Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 7 Dec 2022 23:25:05 +0800 Subject: [PATCH 021/126] Properly await async method client.wait_for_workers (#8558) (#8567) * Properly await async method client.wait_for_workers * ignore mypy error. Co-authored-by: jiamingy Co-authored-by: Matthew Rocklin --- python-package/xgboost/dask.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python-package/xgboost/dask.py b/python-package/xgboost/dask.py index 7d21ff9fcb48..69d1832f1506 100644 --- a/python-package/xgboost/dask.py +++ b/python-package/xgboost/dask.py @@ -853,7 +853,7 @@ async def _get_rabit_args( sched_addr = None # make sure all workers are online so that we can obtain reliable scheduler_info - client.wait_for_workers(n_workers) + await client.wait_for_workers(n_workers) # type: ignore env = await client.run_on_scheduler( _start_tracker, n_workers, sched_addr, user_addr ) From 62ed8b5fef01d960b5e180b6c3ab170b5f7a85d2 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 8 Dec 2022 21:46:26 +0800 Subject: [PATCH 022/126] Bump release version to 1.7.2. 
(#8569) --- CMakeLists.txt | 2 +- R-package/DESCRIPTION | 4 ++-- R-package/configure | 18 +++++++++--------- R-package/configure.ac | 2 +- include/xgboost/version_config.h | 2 +- jvm-packages/pom.xml | 2 +- jvm-packages/xgboost4j-example/pom.xml | 8 ++++---- jvm-packages/xgboost4j-flink/pom.xml | 6 +++--- jvm-packages/xgboost4j-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark/pom.xml | 4 ++-- jvm-packages/xgboost4j/pom.xml | 4 ++-- python-package/xgboost/VERSION | 2 +- 13 files changed, 31 insertions(+), 31 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 6a004fbe3b82..0509f18ed881 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR) -project(xgboost LANGUAGES CXX C VERSION 1.7.1) +project(xgboost LANGUAGES CXX C VERSION 1.7.2) include(cmake/Utils.cmake) list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") cmake_policy(SET CMP0022 NEW) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index d4e7375c64bb..ef739646a8fa 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -1,8 +1,8 @@ Package: xgboost Type: Package Title: Extreme Gradient Boosting -Version: 1.7.1.1 -Date: 2022-11-03 +Version: 1.7.2.1 +Date: 2022-12-08 Authors@R: c( person("Tianqi", "Chen", role = c("aut"), email = "tianqi.tchen@gmail.com"), diff --git a/R-package/configure b/R-package/configure index 395510ad5812..a89b42b24b70 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for xgboost 1.7.1. +# Generated by GNU Autoconf 2.69 for xgboost 1.7.2. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -576,8 +576,8 @@ MAKEFLAGS= # Identity of this package. 
PACKAGE_NAME='xgboost' PACKAGE_TARNAME='xgboost' -PACKAGE_VERSION='1.7.1' -PACKAGE_STRING='xgboost 1.7.1' +PACKAGE_VERSION='1.7.2' +PACKAGE_STRING='xgboost 1.7.2' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1195,7 +1195,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures xgboost 1.7.1 to adapt to many kinds of systems. +\`configure' configures xgboost 1.7.2 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1257,7 +1257,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of xgboost 1.7.1:";; + short | recursive ) echo "Configuration of xgboost 1.7.2:";; esac cat <<\_ACEOF @@ -1336,7 +1336,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -xgboost configure 1.7.1 +xgboost configure 1.7.2 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -1479,7 +1479,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by xgboost $as_me 1.7.1, which was +It was created by xgboost $as_me 1.7.2, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -3294,7 +3294,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by xgboost $as_me 1.7.1, which was +This file was extended by xgboost $as_me 1.7.2, which was generated by GNU Autoconf 2.69. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -3347,7 +3347,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -xgboost config.status 1.7.1 +xgboost config.status 1.7.2 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/R-package/configure.ac b/R-package/configure.ac index 0215ec634df9..d99e5b8dab3c 100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -2,7 +2,7 @@ AC_PREREQ(2.69) -AC_INIT([xgboost],[1.7.1],[],[xgboost],[]) +AC_INIT([xgboost],[1.7.2],[],[xgboost],[]) # Use this line to set CC variable to a C compiler AC_PROG_CC diff --git a/include/xgboost/version_config.h b/include/xgboost/version_config.h index b91107f068b5..0e0bcb98e46c 100644 --- a/include/xgboost/version_config.h +++ b/include/xgboost/version_config.h @@ -6,6 +6,6 @@ #define XGBOOST_VER_MAJOR 1 #define XGBOOST_VER_MINOR 7 -#define XGBOOST_VER_PATCH 1 +#define XGBOOST_VER_PATCH 2 #endif // XGBOOST_VERSION_CONFIG_H_ diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index f39b5dcfa47f..d18ba5301150 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1 + 1.7.2 pom XGBoost JVM Package JVM Package for XGBoost diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index 2f27ae924242..bce7bd211c9b 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1 + 1.7.2 xgboost4j-example_2.12 - 1.7.1 + 1.7.2 jar @@ -26,7 +26,7 @@ ml.dmlc xgboost4j-spark_${scala.binary.version} - 1.7.1 + 1.7.2 org.apache.spark @@ -37,7 +37,7 @@ ml.dmlc xgboost4j-flink_${scala.binary.version} - 1.7.1 + 1.7.2 org.apache.commons diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index e90b74d27e95..dd210574df62 100644 --- 
a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1 + 1.7.2 xgboost4j-flink_2.12 - 1.7.1 + 1.7.2 @@ -26,7 +26,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.1 + 1.7.2 org.apache.commons diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index 5d97e8681d39..813fdaa93648 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1 + 1.7.2 xgboost4j-gpu_2.12 - 1.7.1 + 1.7.2 jar diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index f5a3587fe1e6..0d42350c59a0 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1 + 1.7.2 xgboost4j-spark-gpu_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j-gpu_${scala.binary.version} - 1.7.1 + 1.7.2 org.apache.spark diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index 66f9859926a7..48201dc48484 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1 + 1.7.2 xgboost4j-spark_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.1 + 1.7.2 org.apache.spark diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index bc9654f36dd0..3300a0d37a40 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1 + 1.7.2 xgboost4j_2.12 - 1.7.1 + 1.7.2 jar diff --git a/python-package/xgboost/VERSION b/python-package/xgboost/VERSION index 943f9cbc4ec7..f8a696c8dc56 100644 --- a/python-package/xgboost/VERSION +++ b/python-package/xgboost/VERSION @@ -1 +1 @@ -1.7.1 +1.7.2 From 575fba651b911b9f87e2b85224a816940fe77327 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 5 Jan 2023 19:10:58 
+0800 Subject: [PATCH 023/126] [backport] [CI] Fix CI with updated dependencies. (#8631) (#8635) --- jvm-packages/xgboost4j-tester/get_iris.py | 4 ++-- tests/python/test_with_shap.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/jvm-packages/xgboost4j-tester/get_iris.py b/jvm-packages/xgboost4j-tester/get_iris.py index f234bb95e198..728c149b0260 100644 --- a/jvm-packages/xgboost4j-tester/get_iris.py +++ b/jvm-packages/xgboost4j-tester/get_iris.py @@ -1,9 +1,9 @@ -from sklearn.datasets import load_iris import numpy as np import pandas +from sklearn.datasets import load_iris X, y = load_iris(return_X_y=True) -y = y.astype(np.int) +y = y.astype(np.int32) df = pandas.DataFrame(data=X, columns=['sepal length', 'sepal width', 'petal length', 'petal width']) class_id_to_name = {0:'Iris-setosa', 1:'Iris-versicolor', 2:'Iris-virginica'} df['class'] = np.vectorize(class_id_to_name.get)(y) diff --git a/tests/python/test_with_shap.py b/tests/python/test_with_shap.py index 1e03e070035c..080dda3c3ed0 100644 --- a/tests/python/test_with_shap.py +++ b/tests/python/test_with_shap.py @@ -4,7 +4,7 @@ try: import shap -except ImportError: +except Exception: shap = None pass From 83a078b7e543f60e88a428b3da4c29ece2b6a873 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 5 Jan 2023 21:17:05 +0800 Subject: [PATCH 024/126] [backport] Fix sklearn test that calls a removed field (#8579) (#8636) Co-authored-by: Rong Ou --- tests/python/test_with_sklearn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 17114d2dde39..adbbcd02f93c 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -1030,7 +1030,7 @@ def test_pandas_input(): clf_isotonic = CalibratedClassifierCV(model, cv="prefit", method="isotonic") clf_isotonic.fit(train, target) assert isinstance( - clf_isotonic.calibrated_classifiers_[0].base_estimator, xgb.XGBClassifier + 
clf_isotonic.calibrated_classifiers_[0].estimator, xgb.XGBClassifier ) np.testing.assert_allclose(np.array(clf_isotonic.classes_), np.array([0, 1])) From 162b48a1a4ff309ea0043c13a3e4d9e7087394fc Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 5 Jan 2023 23:13:45 +0800 Subject: [PATCH 025/126] [backport] [CI] Disable gtest with RMM (#8620) (#8640) Co-authored-by: Philip Hyunsu Cho --- tests/buildkite/test-cpp-gpu.sh | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/tests/buildkite/test-cpp-gpu.sh b/tests/buildkite/test-cpp-gpu.sh index d1192c763c41..f1ddf9d5f5e0 100755 --- a/tests/buildkite/test-cpp-gpu.sh +++ b/tests/buildkite/test-cpp-gpu.sh @@ -4,7 +4,7 @@ set -euo pipefail source tests/buildkite/conftest.sh -echo "--- Run Google Tests with CUDA, using 4 GPUs" +echo "--- Run Google Tests with CUDA, using a GPU" buildkite-agent artifact download "build/testxgboost" . --step build-cuda chmod +x build/testxgboost tests/ci_build/ci_build.sh gpu nvidia-docker \ @@ -12,11 +12,12 @@ tests/ci_build/ci_build.sh gpu nvidia-docker \ --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION \ build/testxgboost -echo "--- Run Google Tests with CUDA, using 4 GPUs, RMM enabled" -rm -rfv build/ -buildkite-agent artifact download "build/testxgboost" . --step build-cuda-with-rmm -chmod +x build/testxgboost -tests/ci_build/ci_build.sh rmm nvidia-docker \ - --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \ - --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \ - "source activate gpu_test && build/testxgboost --use-rmm-pool" +# Disabled until https://github.com/dmlc/xgboost/issues/8619 is resolved +# echo "--- Run Google Tests with CUDA, using a GPU, RMM enabled" +# rm -rfv build/ +# buildkite-agent artifact download "build/testxgboost" . 
--step build-cuda-with-rmm +# chmod +x build/testxgboost +# tests/ci_build/ci_build.sh rmm nvidia-docker \ +# --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \ +# --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \ +# "source activate gpu_test && build/testxgboost --use-rmm-pool" From 1a834b2b85056644aa827aeb5d4e19b681b3d995 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 5 Jan 2023 23:16:10 +0800 Subject: [PATCH 026/126] Fix linalg iterator. (#8603) (#8639) --- src/common/linalg_op.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/common/linalg_op.h b/src/common/linalg_op.h index efb9cf300238..26cb405f526f 100644 --- a/src/common/linalg_op.h +++ b/src/common/linalg_op.h @@ -62,7 +62,7 @@ void ElementWiseKernel(GenericParameter const* ctx, linalg::TensorView t, #endif // !defined(XGBOOST_USE_CUDA) template -auto cbegin(TensorView v) { // NOLINT +auto cbegin(TensorView const& v) { // NOLINT auto it = common::MakeIndexTransformIter([&](size_t i) -> std::remove_cv_t const& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); }); @@ -70,19 +70,19 @@ auto cbegin(TensorView v) { // NOLINT } template -auto cend(TensorView v) { // NOLINT +auto cend(TensorView const& v) { // NOLINT return cbegin(v) + v.Size(); } template -auto begin(TensorView v) { // NOLINT +auto begin(TensorView& v) { // NOLINT auto it = common::MakeIndexTransformIter( [&](size_t i) -> T& { return linalg::detail::Apply(v, linalg::UnravelIndex(i, v.Shape())); }); return it; } template -auto end(TensorView v) { // NOLINT +auto end(TensorView& v) { // NOLINT return begin(v) + v.Size(); } } // namespace linalg From 067b704e58c5a4d97fd64c1871e79e9ace29efe9 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 6 Jan 2023 01:17:49 +0800 Subject: [PATCH 027/126] [backport] Fix inference with categorical feature. (#8591) (#8602) (#8638) * Fix inference with categorical feature. (#8591) * Fix windows build on buildkite. (#8602) * workaround. 
--- doc/tutorials/categorical.rst | 10 ++--- src/common/categorical.h | 17 ++++---- src/common/partition_builder.h | 4 +- src/predictor/predict_fn.h | 4 +- src/tree/updater_gpu_hist.cu | 5 +-- tests/cpp/common/test_categorical.cc | 64 +++++++++++++++++++++++++--- tests/python/test_with_sklearn.py | 6 +-- 7 files changed, 79 insertions(+), 31 deletions(-) diff --git a/doc/tutorials/categorical.rst b/doc/tutorials/categorical.rst index 1c090801fae4..97877f23c190 100644 --- a/doc/tutorials/categorical.rst +++ b/doc/tutorials/categorical.rst @@ -138,11 +138,11 @@ Miscellaneous By default, XGBoost assumes input categories are integers starting from 0 till the number of categories :math:`[0, n\_categories)`. However, user might provide inputs with invalid -values due to mistakes or missing values. It can be negative value, integer values that -can not be accurately represented by 32-bit floating point, or values that are larger than -actual number of unique categories. During training this is validated but for prediction -it's treated as the same as missing value for performance reasons. Lastly, missing values -are treated as the same as numerical features (using the learned split direction). +values due to mistakes or missing values in training dataset. It can be negative value, +integer values that can not be accurately represented by 32-bit floating point, or values +that are larger than actual number of unique categories. During training this is +validated but for prediction it's treated as the same as not-chosen category for +performance reasons. ********** diff --git a/src/common/categorical.h b/src/common/categorical.h index ead5f570c44f..452aaa8c1cdc 100644 --- a/src/common/categorical.h +++ b/src/common/categorical.h @@ -48,20 +48,21 @@ inline XGBOOST_DEVICE bool InvalidCat(float cat) { return cat < 0 || cat >= kMaxCat; } -/* \brief Whether should it traverse to left branch of a tree. +/** + * \brief Whether should it traverse to left branch of a tree. 
* - * For one hot split, go to left if it's NOT the matching category. + * Go to left if it's NOT the matching category, which matches one-hot encoding. */ -template -inline XGBOOST_DEVICE bool Decision(common::Span cats, float cat, bool dft_left) { +inline XGBOOST_DEVICE bool Decision(common::Span cats, float cat) { KCatBitField const s_cats(cats); - // FIXME: Size() is not accurate since it represents the size of bit set instead of - // actual number of categories. - if (XGBOOST_EXPECT(validate && (InvalidCat(cat) || cat >= s_cats.Size()), false)) { - return dft_left; + if (XGBOOST_EXPECT(InvalidCat(cat), false)) { + return true; } auto pos = KCatBitField::ToBitPos(cat); + // If the input category is larger than the size of the bit field, it implies that the + // category is not chosen. Otherwise the bit field would have the category instead of + // being smaller than the category value. if (pos.int_pos >= cats.size()) { return true; } diff --git a/src/common/partition_builder.h b/src/common/partition_builder.h index 34864ee90679..568e611b732d 100644 --- a/src/common/partition_builder.h +++ b/src/common/partition_builder.h @@ -144,7 +144,7 @@ class PartitionBuilder { auto gidx = gidx_calc(ridx); bool go_left = default_left; if (gidx > -1) { - go_left = Decision(node_cats, cut_values[gidx], default_left); + go_left = Decision(node_cats, cut_values[gidx]); } return go_left; } else { @@ -157,7 +157,7 @@ class PartitionBuilder { bool go_left = default_left; if (gidx > -1) { if (is_cat) { - go_left = Decision(node_cats, cut_values[gidx], default_left); + go_left = Decision(node_cats, cut_values[gidx]); } else { go_left = cut_values[gidx] <= nodes[node_in_set].split.split_value; } diff --git a/src/predictor/predict_fn.h b/src/predictor/predict_fn.h index 7ce474023e8a..5d0c175fcf65 100644 --- a/src/predictor/predict_fn.h +++ b/src/predictor/predict_fn.h @@ -18,9 +18,7 @@ inline XGBOOST_DEVICE bst_node_t GetNextNode(const RegTree::Node &node, const bs if (has_categorical 
&& common::IsCat(cats.split_type, nid)) { auto node_categories = cats.categories.subspan(cats.node_ptr[nid].beg, cats.node_ptr[nid].size); - return common::Decision(node_categories, fvalue, node.DefaultLeft()) - ? node.LeftChild() - : node.RightChild(); + return common::Decision(node_categories, fvalue) ? node.LeftChild() : node.RightChild(); } else { return node.LeftChild() + !(fvalue < node.SplitCond()); } diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index b3b3004a2cb3..b90a7ce09f0f 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -403,8 +403,7 @@ struct GPUHistMakerDevice { go_left = data.split_node.DefaultLeft(); } else { if (data.split_type == FeatureType::kCategorical) { - go_left = common::Decision(data.node_cats.Bits(), cut_value, - data.split_node.DefaultLeft()); + go_left = common::Decision(data.node_cats.Bits(), cut_value); } else { go_left = cut_value <= data.split_node.SplitCond(); } @@ -481,7 +480,7 @@ struct GPUHistMakerDevice { if (common::IsCat(d_feature_types, position)) { auto node_cats = categories.subspan(categories_segments[position].beg, categories_segments[position].size); - go_left = common::Decision(node_cats, element, node.DefaultLeft()); + go_left = common::Decision(node_cats, element); } else { go_left = element <= node.SplitCond(); } diff --git a/tests/cpp/common/test_categorical.cc b/tests/cpp/common/test_categorical.cc index cc8eb0f7e6c4..4e6e696ecfdd 100644 --- a/tests/cpp/common/test_categorical.cc +++ b/tests/cpp/common/test_categorical.cc @@ -1,11 +1,14 @@ /*! 
- * Copyright 2021 by XGBoost Contributors + * Copyright 2021-2022 by XGBoost Contributors */ #include +#include +#include #include #include "../../../src/common/categorical.h" +#include "../helpers.h" namespace xgboost { namespace common { @@ -15,29 +18,76 @@ TEST(Categorical, Decision) { ASSERT_TRUE(common::InvalidCat(a)); std::vector cats(256, 0); - ASSERT_TRUE(Decision(cats, a, true)); + ASSERT_TRUE(Decision(cats, a)); // larger than size a = 256; - ASSERT_TRUE(Decision(cats, a, true)); + ASSERT_TRUE(Decision(cats, a)); // negative a = -1; - ASSERT_TRUE(Decision(cats, a, true)); + ASSERT_TRUE(Decision(cats, a)); CatBitField bits{cats}; bits.Set(0); a = -0.5; - ASSERT_TRUE(Decision(cats, a, true)); + ASSERT_TRUE(Decision(cats, a)); // round toward 0 a = 0.5; - ASSERT_FALSE(Decision(cats, a, true)); + ASSERT_FALSE(Decision(cats, a)); // valid a = 13; bits.Set(a); - ASSERT_FALSE(Decision(bits.Bits(), a, true)); + ASSERT_FALSE(Decision(bits.Bits(), a)); +} + +/** + * Test for running inference with input category greater than the one stored in tree. 
+ */ +TEST(Categorical, MinimalSet) { + std::size_t constexpr kRows = 256, kCols = 1, kCat = 3; + std::vector types{FeatureType::kCategorical}; + auto Xy = + RandomDataGenerator{kRows, kCols, 0.0}.Type(types).MaxCategory(kCat).GenerateDMatrix(true); + + std::unique_ptr learner{Learner::Create({Xy})}; + learner->SetParam("max_depth", "1"); + learner->SetParam("tree_method", "hist"); + learner->Configure(); + learner->UpdateOneIter(0, Xy); + + Json model{Object{}}; + learner->SaveModel(&model); + auto tree = model["learner"]["gradient_booster"]["model"]["trees"][0]; + ASSERT_GE(get(tree["categories"]).size(), 1); + auto v = get(tree["categories"])[0]; + + HostDeviceVector predt; + { + std::vector data{static_cast(kCat), + static_cast(kCat + 1), 32.0f, 33.0f, 34.0f}; + auto test = GetDMatrixFromData(data, data.size(), kCols); + learner->Predict(test, false, &predt, 0, 0, false, /*pred_leaf=*/true); + ASSERT_EQ(predt.Size(), data.size()); + auto const& h_predt = predt.ConstHostSpan(); + for (auto v : h_predt) { + ASSERT_EQ(v, 1); // left child of root node + } + } + + { + std::unique_ptr learner{Learner::Create({Xy})}; + learner->LoadModel(model); + std::vector data = {static_cast(v)}; + auto test = GetDMatrixFromData(data, data.size(), kCols); + learner->Predict(test, false, &predt, 0, 0, false, /*pred_leaf=*/true); + auto const& h_predt = predt.ConstHostSpan(); + for (auto v : h_predt) { + ASSERT_EQ(v, 2); // right child of root node + } + } } } // namespace common } // namespace xgboost diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index adbbcd02f93c..9f1c6462520c 100644 --- a/tests/python/test_with_sklearn.py +++ b/tests/python/test_with_sklearn.py @@ -1029,9 +1029,9 @@ def test_pandas_input(): clf_isotonic = CalibratedClassifierCV(model, cv="prefit", method="isotonic") clf_isotonic.fit(train, target) - assert isinstance( - clf_isotonic.calibrated_classifiers_[0].estimator, xgb.XGBClassifier - ) + clf = 
clf_isotonic.calibrated_classifiers_[0] + est = clf.estimator if hasattr(clf, "estimator") else clf.base_estimator + assert isinstance(est, xgb.XGBClassifier) np.testing.assert_allclose(np.array(clf_isotonic.classes_), np.array([0, 1])) From a2085bf2233de818ad919943514049f857af5c30 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 6 Jan 2023 02:28:21 +0800 Subject: [PATCH 028/126] [backport] Fix loading GPU pickle with a CPU-only xgboost distribution. (#8632) (#8641) We can handle loading the pickle on a CPU-only machine if the XGBoost is built with CUDA enabled (Linux and Windows PyPI package), but not if the distribution is CPU-only (macOS PyPI package). --- src/gbm/gbtree.cc | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/src/gbm/gbtree.cc b/src/gbm/gbtree.cc index e9e888a98e46..92b54625d150 100644 --- a/src/gbm/gbtree.cc +++ b/src/gbm/gbtree.cc @@ -28,6 +28,7 @@ #include "xgboost/logging.h" #include "xgboost/objective.h" #include "xgboost/predictor.h" +#include "xgboost/string_view.h" #include "xgboost/tree_updater.h" namespace xgboost { @@ -395,23 +396,36 @@ void GBTree::LoadConfig(Json const& in) { tparam_.process_type = TreeProcessType::kDefault; int32_t const n_gpus = xgboost::common::AllVisibleGPUs(); if (n_gpus == 0 && tparam_.predictor == PredictorType::kGPUPredictor) { - LOG(WARNING) - << "Loading from a raw memory buffer on CPU only machine. " - "Changing predictor to auto."; + LOG(WARNING) << "Loading from a raw memory buffer on CPU only machine. " + "Changing predictor to auto."; tparam_.UpdateAllowUnknown(Args{{"predictor", "auto"}}); } + + auto msg = StringView{ + R"( + Loading from a raw memory buffer (like pickle in Python, RDS in R) on a CPU-only + machine. Consider using `save_model/load_model` instead. 
See: + + https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html + + for more details about differences between saving model and serializing.)"}; + if (n_gpus == 0 && tparam_.tree_method == TreeMethod::kGPUHist) { tparam_.UpdateAllowUnknown(Args{{"tree_method", "hist"}}); - LOG(WARNING) - << "Loading from a raw memory buffer on CPU only machine. " - "Changing tree_method to hist."; + LOG(WARNING) << msg << " Changing `tree_method` to `hist`."; } auto const& j_updaters = get(in["updater"]); updaters_.clear(); + for (auto const& kv : j_updaters) { - std::unique_ptr up( - TreeUpdater::Create(kv.first, ctx_, model_.learner_model_param->task)); + auto name = kv.first; + if (n_gpus == 0 && name == "grow_gpu_hist") { + name = "grow_quantile_histmaker"; + LOG(WARNING) << "Changing updater from `grow_gpu_hist` to `grow_quantile_histmaker`."; + } + std::unique_ptr up{ + TreeUpdater::Create(name, ctx_, model_.learner_model_param->task)}; up->LoadConfig(kv.second); updaters_.push_back(std::move(up)); } From 899e4c8988a82528e9457ca4d9ed4793c64f16c5 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 6 Jan 2023 02:28:39 +0800 Subject: [PATCH 029/126] [backport] Do not return internal value for `get_params`. 
(#8634) (#8642) --- python-package/xgboost/sklearn.py | 40 +++------------- tests/python/test_with_pandas.py | 1 - tests/python/test_with_sklearn.py | 77 ++++++++++++++++++++++++++----- 3 files changed, 73 insertions(+), 45 deletions(-) diff --git a/python-package/xgboost/sklearn.py b/python-package/xgboost/sklearn.py index 5a873ab88be3..49687e2fbf6c 100644 --- a/python-package/xgboost/sklearn.py +++ b/python-package/xgboost/sklearn.py @@ -674,7 +674,7 @@ def set_params(self, **params: Any) -> "XGBModel": self.kwargs = {} self.kwargs[key] = value - if hasattr(self, "_Booster"): + if self.__sklearn_is_fitted__(): parameters = self.get_xgb_params() self.get_booster().set_param(parameters) @@ -701,39 +701,12 @@ def get_params(self, deep: bool = True) -> Dict[str, Any]: np.iinfo(np.int32).max ) - def parse_parameter(value: Any) -> Optional[Union[int, float, str]]: - for t in (int, float, str): - try: - ret = t(value) - return ret - except ValueError: - continue - return None - - # Get internal parameter values - try: - config = json.loads(self.get_booster().save_config()) - stack = [config] - internal = {} - while stack: - obj = stack.pop() - for k, v in obj.items(): - if k.endswith("_param"): - for p_k, p_v in v.items(): - internal[p_k] = p_v - elif isinstance(v, dict): - stack.append(v) - - for k, v in internal.items(): - if k in params and params[k] is None: - params[k] = parse_parameter(v) - except ValueError: - pass return params def get_xgb_params(self) -> Dict[str, Any]: """Get xgboost specific parameters.""" - params = self.get_params() + params: Dict[str, Any] = self.get_params() + # Parameters that should not go into native learner. 
wrapper_specific = { "importance_type", @@ -750,6 +723,7 @@ def get_xgb_params(self) -> Dict[str, Any]: for k, v in params.items(): if k not in wrapper_specific and not callable(v): filtered[k] = v + return filtered def get_num_boosting_rounds(self) -> int: @@ -1070,7 +1044,7 @@ def _can_use_inplace_predict(self) -> bool: # error with incompatible data type. # Inplace predict doesn't handle as many data types as DMatrix, but it's # sufficient for dask interface where input is simpiler. - predictor = self.get_params().get("predictor", None) + predictor = self.get_xgb_params().get("predictor", None) if predictor in ("auto", None) and self.booster != "gblinear": return True return False @@ -1336,7 +1310,7 @@ def coef_(self) -> np.ndarray: ------- coef_ : array of shape ``[n_features]`` or ``[n_classes, n_features]`` """ - if self.get_params()["booster"] != "gblinear": + if self.get_xgb_params()["booster"] != "gblinear": raise AttributeError( f"Coefficients are not defined for Booster type {self.booster}" ) @@ -1366,7 +1340,7 @@ def intercept_(self) -> np.ndarray: ------- intercept_ : array of shape ``(1,)`` or ``[n_classes]`` """ - if self.get_params()["booster"] != "gblinear": + if self.get_xgb_params()["booster"] != "gblinear": raise AttributeError( f"Intercept (bias) is not defined for Booster type {self.booster}" ) diff --git a/tests/python/test_with_pandas.py b/tests/python/test_with_pandas.py index e4289c1cdcef..84352d15143e 100644 --- a/tests/python/test_with_pandas.py +++ b/tests/python/test_with_pandas.py @@ -112,7 +112,6 @@ def test_pandas(self): # test Index as columns df = pd.DataFrame([[1, 1.1], [2, 2.2]], columns=pd.Index([1, 2])) - print(df.columns, isinstance(df.columns, pd.Index)) Xy = xgb.DMatrix(df) np.testing.assert_equal(np.array(Xy.feature_names), np.array(["1", "2"])) diff --git a/tests/python/test_with_sklearn.py b/tests/python/test_with_sklearn.py index 9f1c6462520c..62b3fe6b56a6 100644 --- a/tests/python/test_with_sklearn.py +++ 
b/tests/python/test_with_sklearn.py @@ -2,6 +2,7 @@ import importlib.util import json import os +import pickle import random import tempfile from typing import Callable, Optional @@ -636,26 +637,74 @@ def test_sklearn_n_jobs(): def test_parameters_access(): from sklearn import datasets - params = {'updater': 'grow_gpu_hist', 'subsample': .5, 'n_jobs': -1} + + params = {"updater": "grow_gpu_hist", "subsample": 0.5, "n_jobs": -1} clf = xgb.XGBClassifier(n_estimators=1000, **params) - assert clf.get_params()['updater'] == 'grow_gpu_hist' - assert clf.get_params()['subsample'] == .5 - assert clf.get_params()['n_estimators'] == 1000 + assert clf.get_params()["updater"] == "grow_gpu_hist" + assert clf.get_params()["subsample"] == 0.5 + assert clf.get_params()["n_estimators"] == 1000 clf = xgb.XGBClassifier(n_estimators=1, nthread=4) X, y = datasets.load_iris(return_X_y=True) clf.fit(X, y) config = json.loads(clf.get_booster().save_config()) - assert int(config['learner']['generic_param']['nthread']) == 4 + assert int(config["learner"]["generic_param"]["nthread"]) == 4 clf.set_params(nthread=16) config = json.loads(clf.get_booster().save_config()) - assert int(config['learner']['generic_param']['nthread']) == 16 + assert int(config["learner"]["generic_param"]["nthread"]) == 16 clf.predict(X) config = json.loads(clf.get_booster().save_config()) - assert int(config['learner']['generic_param']['nthread']) == 16 + assert int(config["learner"]["generic_param"]["nthread"]) == 16 + + clf = xgb.XGBClassifier(n_estimators=2) + assert clf.tree_method is None + assert clf.get_params()["tree_method"] is None + clf.fit(X, y) + assert clf.get_params()["tree_method"] is None + + def save_load(clf: xgb.XGBClassifier) -> xgb.XGBClassifier: + with tempfile.TemporaryDirectory() as tmpdir: + path = os.path.join(tmpdir, "model.json") + clf.save_model(path) + clf = xgb.XGBClassifier() + clf.load_model(path) + return clf + + def get_tm(clf: xgb.XGBClassifier) -> str: + tm = 
json.loads(clf.get_booster().save_config())["learner"]["gradient_booster"][ + "gbtree_train_param" + ]["tree_method"] + return tm + + assert get_tm(clf) == "exact" + + clf = pickle.loads(pickle.dumps(clf)) + + assert clf.tree_method is None + assert clf.n_estimators == 2 + assert clf.get_params()["tree_method"] is None + assert clf.get_params()["n_estimators"] == 2 + assert get_tm(clf) == "exact" # preserved for pickle + + clf = save_load(clf) + + assert clf.tree_method is None + assert clf.n_estimators == 2 + assert clf.get_params()["tree_method"] is None + assert clf.get_params()["n_estimators"] == 2 + assert get_tm(clf) == "auto" # discarded for save/load_model + + clf.set_params(tree_method="hist") + assert clf.get_params()["tree_method"] == "hist" + clf = pickle.loads(pickle.dumps(clf)) + assert clf.get_params()["tree_method"] == "hist" + clf = save_load(clf) + # FIXME(jiamingy): We should remove this behavior once we remove parameters + # serialization for skl save/load_model. + assert clf.get_params()["tree_method"] == "hist" def test_kwargs_error(): @@ -695,13 +744,19 @@ def test_sklearn_clone(): def test_sklearn_get_default_params(): from sklearn.datasets import load_digits + digits_2class = load_digits(n_class=2) - X = digits_2class['data'] - y = digits_2class['target'] + X = digits_2class["data"] + y = digits_2class["target"] cls = xgb.XGBClassifier() - assert cls.get_params()['base_score'] is None + assert cls.get_params()["base_score"] is None cls.fit(X[:4, ...], y[:4, ...]) - assert cls.get_params()['base_score'] is not None + base_score = float( + json.loads(cls.get_booster().save_config())["learner"]["learner_model_param"][ + "base_score" + ] + ) + np.testing.assert_equal(base_score, 0.5) def run_validation_weights(model): From dd58c2ac472b0aa442db63cb18d52379baba567f Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 6 Jan 2023 17:55:51 +0800 Subject: [PATCH 030/126] Bump version to 1.7.3. 
(#8646) --- CMakeLists.txt | 2 +- R-package/configure | 18 +++++++++--------- R-package/configure.ac | 2 +- include/xgboost/version_config.h | 2 +- jvm-packages/pom.xml | 2 +- jvm-packages/xgboost4j-example/pom.xml | 8 ++++---- jvm-packages/xgboost4j-flink/pom.xml | 6 +++--- jvm-packages/xgboost4j-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark/pom.xml | 4 ++-- jvm-packages/xgboost4j/pom.xml | 4 ++-- python-package/xgboost/VERSION | 2 +- 12 files changed, 29 insertions(+), 29 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 0509f18ed881..089e8a2a4dcf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR) -project(xgboost LANGUAGES CXX C VERSION 1.7.2) +project(xgboost LANGUAGES CXX C VERSION 1.7.3) include(cmake/Utils.cmake) list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") cmake_policy(SET CMP0022 NEW) diff --git a/R-package/configure b/R-package/configure index a89b42b24b70..f9d27803b903 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for xgboost 1.7.2. +# Generated by GNU Autoconf 2.69 for xgboost 1.7.3. # # # Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. @@ -576,8 +576,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='xgboost' PACKAGE_TARNAME='xgboost' -PACKAGE_VERSION='1.7.2' -PACKAGE_STRING='xgboost 1.7.2' +PACKAGE_VERSION='1.7.3' +PACKAGE_STRING='xgboost 1.7.3' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1195,7 +1195,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures xgboost 1.7.2 to adapt to many kinds of systems. +\`configure' configures xgboost 1.7.3 to adapt to many kinds of systems. 
Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1257,7 +1257,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of xgboost 1.7.2:";; + short | recursive ) echo "Configuration of xgboost 1.7.3:";; esac cat <<\_ACEOF @@ -1336,7 +1336,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -xgboost configure 1.7.2 +xgboost configure 1.7.3 generated by GNU Autoconf 2.69 Copyright (C) 2012 Free Software Foundation, Inc. @@ -1479,7 +1479,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by xgboost $as_me 1.7.2, which was +It was created by xgboost $as_me 1.7.3, which was generated by GNU Autoconf 2.69. Invocation command line was $ $0 $@ @@ -3294,7 +3294,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by xgboost $as_me 1.7.2, which was +This file was extended by xgboost $as_me 1.7.3, which was generated by GNU Autoconf 2.69. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -3347,7 +3347,7 @@ _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" ac_cs_version="\\ -xgboost config.status 1.7.2 +xgboost config.status 1.7.3 configured by $0, generated by GNU Autoconf 2.69, with options \\"\$ac_cs_config\\" diff --git a/R-package/configure.ac b/R-package/configure.ac index d99e5b8dab3c..e5d2396e2fbb 100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -2,7 +2,7 @@ AC_PREREQ(2.69) -AC_INIT([xgboost],[1.7.2],[],[xgboost],[]) +AC_INIT([xgboost],[1.7.3],[],[xgboost],[]) # Use this line to set CC variable to a C compiler AC_PROG_CC diff --git a/include/xgboost/version_config.h b/include/xgboost/version_config.h index 0e0bcb98e46c..68e748daec6e 100644 --- a/include/xgboost/version_config.h +++ b/include/xgboost/version_config.h @@ -6,6 +6,6 @@ #define XGBOOST_VER_MAJOR 1 #define XGBOOST_VER_MINOR 7 -#define XGBOOST_VER_PATCH 2 +#define XGBOOST_VER_PATCH 3 #endif // XGBOOST_VERSION_CONFIG_H_ diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index d18ba5301150..81ada460db7b 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.2 + 1.7.3 pom XGBoost JVM Package JVM Package for XGBoost diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index bce7bd211c9b..71a7f3ca8572 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.2 + 1.7.3 xgboost4j-example_2.12 - 1.7.2 + 1.7.3 jar @@ -26,7 +26,7 @@ ml.dmlc xgboost4j-spark_${scala.binary.version} - 1.7.2 + 1.7.3 org.apache.spark @@ -37,7 +37,7 @@ ml.dmlc xgboost4j-flink_${scala.binary.version} - 1.7.2 + 1.7.3 org.apache.commons diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index dd210574df62..8fb31cca0e31 100644 --- 
a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.2 + 1.7.3 xgboost4j-flink_2.12 - 1.7.2 + 1.7.3 @@ -26,7 +26,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.2 + 1.7.3 org.apache.commons diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index 813fdaa93648..6d440e8a10f1 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.2 + 1.7.3 xgboost4j-gpu_2.12 - 1.7.2 + 1.7.3 jar diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index 0d42350c59a0..84fa5ad3d47c 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.2 + 1.7.3 xgboost4j-spark-gpu_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j-gpu_${scala.binary.version} - 1.7.2 + 1.7.3 org.apache.spark diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index 48201dc48484..f41e643cdaee 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.2 + 1.7.3 xgboost4j-spark_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.2 + 1.7.3 org.apache.spark diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index 3300a0d37a40..9f2a2a86969b 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.2 + 1.7.3 xgboost4j_2.12 - 1.7.2 + 1.7.3 jar diff --git a/python-package/xgboost/VERSION b/python-package/xgboost/VERSION index f8a696c8dc56..661e7aeadf36 100644 --- a/python-package/xgboost/VERSION +++ b/python-package/xgboost/VERSION @@ -1 +1 @@ -1.7.2 +1.7.3 From ccf43d4ba0a94e2f0a3cc5a526197539ae46f410 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 6 Jan 2023 20:34:05 
+0800 Subject: [PATCH 031/126] Bump R package version to 1.7.3. (#8649) --- R-package/DESCRIPTION | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index ef739646a8fa..8c372642968a 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -1,8 +1,8 @@ Package: xgboost Type: Package Title: Extreme Gradient Boosting -Version: 1.7.2.1 -Date: 2022-12-08 +Version: 1.7.3.1 +Date: 2023-01-06 Authors@R: c( person("Tianqi", "Chen", role = c("aut"), email = "tianqi.tchen@gmail.com"), @@ -66,5 +66,5 @@ Imports: methods, data.table (>= 1.9.6), jsonlite (>= 1.0), -RoxygenNote: 7.2.1 +RoxygenNote: 7.2.2 SystemRequirements: GNU make, C++14 From e803d06d8c983858897bda052c303e932372d54a Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Fri, 13 Jan 2023 04:55:29 +0800 Subject: [PATCH 032/126] [backport] [R] Remove unused assert definition. (#8526) (#8668) --- R-package/src/Makevars.in | 1 - R-package/src/Makevars.win | 1 - R-package/src/xgboost_assert.c | 26 -------------------------- 3 files changed, 28 deletions(-) delete mode 100644 R-package/src/xgboost_assert.c diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in index 54f3acaa52bb..9b2610638366 100644 --- a/R-package/src/Makevars.in +++ b/R-package/src/Makevars.in @@ -23,7 +23,6 @@ PKG_LIBS = @OPENMP_CXXFLAGS@ @OPENMP_LIB@ @ENDIAN_FLAG@ @BACKTRACE_LIB@ -pthread OBJECTS= \ ./xgboost_R.o \ ./xgboost_custom.o \ - ./xgboost_assert.o \ ./init.o \ $(PKGROOT)/src/metric/metric.o \ $(PKGROOT)/src/metric/elementwise_metric.o \ diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index c0815353203c..05399ad4d17b 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -23,7 +23,6 @@ PKG_LIBS = $(SHLIB_OPENMP_CXXFLAGS) -DDMLC_CMAKE_LITTLE_ENDIAN=1 $(SHLIB_PTHRE OBJECTS= \ ./xgboost_R.o \ ./xgboost_custom.o \ - ./xgboost_assert.o \ ./init.o \ $(PKGROOT)/src/metric/metric.o \ 
$(PKGROOT)/src/metric/elementwise_metric.o \ diff --git a/R-package/src/xgboost_assert.c b/R-package/src/xgboost_assert.c deleted file mode 100644 index 4706a039e8a6..000000000000 --- a/R-package/src/xgboost_assert.c +++ /dev/null @@ -1,26 +0,0 @@ -// Copyright (c) 2014 by Contributors -#include -#include -#include - -// implements error handling -void XGBoostAssert_R(int exp, const char *fmt, ...) { - char buf[1024]; - if (exp == 0) { - va_list args; - va_start(args, fmt); - vsprintf(buf, fmt, args); - va_end(args); - error("AssertError:%s\n", buf); - } -} -void XGBoostCheck_R(int exp, const char *fmt, ...) { - char buf[1024]; - if (exp == 0) { - va_list args; - va_start(args, fmt); - vsprintf(buf, fmt, args); - va_end(args); - error("%s\n", buf); - } -} From 10bb0a74efd8aef2ada0ecf46d41c09920f3d0ad Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 14 Jan 2023 06:40:17 +0800 Subject: [PATCH 033/126] [backport] [CI] Skip pyspark sparse tests. (#8675) (#8678) --- tests/ci_build/conda_env/cpu_test.yml | 5 +++-- tests/python/test_spark/test_spark_local.py | 12 ++++++++++++ 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/tests/ci_build/conda_env/cpu_test.yml b/tests/ci_build/conda_env/cpu_test.yml index 98c7a5928892..4d74c5832e97 100644 --- a/tests/ci_build/conda_env/cpu_test.yml +++ b/tests/ci_build/conda_env/cpu_test.yml @@ -36,7 +36,8 @@ dependencies: - cloudpickle - shap - modin +# TODO: Replace it with pyspark>=3.4 once 3.4 released. +# - https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz +- pyspark>=3.3.1 - pip: - datatable - # TODO: Replace it with pyspark>=3.4 once 3.4 released. 
- - https://ml-team-public-read.s3.us-west-2.amazonaws.com/pyspark-3.4.0.dev0.tar.gz diff --git a/tests/python/test_spark/test_spark_local.py b/tests/python/test_spark/test_spark_local.py index 6757d2e34e5c..574e96388737 100644 --- a/tests/python/test_spark/test_spark_local.py +++ b/tests/python/test_spark/test_spark_local.py @@ -41,6 +41,16 @@ pytestmark = testing.timeout(60) +def no_sparse_unwrap(): + try: + from pyspark.sql.functions import unwrap_udt + + except ImportError: + return {"reason": "PySpark<3.4", "condition": True} + + return {"reason": "PySpark<3.4", "condition": False} + + class XgboostLocalTest(SparkTestCase): def setUp(self): logging.getLogger().setLevel("INFO") @@ -985,6 +995,7 @@ def test_classifier_with_feature_names_types_weights(self): model = classifier.fit(self.cls_df_train) model.transform(self.cls_df_test).collect() + @pytest.mark.skipif(**no_sparse_unwrap()) def test_regressor_with_sparse_optim(self): regressor = SparkXGBRegressor(missing=0.0) model = regressor.fit(self.reg_df_sparse_train) @@ -1001,6 +1012,7 @@ def test_regressor_with_sparse_optim(self): for row1, row2 in zip(pred_result, pred_result2): self.assertTrue(np.isclose(row1.prediction, row2.prediction, atol=1e-3)) + @pytest.mark.skipif(**no_sparse_unwrap()) def test_classifier_with_sparse_optim(self): cls = SparkXGBClassifier(missing=0.0) model = cls.fit(self.cls_df_sparse_train) From e5bef4ffcede7078cdf378938fedcd5faac85f38 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 14 Jan 2023 18:46:04 +0800 Subject: [PATCH 034/126] [backport] Fix threads in DMatrix slice. 
(#8667) (#8679) --- src/data/simple_dmatrix.cc | 1 + tests/cpp/data/test_simple_dmatrix.cc | 29 +++++++++++++++++++++------ 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/src/data/simple_dmatrix.cc b/src/data/simple_dmatrix.cc index 4679ef543ea2..c2a69a204db0 100644 --- a/src/data/simple_dmatrix.cc +++ b/src/data/simple_dmatrix.cc @@ -42,6 +42,7 @@ DMatrix* SimpleDMatrix::Slice(common::Span ridxs) { out->Info() = this->Info().Slice(ridxs); out->Info().num_nonzero_ = h_offset.back(); } + out->ctx_ = this->ctx_; return out; } diff --git a/tests/cpp/data/test_simple_dmatrix.cc b/tests/cpp/data/test_simple_dmatrix.cc index 266115731fd6..198663872fc3 100644 --- a/tests/cpp/data/test_simple_dmatrix.cc +++ b/tests/cpp/data/test_simple_dmatrix.cc @@ -1,13 +1,19 @@ -// Copyright by Contributors +/** + * Copyright 2016-2023 by XGBoost Contributors + */ #include -#include +#include // std::array +#include // std::numeric_limits +#include // std::unique_ptr -#include "../../../src/data/adapter.h" -#include "../../../src/data/simple_dmatrix.h" -#include "../filesystem.h" // dmlc::TemporaryDirectory -#include "../helpers.h" +#include "../../../src/data/adapter.h" // ArrayAdapter +#include "../../../src/data/simple_dmatrix.h" // SimpleDMatrix +#include "../filesystem.h" // dmlc::TemporaryDirectory +#include "../helpers.h" // RandomDataGenerator,CreateSimpleTestData #include "xgboost/base.h" +#include "xgboost/host_device_vector.h" // HostDeviceVector +#include "xgboost/string_view.h" // StringView using namespace xgboost; // NOLINT @@ -298,6 +304,17 @@ TEST(SimpleDMatrix, Slice) { ASSERT_EQ(out->Info().num_col_, out->Info().num_col_); ASSERT_EQ(out->Info().num_row_, ridxs.size()); ASSERT_EQ(out->Info().num_nonzero_, ridxs.size() * kCols); // dense + + { + HostDeviceVector data; + auto arr_str = RandomDataGenerator{kRows, kCols, 0.0}.GenerateArrayInterface(&data); + auto adapter = data::ArrayAdapter{StringView{arr_str}}; + auto n_threads = 2; + std::unique_ptr 
p_fmat{ + DMatrix::Create(&adapter, std::numeric_limits::quiet_NaN(), n_threads, "")}; + std::unique_ptr slice{p_fmat->Slice(ridxs)}; + ASSERT_LE(slice->Ctx()->Threads(), n_threads); + } } TEST(SimpleDMatrix, SaveLoadBinary) { From 021e6a842a97e4b773b14f35d630f132595455af Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sat, 14 Jan 2023 18:46:59 +0800 Subject: [PATCH 035/126] [backport] [R] Get CXX flags from R CMD config. (#8669) (#8680) --- R-package/configure | 1803 +++++++++++++++++++++------------------- R-package/configure.ac | 19 +- 2 files changed, 979 insertions(+), 843 deletions(-) diff --git a/R-package/configure b/R-package/configure index f9d27803b903..2a4153f6f2fa 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1,9 +1,10 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.69 for xgboost 1.7.3. +# Generated by GNU Autoconf 2.71 for xgboost 1.7.3. # # -# Copyright (C) 1992-1996, 1998-2012 Free Software Foundation, Inc. +# Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, +# Inc. # # # This configure script is free software; the Free Software Foundation @@ -14,14 +15,16 @@ # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : +as_nop=: +if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST -else +else $as_nop case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( @@ -31,46 +34,46 @@ esac fi + +# Reset variables that may have inherited troublesome values from +# the environment. + +# IFS needs to be set, to space, tab, and newline, in precisely that order. 
+# (If _AS_PATH_WALK were called with IFS unset, it would have the +# side effect of setting IFS to empty, thus disabling word splitting.) +# Quoting is to prevent editors from complaining about space-tab. as_nl=' ' export as_nl -# Printing a long string crashes Solaris 7 /usr/bin/printf. -as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo -# Prefer a ksh shell builtin over an external printf program on Solaris, -# but without wasting forks for bash or zsh. -if test -z "$BASH_VERSION$ZSH_VERSION" \ - && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='print -r --' - as_echo_n='print -rn --' -elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='printf %s\n' - as_echo_n='printf %s' -else - if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then - as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' - as_echo_n='/usr/ucb/echo -n' - else - as_echo_body='eval expr "X$1" : "X\\(.*\\)"' - as_echo_n_body='eval - arg=$1; - case $arg in #( - *"$as_nl"*) - expr "X$arg" : "X\\(.*\\)$as_nl"; - arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; - esac; - expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" - ' - export as_echo_n_body - as_echo_n='sh -c $as_echo_n_body as_echo' - fi - export as_echo_body - as_echo='sh -c $as_echo_body as_echo' -fi +IFS=" "" $as_nl" + +PS1='$ ' +PS2='> ' +PS4='+ ' + +# Ensure predictable behavior from utilities with locale-dependent output. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# We cannot yet rely on "unset" to work, but we need these variables +# to be unset--not just set to an empty or harmless value--now, to +# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct +# also avoids known problems related to "unset" and subshell syntax +# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). 
+for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH +do eval test \${$as_var+y} \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done + +# Ensure that fds 0, 1, and 2 are open. +if (exec 3>&0) 2>/dev/null; then :; else exec 0&1) 2>/dev/null; then :; else exec 1>/dev/null; fi +if (exec 3>&2) ; then :; else exec 2>/dev/null; fi # The user is always right. -if test "${PATH_SEPARATOR+set}" != set; then +if ${PATH_SEPARATOR+false} :; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || @@ -79,13 +82,6 @@ if test "${PATH_SEPARATOR+set}" != set; then fi -# IFS -# We need space, tab and new line, in precisely that order. Quoting is -# there to prevent editors from complaining about space-tab. -# (If _AS_PATH_WALK were called with IFS unset, it would disable word -# splitting by setting IFS to empty value.) -IFS=" "" $as_nl" - # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( @@ -94,8 +90,12 @@ case $0 in #(( for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + test -r "$as_dir$0" && as_myself=$as_dir$0 && break done IFS=$as_save_IFS @@ -107,30 +107,10 @@ if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then - $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi -# Unset variables that we do not need and which cause bugs (e.g. in -# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" -# suppresses any "Segmentation fault" message there. '((' could -# trigger a bug in pdksh 5.2.14. 
-for as_var in BASH_ENV ENV MAIL MAILPATH -do eval test x\${$as_var+set} = xset \ - && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : -done -PS1='$ ' -PS2='> ' -PS4='+ ' - -# NLS nuisances. -LC_ALL=C -export LC_ALL -LANGUAGE=C -export LANGUAGE - -# CDPATH. -(unset CDPATH) >/dev/null 2>&1 && unset CDPATH # Use a proper internal environment variable to ensure we don't fall # into an infinite loop, continuously re-executing ourselves. @@ -152,20 +132,22 @@ esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. -$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 -as_fn_exit 255 +printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 +exit 255 fi # We don't want this to propagate to other subprocesses. { _as_can_reexec=; unset _as_can_reexec;} if test "x$CONFIG_SHELL" = x; then - as_bourne_compatible="if test -n \"\${ZSH_VERSION+set}\" && (emulate sh) >/dev/null 2>&1; then : + as_bourne_compatible="as_nop=: +if test \${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on \${1+\"\$@\"}, which # is contrary to our usage. Disable this feature. alias -g '\${1+\"\$@\"}'='\"\$@\"' setopt NO_GLOB_SUBST -else +else \$as_nop case \`(set -o) 2>/dev/null\` in #( *posix*) : set -o posix ;; #( @@ -185,41 +167,52 @@ as_fn_success || { exitcode=1; echo as_fn_success failed.; } as_fn_failure && { exitcode=1; echo as_fn_failure succeeded.; } as_fn_ret_success || { exitcode=1; echo as_fn_ret_success failed.; } as_fn_ret_failure && { exitcode=1; echo as_fn_ret_failure succeeded.; } -if ( set x; as_fn_ret_success y && test x = \"\$1\" ); then : +if ( set x; as_fn_ret_success y && test x = \"\$1\" ) +then : -else +else \$as_nop exitcode=1; echo positional parameters were not saved. 
fi test x\$exitcode = x0 || exit 1 +blah=\$(echo \$(echo blah)) +test x\"\$blah\" = xblah || exit 1 test -x / || exit 1" as_suggested=" as_lineno_1=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_1a=\$LINENO as_lineno_2=";as_suggested=$as_suggested$LINENO;as_suggested=$as_suggested" as_lineno_2a=\$LINENO eval 'test \"x\$as_lineno_1'\$as_run'\" != \"x\$as_lineno_2'\$as_run'\" && test \"x\`expr \$as_lineno_1'\$as_run' + 1\`\" = \"x\$as_lineno_2'\$as_run'\"' || exit 1" - if (eval "$as_required") 2>/dev/null; then : + if (eval "$as_required") 2>/dev/null +then : as_have_required=yes -else +else $as_nop as_have_required=no fi - if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null; then : + if test x$as_have_required = xyes && (eval "$as_suggested") 2>/dev/null +then : -else +else $as_nop as_save_IFS=$IFS; IFS=$PATH_SEPARATOR as_found=false for as_dir in /bin$PATH_SEPARATOR/usr/bin$PATH_SEPARATOR$PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac as_found=: case $as_dir in #( /*) for as_base in sh bash ksh sh5; do # Try only shells that exist, to save several forks. 
- as_shell=$as_dir/$as_base + as_shell=$as_dir$as_base if { test -f "$as_shell" || test -f "$as_shell.exe"; } && - { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$as_shell"; } 2>/dev/null; then : + as_run=a "$as_shell" -c "$as_bourne_compatible""$as_required" 2>/dev/null +then : CONFIG_SHELL=$as_shell as_have_required=yes - if { $as_echo "$as_bourne_compatible""$as_suggested" | as_run=a "$as_shell"; } 2>/dev/null; then : + if as_run=a "$as_shell" -c "$as_bourne_compatible""$as_suggested" 2>/dev/null +then : break 2 fi fi @@ -227,14 +220,21 @@ fi esac as_found=false done -$as_found || { if { test -f "$SHELL" || test -f "$SHELL.exe"; } && - { $as_echo "$as_bourne_compatible""$as_required" | as_run=a "$SHELL"; } 2>/dev/null; then : - CONFIG_SHELL=$SHELL as_have_required=yes -fi; } IFS=$as_save_IFS +if $as_found +then : + +else $as_nop + if { test -f "$SHELL" || test -f "$SHELL.exe"; } && + as_run=a "$SHELL" -c "$as_bourne_compatible""$as_required" 2>/dev/null +then : + CONFIG_SHELL=$SHELL as_have_required=yes +fi +fi - if test "x$CONFIG_SHELL" != x; then : + if test "x$CONFIG_SHELL" != x +then : export CONFIG_SHELL # We cannot yet assume a decent shell, so we have to provide a # neutralization value for shells without unset; and this also @@ -252,18 +252,19 @@ esac exec $CONFIG_SHELL $as_opts "$as_myself" ${1+"$@"} # Admittedly, this is quite paranoid, since all the known shells bail # out after a failed `exec'. -$as_echo "$0: could not re-execute with $CONFIG_SHELL" >&2 +printf "%s\n" "$0: could not re-execute with $CONFIG_SHELL" >&2 exit 255 fi - if test x$as_have_required = xno; then : - $as_echo "$0: This script requires a shell more modern than all" - $as_echo "$0: the shells that I found on your system." - if test x${ZSH_VERSION+set} = xset ; then - $as_echo "$0: In particular, zsh $ZSH_VERSION has bugs and should" - $as_echo "$0: be upgraded to zsh 4.3.4 or later." 
+ if test x$as_have_required = xno +then : + printf "%s\n" "$0: This script requires a shell more modern than all" + printf "%s\n" "$0: the shells that I found on your system." + if test ${ZSH_VERSION+y} ; then + printf "%s\n" "$0: In particular, zsh $ZSH_VERSION has bugs and should" + printf "%s\n" "$0: be upgraded to zsh 4.3.4 or later." else - $as_echo "$0: Please tell bug-autoconf@gnu.org about your system, + printf "%s\n" "$0: Please tell bug-autoconf@gnu.org about your system, $0: including any error possibly output before this $0: message. Then install a modern shell, or manually run $0: the script under such a shell if you do have one." @@ -290,6 +291,7 @@ as_fn_unset () } as_unset=as_fn_unset + # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. @@ -307,6 +309,14 @@ as_fn_exit () as_fn_set_status $1 exit $1 } # as_fn_exit +# as_fn_nop +# --------- +# Do nothing but, unlike ":", preserve the value of $?. +as_fn_nop () +{ + return $? +} +as_nop=as_fn_nop # as_fn_mkdir_p # ------------- @@ -321,7 +331,7 @@ as_fn_mkdir_p () as_dirs= while :; do case $as_dir in #( - *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" @@ -330,7 +340,7 @@ $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$as_dir" | +printf "%s\n" X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -369,12 +379,13 @@ as_fn_executable_p () # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. 
-if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null +then : eval 'as_fn_append () { eval $1+=\$2 }' -else +else $as_nop as_fn_append () { eval $1=\$$1\$2 @@ -386,18 +397,27 @@ fi # as_fn_append # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. -if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null +then : eval 'as_fn_arith () { as_val=$(( $* )) }' -else +else $as_nop as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` } fi # as_fn_arith +# as_fn_nop +# --------- +# Do nothing but, unlike ":", preserve the value of $?. +as_fn_nop () +{ + return $? +} +as_nop=as_fn_nop # as_fn_error STATUS ERROR [LINENO LOG_FD] # ---------------------------------------- @@ -409,9 +429,9 @@ as_fn_error () as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi - $as_echo "$as_me: error: $2" >&2 + printf "%s\n" "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error @@ -438,7 +458,7 @@ as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 
2>/dev/null || -$as_echo X/"$0" | +printf "%s\n" X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q @@ -482,7 +502,7 @@ as_cr_alnum=$as_cr_Letters$as_cr_digits s/-\n.*// ' >$as_me.lineno && chmod +x "$as_me.lineno" || - { $as_echo "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } + { printf "%s\n" "$as_me: error: cannot create $as_me.lineno; rerun with a POSIX shell" >&2; as_fn_exit 1; } # If we had to re-execute with $CONFIG_SHELL, we're ensured to have # already done that, so ensure we don't try to do so again and fall @@ -496,6 +516,10 @@ as_cr_alnum=$as_cr_Letters$as_cr_digits exit } + +# Determine whether it's possible to make 'echo' print without a newline. +# These variables are no longer used directly by Autoconf, but are AC_SUBSTed +# for compatibility with existing Makefiles. ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) @@ -509,6 +533,13 @@ case `echo -n x` in #((((( ECHO_N='-n';; esac +# For backward compatibility with old third-party macros, we provide +# the shell variables $as_echo and $as_echo_n. New code should use +# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. +as_echo='printf %s\n' +as_echo_n='printf %s' + + rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file @@ -589,11 +620,11 @@ OPENMP_LIB OPENMP_CXXFLAGS OBJEXT EXEEXT -ac_ct_CC +ac_ct_CXX CPPFLAGS LDFLAGS -CFLAGS -CC +CXXFLAGS +CXX target_alias host_alias build_alias @@ -640,11 +671,12 @@ enable_option_checking ac_precious_vars='build_alias host_alias target_alias -CC -CFLAGS +CXX +CXXFLAGS LDFLAGS LIBS -CPPFLAGS' +CPPFLAGS +CCC' # Initialize some variables set by options. @@ -713,8 +745,6 @@ do *) ac_optarg=yes ;; esac - # Accept the important Cygnus configure options, so we can diagnose typos. - case $ac_dashdash$ac_option in --) ac_dashdash=yes ;; @@ -755,9 +785,9 @@ do ac_useropt=`expr "x$ac_option" : 'x-*disable-\(.*\)'` # Reject names that are not valid shell variable names. 
expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid feature name: $ac_useropt" + as_fn_error $? "invalid feature name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" @@ -781,9 +811,9 @@ do ac_useropt=`expr "x$ac_option" : 'x-*enable-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid feature name: $ac_useropt" + as_fn_error $? "invalid feature name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "enable_$ac_useropt" @@ -994,9 +1024,9 @@ do ac_useropt=`expr "x$ac_option" : 'x-*with-\([^=]*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid package name: $ac_useropt" + as_fn_error $? "invalid package name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" @@ -1010,9 +1040,9 @@ do ac_useropt=`expr "x$ac_option" : 'x-*without-\(.*\)'` # Reject names that are not valid shell variable names. expr "x$ac_useropt" : ".*[^-+._$as_cr_alnum]" >/dev/null && - as_fn_error $? "invalid package name: $ac_useropt" + as_fn_error $? "invalid package name: \`$ac_useropt'" ac_useropt_orig=$ac_useropt - ac_useropt=`$as_echo "$ac_useropt" | sed 's/[-+.]/_/g'` + ac_useropt=`printf "%s\n" "$ac_useropt" | sed 's/[-+.]/_/g'` case $ac_user_opts in *" "with_$ac_useropt" @@ -1056,9 +1086,9 @@ Try \`$0 --help' for more information" *) # FIXME: should be removed in autoconf 3.0. 
- $as_echo "$as_me: WARNING: you should use --build, --host, --target" >&2 + printf "%s\n" "$as_me: WARNING: you should use --build, --host, --target" >&2 expr "x$ac_option" : ".*[^-._$as_cr_alnum]" >/dev/null && - $as_echo "$as_me: WARNING: invalid host type: $ac_option" >&2 + printf "%s\n" "$as_me: WARNING: invalid host type: $ac_option" >&2 : "${build_alias=$ac_option} ${host_alias=$ac_option} ${target_alias=$ac_option}" ;; @@ -1074,7 +1104,7 @@ if test -n "$ac_unrecognized_opts"; then case $enable_option_checking in no) ;; fatal) as_fn_error $? "unrecognized options: $ac_unrecognized_opts" ;; - *) $as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; + *) printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2 ;; esac fi @@ -1138,7 +1168,7 @@ $as_expr X"$as_myself" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_myself" : 'X\(//\)[^/]' \| \ X"$as_myself" : 'X\(//\)$' \| \ X"$as_myself" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$as_myself" | +printf "%s\n" X"$as_myself" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -1262,8 +1292,8 @@ if test -n "$ac_init_help"; then cat <<\_ACEOF Some influential environment variables: - CC C compiler command - CFLAGS C compiler flags + CXX C++ compiler command + CXXFLAGS C++ compiler flags LDFLAGS linker flags, e.g. -L if you have libraries in a nonstandard directory LIBS libraries to pass to the linker, e.g. -l @@ -1289,9 +1319,9 @@ if test "$ac_init_help" = "recursive"; then case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) - ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. - ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. 
ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; @@ -1319,7 +1349,8 @@ esac ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix cd "$ac_dir" || { ac_status=$?; continue; } - # Check for guested configure. + # Check for configure.gnu first; this name is used for a wrapper for + # Metaconfig's "Configure" on case-insensitive file systems. if test -f "$ac_srcdir/configure.gnu"; then echo && $SHELL "$ac_srcdir/configure.gnu" --help=recursive @@ -1327,7 +1358,7 @@ ac_abs_srcdir=$ac_abs_top_srcdir$ac_dir_suffix echo && $SHELL "$ac_srcdir/configure" --help=recursive else - $as_echo "$as_me: WARNING: no configuration information is in $ac_dir" >&2 + printf "%s\n" "$as_me: WARNING: no configuration information is in $ac_dir" >&2 fi || ac_status=$? cd "$ac_pwd" || { ac_status=$?; break; } done @@ -1337,9 +1368,9 @@ test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF xgboost configure 1.7.3 -generated by GNU Autoconf 2.69 +generated by GNU Autoconf 2.71 -Copyright (C) 2012 Free Software Foundation, Inc. +Copyright (C) 2021 Free Software Foundation, Inc. This configure script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it. _ACEOF @@ -1350,20 +1381,20 @@ fi ## Autoconf initialization. ## ## ------------------------ ## -# ac_fn_c_try_compile LINENO -# -------------------------- +# ac_fn_cxx_try_compile LINENO +# ---------------------------- # Try to compile conftest.$ac_ext, and return whether this succeeded. 
-ac_fn_c_try_compile () +ac_fn_cxx_try_compile () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext + rm -f conftest.$ac_objext conftest.beam if { { ac_try="$ac_compile" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>conftest.err ac_status=$? if test -s conftest.err; then @@ -1371,14 +1402,15 @@ $as_echo "$ac_try_echo"; } >&5 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { - test -z "$ac_c_werror_flag" || + test -z "$ac_cxx_werror_flag" || test ! -s conftest.err - } && test -s conftest.$ac_objext; then : + } && test -s conftest.$ac_objext +then : ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 @@ -1386,22 +1418,22 @@ fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval -} # ac_fn_c_try_compile +} # ac_fn_cxx_try_compile -# ac_fn_c_try_link LINENO -# ----------------------- +# ac_fn_cxx_try_link LINENO +# ------------------------- # Try to link conftest.$ac_ext, and return whether this succeeded. 
-ac_fn_c_try_link () +ac_fn_cxx_try_link () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - rm -f conftest.$ac_objext conftest$ac_exeext + rm -f conftest.$ac_objext conftest.beam conftest$ac_exeext if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>conftest.err ac_status=$? if test -s conftest.err; then @@ -1409,17 +1441,18 @@ $as_echo "$ac_try_echo"; } >&5 cat conftest.er1 >&5 mv -f conftest.er1 conftest.err fi - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { - test -z "$ac_c_werror_flag" || + test -z "$ac_cxx_werror_flag" || test ! -s conftest.err } && test -s conftest$ac_exeext && { test "$cross_compiling" = yes || test -x conftest$ac_exeext - }; then : + } +then : ac_retval=0 -else - $as_echo "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=1 @@ -1432,13 +1465,13 @@ fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval -} # ac_fn_c_try_link +} # ac_fn_cxx_try_link -# ac_fn_c_try_run LINENO -# ---------------------- -# Try to link conftest.$ac_ext, and return whether this succeeded. Assumes -# that executables *can* be run. -ac_fn_c_try_run () +# ac_fn_cxx_try_run LINENO +# ------------------------ +# Try to run conftest.$ac_ext, and return whether this succeeded. Assumes that +# executables *can* be run. 
+ac_fn_cxx_try_run () { as_lineno=${as_lineno-"$1"} as_lineno_stack=as_lineno_stack=$as_lineno_stack if { { ac_try="$ac_link" @@ -1447,25 +1480,26 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } && { ac_try='./conftest$ac_exeext' { { case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; }; then : + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; }; } +then : ac_retval=0 -else - $as_echo "$as_me: program exited with status $ac_status" >&5 - $as_echo "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: program exited with status $ac_status" >&5 + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 ac_retval=$ac_status @@ -1474,15 +1508,35 @@ fi eval $as_lineno_stack; ${as_lineno_stack:+:} unset as_lineno as_fn_set_status $ac_retval -} # ac_fn_c_try_run +} # ac_fn_cxx_try_run +ac_configure_args_raw= +for ac_arg +do + case $ac_arg in + *\'*) + ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + esac + as_fn_append ac_configure_args_raw " '$ac_arg'" +done + +case $ac_configure_args_raw in + *$as_nl*) + ac_safe_unquote= ;; + *) + ac_unsafe_z='|&;<>()$`\\"*?[ '' ' # This string ends in space, tab. 
+ ac_unsafe_a="$ac_unsafe_z#~" + ac_safe_unquote="s/ '\\([^$ac_unsafe_a][^$ac_unsafe_z]*\\)'/ \\1/g" + ac_configure_args_raw=` printf "%s\n" "$ac_configure_args_raw" | sed "$ac_safe_unquote"`;; +esac + cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. It was created by xgboost $as_me 1.7.3, which was -generated by GNU Autoconf 2.69. Invocation command line was +generated by GNU Autoconf 2.71. Invocation command line was - $ $0 $@ + $ $0$ac_configure_args_raw _ACEOF exec 5>>config.log @@ -1515,8 +1569,12 @@ as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - $as_echo "PATH: $as_dir" + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + printf "%s\n" "PATH: $as_dir" done IFS=$as_save_IFS @@ -1551,7 +1609,7 @@ do | -silent | --silent | --silen | --sile | --sil) continue ;; *\'*) - ac_arg=`$as_echo "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; + ac_arg=`printf "%s\n" "$ac_arg" | sed "s/'/'\\\\\\\\''/g"` ;; esac case $ac_pass in 1) as_fn_append ac_configure_args0 " '$ac_arg'" ;; @@ -1586,11 +1644,13 @@ done # WARNING: Use '\'' to represent an apostrophe within the trap. # WARNING: Do not start the trap code with a newline, due to a FreeBSD 4.0 bug. trap 'exit_status=$? + # Sanitize IFS. + IFS=" "" $as_nl" # Save into config.log some information that might help in debugging. { echo - $as_echo "## ---------------- ## + printf "%s\n" "## ---------------- ## ## Cache variables. ## ## ---------------- ##" echo @@ -1601,8 +1661,8 @@ trap 'exit_status=$? 
case $ac_val in #( *${as_nl}*) case $ac_var in #( - *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 -$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var in #( _ | IFS | as_nl) ;; #( @@ -1626,7 +1686,7 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; ) echo - $as_echo "## ----------------- ## + printf "%s\n" "## ----------------- ## ## Output variables. ## ## ----------------- ##" echo @@ -1634,14 +1694,14 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; do eval ac_val=\$$ac_var case $ac_val in - *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac - $as_echo "$ac_var='\''$ac_val'\''" + printf "%s\n" "$ac_var='\''$ac_val'\''" done | sort echo if test -n "$ac_subst_files"; then - $as_echo "## ------------------- ## + printf "%s\n" "## ------------------- ## ## File substitutions. ## ## ------------------- ##" echo @@ -1649,15 +1709,15 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; do eval ac_val=\$$ac_var case $ac_val in - *\'\''*) ac_val=`$as_echo "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; + *\'\''*) ac_val=`printf "%s\n" "$ac_val" | sed "s/'\''/'\''\\\\\\\\'\'''\''/g"`;; esac - $as_echo "$ac_var='\''$ac_val'\''" + printf "%s\n" "$ac_var='\''$ac_val'\''" done | sort echo fi if test -s confdefs.h; then - $as_echo "## ----------- ## + printf "%s\n" "## ----------- ## ## confdefs.h. 
## ## ----------- ##" echo @@ -1665,8 +1725,8 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; echo fi test "$ac_signal" != 0 && - $as_echo "$as_me: caught signal $ac_signal" - $as_echo "$as_me: exit $exit_status" + printf "%s\n" "$as_me: caught signal $ac_signal" + printf "%s\n" "$as_me: exit $exit_status" } >&5 rm -f core *.core core.conftest.* && rm -f -r conftest* confdefs* conf$$* $ac_clean_files && @@ -1680,63 +1740,48 @@ ac_signal=0 # confdefs.h avoids OS command line length limits that DEFS can exceed. rm -f -r conftest* confdefs.h -$as_echo "/* confdefs.h */" > confdefs.h +printf "%s\n" "/* confdefs.h */" > confdefs.h # Predefined preprocessor variables. -cat >>confdefs.h <<_ACEOF -#define PACKAGE_NAME "$PACKAGE_NAME" -_ACEOF +printf "%s\n" "#define PACKAGE_NAME \"$PACKAGE_NAME\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_TARNAME "$PACKAGE_TARNAME" -_ACEOF +printf "%s\n" "#define PACKAGE_TARNAME \"$PACKAGE_TARNAME\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_VERSION "$PACKAGE_VERSION" -_ACEOF +printf "%s\n" "#define PACKAGE_VERSION \"$PACKAGE_VERSION\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_STRING "$PACKAGE_STRING" -_ACEOF +printf "%s\n" "#define PACKAGE_STRING \"$PACKAGE_STRING\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_BUGREPORT "$PACKAGE_BUGREPORT" -_ACEOF +printf "%s\n" "#define PACKAGE_BUGREPORT \"$PACKAGE_BUGREPORT\"" >>confdefs.h -cat >>confdefs.h <<_ACEOF -#define PACKAGE_URL "$PACKAGE_URL" -_ACEOF +printf "%s\n" "#define PACKAGE_URL \"$PACKAGE_URL\"" >>confdefs.h # Let the site file select an alternate cache file if it wants to. # Prefer an explicitly selected file to automatically selected ones. -ac_site_file1=NONE -ac_site_file2=NONE if test -n "$CONFIG_SITE"; then - # We do not want a PATH search for config.site. 
- case $CONFIG_SITE in #(( - -*) ac_site_file1=./$CONFIG_SITE;; - */*) ac_site_file1=$CONFIG_SITE;; - *) ac_site_file1=./$CONFIG_SITE;; - esac + ac_site_files="$CONFIG_SITE" elif test "x$prefix" != xNONE; then - ac_site_file1=$prefix/share/config.site - ac_site_file2=$prefix/etc/config.site + ac_site_files="$prefix/share/config.site $prefix/etc/config.site" else - ac_site_file1=$ac_default_prefix/share/config.site - ac_site_file2=$ac_default_prefix/etc/config.site + ac_site_files="$ac_default_prefix/share/config.site $ac_default_prefix/etc/config.site" fi -for ac_site_file in "$ac_site_file1" "$ac_site_file2" + +for ac_site_file in $ac_site_files do - test "x$ac_site_file" = xNONE && continue - if test /dev/null != "$ac_site_file" && test -r "$ac_site_file"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 -$as_echo "$as_me: loading site script $ac_site_file" >&6;} + case $ac_site_file in #( + */*) : + ;; #( + *) : + ac_site_file=./$ac_site_file ;; +esac + if test -f "$ac_site_file" && test -r "$ac_site_file"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading site script $ac_site_file" >&5 +printf "%s\n" "$as_me: loading site script $ac_site_file" >&6;} sed 's/^/| /' "$ac_site_file" >&5 . "$ac_site_file" \ - || { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} + || { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "failed to load site script $ac_site_file See \`config.log' for more details" "$LINENO" 5; } fi @@ -1746,19 +1791,235 @@ if test -r "$cache_file"; then # Some versions of bash will fail to source /dev/null (special files # actually), so we avoid doing that. DJGPP emulates it as a regular file. 
if test /dev/null != "$cache_file" && test -f "$cache_file"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 -$as_echo "$as_me: loading cache $cache_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: loading cache $cache_file" >&5 +printf "%s\n" "$as_me: loading cache $cache_file" >&6;} case $cache_file in [\\/]* | ?:[\\/]* ) . "$cache_file";; *) . "./$cache_file";; esac fi else - { $as_echo "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 -$as_echo "$as_me: creating cache $cache_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating cache $cache_file" >&5 +printf "%s\n" "$as_me: creating cache $cache_file" >&6;} >$cache_file fi +# Test code for whether the C++ compiler supports C++98 (global declarations) +ac_cxx_conftest_cxx98_globals=' +// Does the compiler advertise C++98 conformance? +#if !defined __cplusplus || __cplusplus < 199711L +# error "Compiler does not advertise C++98 conformance" +#endif + +// These inclusions are to reject old compilers that +// lack the unsuffixed header files. +#include +#include + +// and are *not* freestanding headers in C++98. +extern void assert (int); +namespace std { + extern int strcmp (const char *, const char *); +} + +// Namespaces, exceptions, and templates were all added after "C++ 2.0". +using std::exception; +using std::strcmp; + +namespace { + +void test_exception_syntax() +{ + try { + throw "test"; + } catch (const char *s) { + // Extra parentheses suppress a warning when building autoconf itself, + // due to lint rules shared with more typical C programs. + assert (!(strcmp) (s, "test")); + } +} + +template struct test_template +{ + T const val; + explicit test_template(T t) : val(t) {} + template T add(U u) { return static_cast(u) + val; } +}; + +} // anonymous namespace +' + +# Test code for whether the C++ compiler supports C++98 (body of main) +ac_cxx_conftest_cxx98_main=' + assert (argc); + assert (! 
argv[0]); +{ + test_exception_syntax (); + test_template tt (2.0); + assert (tt.add (4) == 6.0); + assert (true && !false); +} +' + +# Test code for whether the C++ compiler supports C++11 (global declarations) +ac_cxx_conftest_cxx11_globals=' +// Does the compiler advertise C++ 2011 conformance? +#if !defined __cplusplus || __cplusplus < 201103L +# error "Compiler does not advertise C++11 conformance" +#endif + +namespace cxx11test +{ + constexpr int get_val() { return 20; } + + struct testinit + { + int i; + double d; + }; + + class delegate + { + public: + delegate(int n) : n(n) {} + delegate(): delegate(2354) {} + + virtual int getval() { return this->n; }; + protected: + int n; + }; + + class overridden : public delegate + { + public: + overridden(int n): delegate(n) {} + virtual int getval() override final { return this->n * 2; } + }; + + class nocopy + { + public: + nocopy(int i): i(i) {} + nocopy() = default; + nocopy(const nocopy&) = delete; + nocopy & operator=(const nocopy&) = delete; + private: + int i; + }; + + // for testing lambda expressions + template Ret eval(Fn f, Ret v) + { + return f(v); + } + + // for testing variadic templates and trailing return types + template auto sum(V first) -> V + { + return first; + } + template auto sum(V first, Args... 
rest) -> V + { + return first + sum(rest...); + } +} +' + +# Test code for whether the C++ compiler supports C++11 (body of main) +ac_cxx_conftest_cxx11_main=' +{ + // Test auto and decltype + auto a1 = 6538; + auto a2 = 48573953.4; + auto a3 = "String literal"; + + int total = 0; + for (auto i = a3; *i; ++i) { total += *i; } + + decltype(a2) a4 = 34895.034; +} +{ + // Test constexpr + short sa[cxx11test::get_val()] = { 0 }; +} +{ + // Test initializer lists + cxx11test::testinit il = { 4323, 435234.23544 }; +} +{ + // Test range-based for + int array[] = {9, 7, 13, 15, 4, 18, 12, 10, 5, 3, + 14, 19, 17, 8, 6, 20, 16, 2, 11, 1}; + for (auto &x : array) { x += 23; } +} +{ + // Test lambda expressions + using cxx11test::eval; + assert (eval ([](int x) { return x*2; }, 21) == 42); + double d = 2.0; + assert (eval ([&](double x) { return d += x; }, 3.0) == 5.0); + assert (d == 5.0); + assert (eval ([=](double x) mutable { return d += x; }, 4.0) == 9.0); + assert (d == 5.0); +} +{ + // Test use of variadic templates + using cxx11test::sum; + auto a = sum(1); + auto b = sum(1, 2); + auto c = sum(1.0, 2.0, 3.0); +} +{ + // Test constructor delegation + cxx11test::delegate d1; + cxx11test::delegate d2(); + cxx11test::delegate d3(45); +} +{ + // Test override and final + cxx11test::overridden o1(55464); +} +{ + // Test nullptr + char *c = nullptr; +} +{ + // Test template brackets + test_template<::test_template> v(test_template(12)); +} +{ + // Unicode literals + char const *utf8 = u8"UTF-8 string \u2500"; + char16_t const *utf16 = u"UTF-8 string \u2500"; + char32_t const *utf32 = U"UTF-32 string \u2500"; +} +' + +# Test code for whether the C compiler supports C++11 (complete). 
+ac_cxx_conftest_cxx11_program="${ac_cxx_conftest_cxx98_globals} +${ac_cxx_conftest_cxx11_globals} + +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_cxx_conftest_cxx98_main} + ${ac_cxx_conftest_cxx11_main} + return ok; +} +" + +# Test code for whether the C compiler supports C++98 (complete). +ac_cxx_conftest_cxx98_program="${ac_cxx_conftest_cxx98_globals} +int +main (int argc, char **argv) +{ + int ok = 0; + ${ac_cxx_conftest_cxx98_main} + return ok; +} +" + # Check that the precious variables saved in the cache have kept the same # value. ac_cache_corrupted=false @@ -1769,12 +2030,12 @@ for ac_var in $ac_precious_vars; do eval ac_new_val=\$ac_env_${ac_var}_value case $ac_old_set,$ac_new_set in set,) - { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 -$as_echo "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&5 +printf "%s\n" "$as_me: error: \`$ac_var' was set to \`$ac_old_val' in the previous run" >&2;} ac_cache_corrupted=: ;; ,set) - { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 -$as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' was not set in the previous run" >&5 +printf "%s\n" "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} ac_cache_corrupted=: ;; ,);; *) @@ -1783,24 +2044,24 @@ $as_echo "$as_me: error: \`$ac_var' was not set in the previous run" >&2;} ac_old_val_w=`echo x $ac_old_val` ac_new_val_w=`echo x $ac_new_val` if test "$ac_old_val_w" != "$ac_new_val_w"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 -$as_echo "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} + { printf "%s\n" 
"$as_me:${as_lineno-$LINENO}: error: \`$ac_var' has changed since the previous run:" >&5 +printf "%s\n" "$as_me: error: \`$ac_var' has changed since the previous run:" >&2;} ac_cache_corrupted=: else - { $as_echo "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 -$as_echo "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&5 +printf "%s\n" "$as_me: warning: ignoring whitespace changes in \`$ac_var' since the previous run:" >&2;} eval $ac_var=\$ac_old_val fi - { $as_echo "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 -$as_echo "$as_me: former value: \`$ac_old_val'" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 -$as_echo "$as_me: current value: \`$ac_new_val'" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: former value: \`$ac_old_val'" >&5 +printf "%s\n" "$as_me: former value: \`$ac_old_val'" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: current value: \`$ac_new_val'" >&5 +printf "%s\n" "$as_me: current value: \`$ac_new_val'" >&2;} fi;; esac # Pass precious variables to config.status. 
if test "$ac_new_set" = set; then case $ac_new_val in - *\'*) ac_arg=$ac_var=`$as_echo "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; + *\'*) ac_arg=$ac_var=`printf "%s\n" "$ac_new_val" | sed "s/'/'\\\\\\\\''/g"` ;; *) ac_arg=$ac_var=$ac_new_val ;; esac case " $ac_configure_args " in @@ -1810,11 +2071,12 @@ $as_echo "$as_me: current value: \`$ac_new_val'" >&2;} fi done if $ac_cache_corrupted; then - { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} - { $as_echo "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 -$as_echo "$as_me: error: changes in the environment can compromise the build" >&2;} - as_fn_error $? "run \`make distclean' and/or \`rm $cache_file' and start over" "$LINENO" 5 + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: changes in the environment can compromise the build" >&5 +printf "%s\n" "$as_me: error: changes in the environment can compromise the build" >&2;} + as_fn_error $? "run \`${MAKE-make} distclean' and/or \`rm $cache_file' + and start over" "$LINENO" 5 fi ## -------------------- ## ## Main body of script. ## @@ -1828,224 +2090,76 @@ ac_compiler_gnu=$ac_cv_c_compiler_gnu -# Use this line to set CC variable to a C compiler -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu -if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}gcc", so it can be a program name with args. -set dummy ${ac_tool_prefix}gcc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... 
" >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_CC="${ac_tool_prefix}gcc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS - -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } +: ${R_HOME=`R RHOME`} +if test -z "${R_HOME}"; then + echo "could not determine R_HOME" + exit 1 fi +CXX14=`"${R_HOME}/bin/R" CMD config CXX14` +CXX14STD=`"${R_HOME}/bin/R" CMD config CXX14STD` +CXX="${CXX14} ${CXX14STD}" +CXXFLAGS=`"${R_HOME}/bin/R" CMD config CXXFLAGS` -fi -if test -z "$ac_cv_prog_CC"; then - ac_ct_CC=$CC - # Extract the first word of "gcc", so it can be a program name with args. -set dummy gcc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$ac_ct_CC"; then - ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_ac_ct_CC="gcc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS +CC=`"${R_HOME}/bin/R" CMD config CC` +CFLAGS=`"${R_HOME}/bin/R" CMD config CFLAGS` +CPPFLAGS=`"${R_HOME}/bin/R" CMD config CPPFLAGS` -fi -fi -ac_ct_CC=$ac_cv_prog_ac_ct_CC -if test -n "$ac_ct_CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 -$as_echo "$ac_ct_CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi +LDFLAGS=`"${R_HOME}/bin/R" CMD config LDFLAGS` +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - if test "x$ac_ct_CC" = x; then - CC="" - else - case $cross_compiling:$ac_tool_warned in -yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} -ac_tool_warned=yes ;; -esac - CC=$ac_ct_CC - fi -else - CC="$ac_cv_prog_CC" -fi -if test -z "$CC"; then - if test -n "$ac_tool_prefix"; then - # Extract the first word of "${ac_tool_prefix}cc", so it can be a program name with args. -set dummy ${ac_tool_prefix}cc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
- for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_CC="${ac_tool_prefix}cc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS +### Check whether backtrace() is part of libc or the external lib libexecinfo +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking Backtrace lib" >&5 +printf %s "checking Backtrace lib... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 +printf "%s\n" "" >&6; } -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi - fi -fi -if test -z "$CC"; then - # Extract the first word of "cc", so it can be a program name with args. -set dummy cc; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. -else - ac_prog_rejected=no -as_save_IFS=$IFS; IFS=$PATH_SEPARATOR -for as_dir in $PATH -do - IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - if test "$as_dir/$ac_word$ac_exec_ext" = "/usr/ucb/cc"; then - ac_prog_rejected=yes - continue - fi - ac_cv_prog_CC="cc" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 - break 2 - fi -done - done -IFS=$as_save_IFS -if test $ac_prog_rejected = yes; then - # We found a bogon in the path, so make sure we never use it. - set dummy $ac_cv_prog_CC - shift - if test $# != 0; then - # We chose a different compiler from the bogus one. 
- # However, it has the same basename, so the bogon will be chosen - # first if we set CC to just the basename; use the full file name. - shift - ac_cv_prog_CC="$as_dir/$ac_word${1+' '}$@" - fi -fi -fi -fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } -else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -fi -fi -if test -z "$CC"; then - if test -n "$ac_tool_prefix"; then - for ac_prog in cl.exe +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu +if test -z "$CXX"; then + if test -n "$CCC"; then + CXX=$CCC + else + if test -n "$ac_tool_prefix"; then + for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC clang++ do # Extract the first word of "$ac_tool_prefix$ac_prog", so it can be a program name with args. set dummy $ac_tool_prefix$ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$CC"; then - ac_cv_prog_CC="$CC" # Let the user override the test. +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_CXX+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$CXX"; then + ac_cv_prog_CXX="$CXX" # Let the user override the test. else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. 
+ case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_CC="$ac_tool_prefix$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_CXX="$ac_tool_prefix$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -2054,42 +2168,47 @@ IFS=$as_save_IFS fi fi -CC=$ac_cv_prog_CC -if test -n "$CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $CC" >&5 -$as_echo "$CC" >&6; } +CXX=$ac_cv_prog_CXX +if test -n "$CXX"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $CXX" >&5 +printf "%s\n" "$CXX" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi - test -n "$CC" && break + test -n "$CXX" && break done fi -if test -z "$CC"; then - ac_ct_CC=$CC - for ac_prog in cl.exe +if test -z "$CXX"; then + ac_ct_CXX=$CXX + for ac_prog in g++ c++ gpp aCC CC cxx cc++ cl.exe FCC KCC RCC xlC_r xlC clang++ do # Extract the first word of "$ac_prog", so it can be a program name with args. set dummy $ac_prog; ac_word=$2 -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 -$as_echo_n "checking for $ac_word... " >&6; } -if ${ac_cv_prog_ac_ct_CC+:} false; then : - $as_echo_n "(cached) " >&6 -else - if test -n "$ac_ct_CC"; then - ac_cv_prog_ac_ct_CC="$ac_ct_CC" # Let the user override the test. +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $ac_word" >&5 +printf %s "checking for $ac_word... " >&6; } +if test ${ac_cv_prog_ac_ct_CXX+y} +then : + printf %s "(cached) " >&6 +else $as_nop + if test -n "$ac_ct_CXX"; then + ac_cv_prog_ac_ct_CXX="$ac_ct_CXX" # Let the user override the test. 
else as_save_IFS=$IFS; IFS=$PATH_SEPARATOR for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac for ac_exec_ext in '' $ac_executable_extensions; do - if as_fn_executable_p "$as_dir/$ac_word$ac_exec_ext"; then - ac_cv_prog_ac_ct_CC="$ac_prog" - $as_echo "$as_me:${as_lineno-$LINENO}: found $as_dir/$ac_word$ac_exec_ext" >&5 + if as_fn_executable_p "$as_dir$ac_word$ac_exec_ext"; then + ac_cv_prog_ac_ct_CXX="$ac_prog" + printf "%s\n" "$as_me:${as_lineno-$LINENO}: found $as_dir$ac_word$ac_exec_ext" >&5 break 2 fi done @@ -2098,42 +2217,36 @@ IFS=$as_save_IFS fi fi -ac_ct_CC=$ac_cv_prog_ac_ct_CC -if test -n "$ac_ct_CC"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CC" >&5 -$as_echo "$ac_ct_CC" >&6; } +ac_ct_CXX=$ac_cv_prog_ac_ct_CXX +if test -n "$ac_ct_CXX"; then + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_ct_CXX" >&5 +printf "%s\n" "$ac_ct_CXX" >&6; } else - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } fi - test -n "$ac_ct_CC" && break + test -n "$ac_ct_CXX" && break done - if test "x$ac_ct_CC" = x; then - CC="" + if test "x$ac_ct_CXX" = x; then + CXX="g++" else case $cross_compiling:$ac_tool_warned in yes:) -{ $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 -$as_echo "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: using cross tools not prefixed with host triplet" >&5 +printf "%s\n" "$as_me: WARNING: using cross tools not prefixed with host triplet" >&2;} ac_tool_warned=yes ;; esac - CC=$ac_ct_CC + CXX=$ac_ct_CXX fi fi + fi fi - - -test -z "$CC" && { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? 
"no acceptable C compiler found in \$PATH -See \`config.log' for more details" "$LINENO" 5; } - # Provide some information about the compiler. -$as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler version" >&5 +printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C++ compiler version" >&5 set X $ac_compile ac_compiler=$2 for ac_option in --version -v -V -qversion; do @@ -2143,7 +2256,7 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compiler $ac_option >&5") 2>conftest.err ac_status=$? if test -s conftest.err; then @@ -2153,7 +2266,7 @@ $as_echo "$ac_try_echo"; } >&5 cat conftest.er1 >&5 fi rm -f conftest.er1 conftest.err - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } done @@ -2161,7 +2274,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int -main () +main (void) { ; @@ -2173,9 +2286,9 @@ ac_clean_files="$ac_clean_files a.out a.out.dSYM a.exe b.out" # Try to create an executable without -o first, disregard a.out. # It will help us diagnose broken compilers, and finding out an intuition # of exeext. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether the C compiler works" >&5 -$as_echo_n "checking whether the C compiler works... " >&6; } -ac_link_default=`$as_echo "$ac_link" | sed 's/ -o *conftest[^ ]*//'` +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the C++ compiler works" >&5 +printf %s "checking whether the C++ compiler works... 
" >&6; } +ac_link_default=`printf "%s\n" "$ac_link" | sed 's/ -o *conftest[^ ]*//'` # The possible output files: ac_files="a.out conftest.exe conftest a.exe a_out.exe b.out conftest.*" @@ -2196,11 +2309,12 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link_default") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then : + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : # Autoconf-2.13 could set the ac_cv_exeext variable to `no'. # So ignore a value of `no', otherwise this would lead to `EXEEXT = no' # in a Makefile. We should not override ac_cv_exeext if it was cached, @@ -2217,7 +2331,7 @@ do # certainly right. break;; *.* ) - if test "${ac_cv_exeext+set}" = set && test "$ac_cv_exeext" != no; + if test ${ac_cv_exeext+y} && test "$ac_cv_exeext" != no; then :; else ac_cv_exeext=`expr "$ac_file" : '[^.]*\(\..*\)'` fi @@ -2233,44 +2347,46 @@ do done test "$ac_cv_exeext" = no && ac_cv_exeext= -else +else $as_nop ac_file='' fi -if test -z "$ac_file"; then : - { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 -$as_echo "no" >&6; } -$as_echo "$as_me: failed program was:" >&5 +if test -z "$ac_file" +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: no" >&5 +printf "%s\n" "no" >&6; } +printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 -{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error 77 "C compiler cannot create executables +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "C++ compiler cannot create executables See \`config.log' for more details" "$LINENO" 5; } -else - { $as_echo 
"$as_me:${as_lineno-$LINENO}: result: yes" >&5 -$as_echo "yes" >&6; } -fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for C compiler default output file name" >&5 -$as_echo_n "checking for C compiler default output file name... " >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 -$as_echo "$ac_file" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +printf "%s\n" "yes" >&6; } +fi +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for C++ compiler default output file name" >&5 +printf %s "checking for C++ compiler default output file name... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_file" >&5 +printf "%s\n" "$ac_file" >&6; } ac_exeext=$ac_cv_exeext rm -f -r a.out a.out.dSYM a.exe conftest$ac_cv_exeext b.out ac_clean_files=$ac_clean_files_save -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 -$as_echo_n "checking for suffix of executables... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of executables" >&5 +printf %s "checking for suffix of executables... " >&6; } if { { ac_try="$ac_link" case "(($ac_try" in *\"* | *\`* | *\\*) ac_try_echo=\$ac_try;; *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then : + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : # If both `conftest.exe' and `conftest' are `present' (well, observable) # catch `conftest.exe'. 
For instance with Cygwin, `ls conftest' will # work properly (i.e., refer to `conftest.exe'), while it won't with @@ -2284,15 +2400,15 @@ for ac_file in conftest.exe conftest conftest.*; do * ) break;; esac done -else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +else $as_nop + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of executables: cannot compile and link See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest conftest$ac_cv_exeext -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 -$as_echo "$ac_cv_exeext" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_exeext" >&5 +printf "%s\n" "$ac_cv_exeext" >&6; } rm -f conftest.$ac_ext EXEEXT=$ac_cv_exeext @@ -2301,7 +2417,7 @@ cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int -main () +main (void) { FILE *f = fopen ("conftest.out", "w"); return ferror (f) || fclose (f) != 0; @@ -2313,8 +2429,8 @@ _ACEOF ac_clean_files="$ac_clean_files conftest.out" # Check that the compiler produces executables we can run. If not, either # the compiler is broken, or we cross compile. -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 -$as_echo_n "checking whether we are cross compiling... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether we are cross compiling" >&5 +printf %s "checking whether we are cross compiling... " >&6; } if test "$cross_compiling" != yes; then { { ac_try="$ac_link" case "(($ac_try" in @@ -2322,10 +2438,10 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_link") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; } if { ac_try='./conftest$ac_cv_exeext' { { case "(($ac_try" in @@ -2333,39 +2449,40 @@ $as_echo "$ac_try_echo"; } >&5 *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_try") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 test $ac_status = 0; }; }; then cross_compiling=no else if test "$cross_compiling" = maybe; then cross_compiling=yes else - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} -as_fn_error $? "cannot run C compiled programs. + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} +as_fn_error 77 "cannot run C++ compiled programs. If you meant to cross compile, use \`--host'. See \`config.log' for more details" "$LINENO" 5; } fi fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 -$as_echo "$cross_compiling" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $cross_compiling" >&5 +printf "%s\n" "$cross_compiling" >&6; } rm -f conftest.$ac_ext conftest$ac_cv_exeext conftest.out ac_clean_files=$ac_clean_files_save -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 -$as_echo_n "checking for suffix of object files... " >&6; } -if ${ac_cv_objext+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for suffix of object files" >&5 +printf %s "checking for suffix of object files... " >&6; } +if test ${ac_cv_objext+y} +then : + printf %s "(cached) " >&6 +else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int -main () +main (void) { ; @@ -2379,11 +2496,12 @@ case "(($ac_try" in *) ac_try_echo=$ac_try;; esac eval ac_try_echo="\"\$as_me:${as_lineno-$LINENO}: $ac_try_echo\"" -$as_echo "$ac_try_echo"; } >&5 +printf "%s\n" "$ac_try_echo"; } >&5 (eval "$ac_compile") 2>&5 ac_status=$? - $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 - test $ac_status = 0; }; then : + printf "%s\n" "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 + test $ac_status = 0; } +then : for ac_file in conftest.o conftest.obj conftest.*; do test -f "$ac_file" || continue; case $ac_file in @@ -2392,31 +2510,32 @@ $as_echo "$ac_try_echo"; } >&5 break;; esac done -else - $as_echo "$as_me: failed program was:" >&5 +else $as_nop + printf "%s\n" "$as_me: failed program was:" >&5 sed 's/^/| /' conftest.$ac_ext >&5 -{ { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot compute suffix of object files: cannot compile See \`config.log' for more details" "$LINENO" 5; } fi rm -f conftest.$ac_cv_objext conftest.$ac_ext fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 -$as_echo "$ac_cv_objext" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_objext" >&5 +printf "%s\n" "$ac_cv_objext" >&6; } OBJEXT=$ac_cv_objext ac_objext=$OBJEXT -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether we are using the GNU C compiler" >&5 -$as_echo_n "checking whether we are using the GNU C compiler... " >&6; } -if ${ac_cv_c_compiler_gnu+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether the compiler supports GNU C++" >&5 +printf %s "checking whether the compiler supports GNU C++... 
" >&6; } +if test ${ac_cv_cxx_compiler_gnu+y} +then : + printf %s "(cached) " >&6 +else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int -main () +main (void) { #ifndef __GNUC__ choke me @@ -2426,273 +2545,279 @@ main () return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_cxx_try_compile "$LINENO" +then : ac_compiler_gnu=yes -else +else $as_nop ac_compiler_gnu=no fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext -ac_cv_c_compiler_gnu=$ac_compiler_gnu +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext +ac_cv_cxx_compiler_gnu=$ac_compiler_gnu fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_c_compiler_gnu" >&5 -$as_echo "$ac_cv_c_compiler_gnu" >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_cxx_compiler_gnu" >&5 +printf "%s\n" "$ac_cv_cxx_compiler_gnu" >&6; } +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + if test $ac_compiler_gnu = yes; then - GCC=yes -else - GCC= -fi -ac_test_CFLAGS=${CFLAGS+set} -ac_save_CFLAGS=$CFLAGS -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking whether $CC accepts -g" >&5 -$as_echo_n "checking whether $CC accepts -g... " >&6; } -if ${ac_cv_prog_cc_g+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_save_c_werror_flag=$ac_c_werror_flag - ac_c_werror_flag=yes - ac_cv_prog_cc_g=no - CFLAGS="-g" + GXX=yes +else + GXX= +fi +ac_test_CXXFLAGS=${CXXFLAGS+y} +ac_save_CXXFLAGS=$CXXFLAGS +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether $CXX accepts -g" >&5 +printf %s "checking whether $CXX accepts -g... " >&6; } +if test ${ac_cv_prog_cxx_g+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_save_cxx_werror_flag=$ac_cxx_werror_flag + ac_cxx_werror_flag=yes + ac_cv_prog_cxx_g=no + CXXFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_prog_cc_g=yes -else - CFLAGS="" +if ac_fn_cxx_try_compile "$LINENO" +then : + ac_cv_prog_cxx_g=yes +else $as_nop + CXXFLAGS="" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : +if ac_fn_cxx_try_compile "$LINENO" +then : -else - ac_c_werror_flag=$ac_save_c_werror_flag - CFLAGS="-g" +else $as_nop + ac_cxx_werror_flag=$ac_save_cxx_werror_flag + CXXFLAGS="-g" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ int -main () +main (void) { ; return 0; } _ACEOF -if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_prog_cc_g=yes +if ac_fn_cxx_try_compile "$LINENO" +then : + ac_cv_prog_cxx_g=yes fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext fi -rm -f core conftest.err conftest.$ac_objext conftest.$ac_ext - ac_c_werror_flag=$ac_save_c_werror_flag +rm -f core conftest.err conftest.$ac_objext conftest.beam conftest.$ac_ext + ac_cxx_werror_flag=$ac_save_cxx_werror_flag fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_g" >&5 -$as_echo "$ac_cv_prog_cc_g" >&6; } -if test "$ac_test_CFLAGS" = set; then - CFLAGS=$ac_save_CFLAGS -elif test $ac_cv_prog_cc_g = yes; then - if test "$GCC" = yes; then - CFLAGS="-g -O2" +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_g" >&5 +printf "%s\n" "$ac_cv_prog_cxx_g" >&6; } +if test $ac_test_CXXFLAGS; then + CXXFLAGS=$ac_save_CXXFLAGS +elif test $ac_cv_prog_cxx_g = yes; then + if test "$GXX" = yes; then + CXXFLAGS="-g -O2" else - CFLAGS="-g" + CXXFLAGS="-g" fi else - if test "$GCC" = yes; then - CFLAGS="-O2" + if test "$GXX" = yes; then + CXXFLAGS="-O2" else - 
CFLAGS= + CXXFLAGS= fi fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for $CC option to accept ISO C89" >&5 -$as_echo_n "checking for $CC option to accept ISO C89... " >&6; } -if ${ac_cv_prog_cc_c89+:} false; then : - $as_echo_n "(cached) " >&6 -else - ac_cv_prog_cc_c89=no -ac_save_CC=$CC +ac_prog_cxx_stdcxx=no +if test x$ac_prog_cxx_stdcxx = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++11 features" >&5 +printf %s "checking for $CXX option to enable C++11 features... " >&6; } +if test ${ac_cv_prog_cxx_11+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cxx_11=no +ac_save_CXX=$CXX cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -#include -#include -struct stat; -/* Most of the following tests are stolen from RCS 5.7's src/conf.sh. */ -struct buf { int x; }; -FILE * (*rcsopen) (struct buf *, struct stat *, int); -static char *e (p, i) - char **p; - int i; -{ - return p[i]; -} -static char *f (char * (*g) (char **, int), char **p, ...) -{ - char *s; - va_list v; - va_start (v,p); - s = g (p, va_arg (v,int)); - va_end (v); - return s; -} - -/* OSF 4.0 Compaq cc is some sort of almost-ANSI by default. It has - function prototypes and stuff, but not '\xHH' hex character constants. - These don't provoke an error unfortunately, instead are silently treated - as 'x'. The following induces an error, until -std is added to get - proper ANSI mode. Curiously '\x00'!='x' always comes out true, for an - array size at least. It's necessary to write '\x00'==0 to get something - that's true only with -std. */ -int osf4_cc_array ['\x00' == 0 ? 1 : -1]; - -/* IBM C 6 for AIX is almost-ANSI by default, but it replaces macro parameters - inside strings and character constants. */ -#define FOO(x) 'x' -int xlc6_cc_array[FOO(a) == 'x' ? 
1 : -1]; - -int test (int i, double x); -struct s1 {int (*f) (int a);}; -struct s2 {int (*f) (double a);}; -int pairnames (int, char **, FILE *(*)(struct buf *, struct stat *, int), int, int); -int argc; -char **argv; -int -main () -{ -return f (e, argv, 0) != argv[0] || f (e, argv, 1) != argv[1]; - ; - return 0; -} +$ac_cxx_conftest_cxx11_program _ACEOF -for ac_arg in '' -qlanglvl=extc89 -qlanglvl=ansi -std \ - -Ae "-Aa -D_HPUX_SOURCE" "-Xc -D__EXTENSIONS__" +for ac_arg in '' -std=gnu++11 -std=gnu++0x -std=c++11 -std=c++0x -qlanglvl=extended0x -AA do - CC="$ac_save_CC $ac_arg" - if ac_fn_c_try_compile "$LINENO"; then : - ac_cv_prog_cc_c89=$ac_arg + CXX="$ac_save_CXX $ac_arg" + if ac_fn_cxx_try_compile "$LINENO" +then : + ac_cv_prog_cxx_cxx11=$ac_arg fi -rm -f core conftest.err conftest.$ac_objext - test "x$ac_cv_prog_cc_c89" != "xno" && break +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cxx_cxx11" != "xno" && break done rm -f conftest.$ac_ext -CC=$ac_save_CC - +CXX=$ac_save_CXX +fi + +if test "x$ac_cv_prog_cxx_cxx11" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cxx_cxx11" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_cxx11" >&5 +printf "%s\n" "$ac_cv_prog_cxx_cxx11" >&6; } + CXX="$CXX $ac_cv_prog_cxx_cxx11" +fi + ac_cv_prog_cxx_stdcxx=$ac_cv_prog_cxx_cxx11 + ac_prog_cxx_stdcxx=cxx11 +fi +fi +if test x$ac_prog_cxx_stdcxx = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for $CXX option to enable C++98 features" >&5 +printf %s "checking for $CXX option to enable C++98 features... 
" >&6; } +if test ${ac_cv_prog_cxx_98+y} +then : + printf %s "(cached) " >&6 +else $as_nop + ac_cv_prog_cxx_98=no +ac_save_CXX=$CXX +cat confdefs.h - <<_ACEOF >conftest.$ac_ext +/* end confdefs.h. */ +$ac_cxx_conftest_cxx98_program +_ACEOF +for ac_arg in '' -std=gnu++98 -std=c++98 -qlanglvl=extended -AA +do + CXX="$ac_save_CXX $ac_arg" + if ac_fn_cxx_try_compile "$LINENO" +then : + ac_cv_prog_cxx_cxx98=$ac_arg +fi +rm -f core conftest.err conftest.$ac_objext conftest.beam + test "x$ac_cv_prog_cxx_cxx98" != "xno" && break +done +rm -f conftest.$ac_ext +CXX=$ac_save_CXX fi -# AC_CACHE_VAL -case "x$ac_cv_prog_cc_c89" in - x) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 -$as_echo "none needed" >&6; } ;; - xno) - { $as_echo "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 -$as_echo "unsupported" >&6; } ;; - *) - CC="$CC $ac_cv_prog_cc_c89" - { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cc_c89" >&5 -$as_echo "$ac_cv_prog_cc_c89" >&6; } ;; -esac -if test "x$ac_cv_prog_cc_c89" != xno; then : +if test "x$ac_cv_prog_cxx_cxx98" = xno +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: unsupported" >&5 +printf "%s\n" "unsupported" >&6; } +else $as_nop + if test "x$ac_cv_prog_cxx_cxx98" = x +then : + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: none needed" >&5 +printf "%s\n" "none needed" >&6; } +else $as_nop + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_prog_cxx_cxx98" >&5 +printf "%s\n" "$ac_cv_prog_cxx_cxx98" >&6; } + CXX="$CXX $ac_cv_prog_cxx_cxx98" +fi + ac_cv_prog_cxx_stdcxx=$ac_cv_prog_cxx_cxx98 + ac_prog_cxx_stdcxx=cxx98 +fi fi -ac_ext=c -ac_cpp='$CPP $CPPFLAGS' -ac_compile='$CC -c $CFLAGS $CPPFLAGS conftest.$ac_ext >&5' -ac_link='$CC -o conftest$ac_exeext $CFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' -ac_compiler_gnu=$ac_cv_c_compiler_gnu +ac_ext=cpp +ac_cpp='$CXXCPP $CPPFLAGS' +ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +ac_link='$CXX -o conftest$ac_exeext 
$CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ext $LIBS >&5' +ac_compiler_gnu=$ac_cv_cxx_compiler_gnu -### Check whether backtrace() is part of libc or the external lib libexecinfo -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking Backtrace lib" >&5 -$as_echo_n "checking Backtrace lib... " >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5 -$as_echo "" >&6; } - -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for backtrace in -lexecinfo" >&5 -$as_echo_n "checking for backtrace in -lexecinfo... " >&6; } -if ${ac_cv_lib_execinfo_backtrace+:} false; then : - $as_echo_n "(cached) " >&6 -else +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking for backtrace in -lexecinfo" >&5 +printf %s "checking for backtrace in -lexecinfo... " >&6; } +if test ${ac_cv_lib_execinfo_backtrace+y} +then : + printf %s "(cached) " >&6 +else $as_nop ac_check_lib_save_LIBS=$LIBS LIBS="-lexecinfo $LIBS" cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ -/* Override any GCC internal prototype to avoid an error. - Use char because int might match the return type of a GCC - builtin and then its argument prototype would still apply. 
*/ -#ifdef __cplusplus -extern "C" -#endif -char backtrace (); +namespace conftest { + extern "C" int backtrace (); +} int -main () +main (void) { -return backtrace (); +return conftest::backtrace (); ; return 0; } _ACEOF -if ac_fn_c_try_link "$LINENO"; then : +if ac_fn_cxx_try_link "$LINENO" +then : ac_cv_lib_execinfo_backtrace=yes -else +else $as_nop ac_cv_lib_execinfo_backtrace=no fi -rm -f core conftest.err conftest.$ac_objext \ +rm -f core conftest.err conftest.$ac_objext conftest.beam \ conftest$ac_exeext conftest.$ac_ext LIBS=$ac_check_lib_save_LIBS fi -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_execinfo_backtrace" >&5 -$as_echo "$ac_cv_lib_execinfo_backtrace" >&6; } -if test "x$ac_cv_lib_execinfo_backtrace" = xyes; then : +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: $ac_cv_lib_execinfo_backtrace" >&5 +printf "%s\n" "$ac_cv_lib_execinfo_backtrace" >&6; } +if test "x$ac_cv_lib_execinfo_backtrace" = xyes +then : BACKTRACE_LIB=-lexecinfo -else +else $as_nop BACKTRACE_LIB='' fi ### Endian detection -{ $as_echo "$as_me:${as_lineno-$LINENO}: checking endian" >&5 -$as_echo_n "checking endian... " >&6; } -{ $as_echo "$as_me:${as_lineno-$LINENO}: result: " >&5 -$as_echo "" >&6; } -if test "$cross_compiling" = yes; then : - { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 -$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking endian" >&5 +printf %s "checking endian... " >&6; } +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: " >&5 +printf "%s\n" "" >&6; } +if test "$cross_compiling" = yes +then : + { { printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +printf "%s\n" "$as_me: error: in \`$ac_pwd':" >&2;} as_fn_error $? "cannot run test program while cross compiling See \`config.log' for more details" "$LINENO" 5; } -else +else $as_nop cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. 
*/ #include int -main () +main (void) { const uint16_t endianness = 256; return !!(*(const uint8_t *)&endianness); ; return 0; } _ACEOF -if ac_fn_c_try_run "$LINENO"; then : +if ac_fn_cxx_try_run "$LINENO" +then : ENDIAN_FLAG="-DDMLC_CMAKE_LITTLE_ENDIAN=1" -else +else $as_nop ENDIAN_FLAG="-DDMLC_CMAKE_LITTLE_ENDIAN=0" fi rm -f core *.core core.conftest.* gmon.out bb.out conftest$ac_exeext \ @@ -2719,13 +2844,13 @@ then OPENMP_CXXFLAGS="-Xpreprocessor -fopenmp -I${HOMEBREW_LIBOMP_PREFIX}/include" OPENMP_LIB="-lomp -L${HOMEBREW_LIBOMP_PREFIX}/lib" ac_pkg_openmp=no - { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether OpenMP will work in a package" >&5 -$as_echo_n "checking whether OpenMP will work in a package... " >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: checking whether OpenMP will work in a package" >&5 +printf %s "checking whether OpenMP will work in a package... " >&6; } cat confdefs.h - <<_ACEOF >conftest.$ac_ext /* end confdefs.h. */ #include int -main () +main (void) { return (omp_get_max_threads() <= 1); ; @@ -2733,8 +2858,8 @@ main () } _ACEOF ${CC} -o conftest conftest.c ${CPPFLAGS} ${LDFLAGS} ${OPENMP_LIB} ${OPENMP_CXXFLAGS} 2>/dev/null && ./conftest && ac_pkg_openmp=yes - { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${ac_pkg_openmp}" >&5 -$as_echo "${ac_pkg_openmp}" >&6; } + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${ac_pkg_openmp}" >&5 +printf "%s\n" "${ac_pkg_openmp}" >&6; } if test "${ac_pkg_openmp}" = no; then OPENMP_CXXFLAGS='' OPENMP_LIB='' @@ -2779,8 +2904,8 @@ _ACEOF case $ac_val in #( *${as_nl}*) case $ac_var in #( - *_cv_*) { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 -$as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; + *_cv_*) { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: cache variable $ac_var contains a newline" >&5 +printf "%s\n" "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; esac case $ac_var 
in #( _ | IFS | as_nl) ;; #( @@ -2810,15 +2935,15 @@ $as_echo "$as_me: WARNING: cache variable $ac_var contains a newline" >&2;} ;; /^ac_cv_env_/b end t clear :clear - s/^\([^=]*\)=\(.*[{}].*\)$/test "${\1+set}" = set || &/ + s/^\([^=]*\)=\(.*[{}].*\)$/test ${\1+y} || &/ t end s/^\([^=]*\)=\(.*\)$/\1=${\1=\2}/ :end' >>confcache if diff "$cache_file" confcache >/dev/null 2>&1; then :; else if test -w "$cache_file"; then if test "x$cache_file" != "x/dev/null"; then - { $as_echo "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 -$as_echo "$as_me: updating cache $cache_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: updating cache $cache_file" >&5 +printf "%s\n" "$as_me: updating cache $cache_file" >&6;} if test ! -f "$cache_file" || test -h "$cache_file"; then cat confcache >"$cache_file" else @@ -2832,8 +2957,8 @@ $as_echo "$as_me: updating cache $cache_file" >&6;} fi fi else - { $as_echo "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 -$as_echo "$as_me: not updating unwritable cache $cache_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: not updating unwritable cache $cache_file" >&5 +printf "%s\n" "$as_me: not updating unwritable cache $cache_file" >&6;} fi fi rm -f confcache @@ -2886,7 +3011,7 @@ U= for ac_i in : $LIBOBJS; do test "x$ac_i" = x: && continue # 1. Remove the extension, and $U if already installed. ac_script='s/\$U\././;s/\.o$//;s/\.obj$//' - ac_i=`$as_echo "$ac_i" | sed "$ac_script"` + ac_i=`printf "%s\n" "$ac_i" | sed "$ac_script"` # 2. Prepend LIBOBJDIR. When used with automake>=1.10 LIBOBJDIR # will be set to the directory where LIBOBJS objects are built. 
as_fn_append ac_libobjs " \${LIBOBJDIR}$ac_i\$U.$ac_objext" @@ -2902,8 +3027,8 @@ LTLIBOBJS=$ac_ltlibobjs ac_write_fail=0 ac_clean_files_save=$ac_clean_files ac_clean_files="$ac_clean_files $CONFIG_STATUS" -{ $as_echo "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 -$as_echo "$as_me: creating $CONFIG_STATUS" >&6;} +{ printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $CONFIG_STATUS" >&5 +printf "%s\n" "$as_me: creating $CONFIG_STATUS" >&6;} as_write_fail=0 cat >$CONFIG_STATUS <<_ASEOF || as_write_fail=1 #! $SHELL @@ -2926,14 +3051,16 @@ cat >>$CONFIG_STATUS <<\_ASEOF || as_write_fail=1 # Be more Bourne compatible DUALCASE=1; export DUALCASE # for MKS sh -if test -n "${ZSH_VERSION+set}" && (emulate sh) >/dev/null 2>&1; then : +as_nop=: +if test ${ZSH_VERSION+y} && (emulate sh) >/dev/null 2>&1 +then : emulate sh NULLCMD=: # Pre-4.2 versions of Zsh do word splitting on ${1+"$@"}, which # is contrary to our usage. Disable this feature. alias -g '${1+"$@"}'='"$@"' setopt NO_GLOB_SUBST -else +else $as_nop case `(set -o) 2>/dev/null` in #( *posix*) : set -o posix ;; #( @@ -2943,46 +3070,46 @@ esac fi + +# Reset variables that may have inherited troublesome values from +# the environment. + +# IFS needs to be set, to space, tab, and newline, in precisely that order. +# (If _AS_PATH_WALK were called with IFS unset, it would have the +# side effect of setting IFS to empty, thus disabling word splitting.) +# Quoting is to prevent editors from complaining about space-tab. as_nl=' ' export as_nl -# Printing a long string crashes Solaris 7 /usr/bin/printf. -as_echo='\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\' -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo -as_echo=$as_echo$as_echo$as_echo$as_echo$as_echo$as_echo -# Prefer a ksh shell builtin over an external printf program on Solaris, -# but without wasting forks for bash or zsh. 
-if test -z "$BASH_VERSION$ZSH_VERSION" \ - && (test "X`print -r -- $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='print -r --' - as_echo_n='print -rn --' -elif (test "X`printf %s $as_echo`" = "X$as_echo") 2>/dev/null; then - as_echo='printf %s\n' - as_echo_n='printf %s' -else - if test "X`(/usr/ucb/echo -n -n $as_echo) 2>/dev/null`" = "X-n $as_echo"; then - as_echo_body='eval /usr/ucb/echo -n "$1$as_nl"' - as_echo_n='/usr/ucb/echo -n' - else - as_echo_body='eval expr "X$1" : "X\\(.*\\)"' - as_echo_n_body='eval - arg=$1; - case $arg in #( - *"$as_nl"*) - expr "X$arg" : "X\\(.*\\)$as_nl"; - arg=`expr "X$arg" : ".*$as_nl\\(.*\\)"`;; - esac; - expr "X$arg" : "X\\(.*\\)" | tr -d "$as_nl" - ' - export as_echo_n_body - as_echo_n='sh -c $as_echo_n_body as_echo' - fi - export as_echo_body - as_echo='sh -c $as_echo_body as_echo' -fi +IFS=" "" $as_nl" + +PS1='$ ' +PS2='> ' +PS4='+ ' + +# Ensure predictable behavior from utilities with locale-dependent output. +LC_ALL=C +export LC_ALL +LANGUAGE=C +export LANGUAGE + +# We cannot yet rely on "unset" to work, but we need these variables +# to be unset--not just set to an empty or harmless value--now, to +# avoid bugs in old shells (e.g. pre-3.0 UWIN ksh). This construct +# also avoids known problems related to "unset" and subshell syntax +# in other old shells (e.g. bash 2.01 and pdksh 5.2.14). +for as_var in BASH_ENV ENV MAIL MAILPATH CDPATH +do eval test \${$as_var+y} \ + && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : +done + +# Ensure that fds 0, 1, and 2 are open. +if (exec 3>&0) 2>/dev/null; then :; else exec 0&1) 2>/dev/null; then :; else exec 1>/dev/null; fi +if (exec 3>&2) ; then :; else exec 2>/dev/null; fi # The user is always right. 
-if test "${PATH_SEPARATOR+set}" != set; then +if ${PATH_SEPARATOR+false} :; then PATH_SEPARATOR=: (PATH='/bin;/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 && { (PATH='/bin:/bin'; FPATH=$PATH; sh -c :) >/dev/null 2>&1 || @@ -2991,13 +3118,6 @@ if test "${PATH_SEPARATOR+set}" != set; then fi -# IFS -# We need space, tab and new line, in precisely that order. Quoting is -# there to prevent editors from complaining about space-tab. -# (If _AS_PATH_WALK were called with IFS unset, it would disable word -# splitting by setting IFS to empty value.) -IFS=" "" $as_nl" - # Find who we are. Look in the path if we contain no directory separator. as_myself= case $0 in #(( @@ -3006,8 +3126,12 @@ case $0 in #(( for as_dir in $PATH do IFS=$as_save_IFS - test -z "$as_dir" && as_dir=. - test -r "$as_dir/$0" && as_myself=$as_dir/$0 && break + case $as_dir in #((( + '') as_dir=./ ;; + */) ;; + *) as_dir=$as_dir/ ;; + esac + test -r "$as_dir$0" && as_myself=$as_dir$0 && break done IFS=$as_save_IFS @@ -3019,30 +3143,10 @@ if test "x$as_myself" = x; then as_myself=$0 fi if test ! -f "$as_myself"; then - $as_echo "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 + printf "%s\n" "$as_myself: error: cannot find myself; rerun with an absolute file name" >&2 exit 1 fi -# Unset variables that we do not need and which cause bugs (e.g. in -# pre-3.0 UWIN ksh). But do not cause bugs in bash 2.01; the "|| exit 1" -# suppresses any "Segmentation fault" message there. '((' could -# trigger a bug in pdksh 5.2.14. -for as_var in BASH_ENV ENV MAIL MAILPATH -do eval test x\${$as_var+set} = xset \ - && ( (unset $as_var) || exit 1) >/dev/null 2>&1 && unset $as_var || : -done -PS1='$ ' -PS2='> ' -PS4='+ ' - -# NLS nuisances. -LC_ALL=C -export LC_ALL -LANGUAGE=C -export LANGUAGE - -# CDPATH. 
-(unset CDPATH) >/dev/null 2>&1 && unset CDPATH # as_fn_error STATUS ERROR [LINENO LOG_FD] @@ -3055,13 +3159,14 @@ as_fn_error () as_status=$1; test $as_status -eq 0 && as_status=1 if test "$4"; then as_lineno=${as_lineno-"$3"} as_lineno_stack=as_lineno_stack=$as_lineno_stack - $as_echo "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 + printf "%s\n" "$as_me:${as_lineno-$LINENO}: error: $2" >&$4 fi - $as_echo "$as_me: error: $2" >&2 + printf "%s\n" "$as_me: error: $2" >&2 as_fn_exit $as_status } # as_fn_error + # as_fn_set_status STATUS # ----------------------- # Set $? to STATUS, without forking. @@ -3088,18 +3193,20 @@ as_fn_unset () { eval $1=; unset $1;} } as_unset=as_fn_unset + # as_fn_append VAR VALUE # ---------------------- # Append the text in VALUE to the end of the definition contained in VAR. Take # advantage of any shell optimizations that allow amortized linear growth over # repeated appends, instead of the typical quadratic growth present in naive # implementations. -if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null; then : +if (eval "as_var=1; as_var+=2; test x\$as_var = x12") 2>/dev/null +then : eval 'as_fn_append () { eval $1+=\$2 }' -else +else $as_nop as_fn_append () { eval $1=\$$1\$2 @@ -3111,12 +3218,13 @@ fi # as_fn_append # Perform arithmetic evaluation on the ARGs, and store the result in the # global $as_val. Take advantage of shells that can avoid forks. The arguments # must be portable across $(()) and expr. -if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null; then : +if (eval "test \$(( 1 + 1 )) = 2") 2>/dev/null +then : eval 'as_fn_arith () { as_val=$(( $* )) }' -else +else $as_nop as_fn_arith () { as_val=`expr "$@" || test $? -eq 1` @@ -3147,7 +3255,7 @@ as_me=`$as_basename -- "$0" || $as_expr X/"$0" : '.*/\([^/][^/]*\)/*$' \| \ X"$0" : 'X\(//\)$' \| \ X"$0" : 'X\(/\)' \| . 
2>/dev/null || -$as_echo X/"$0" | +printf "%s\n" X/"$0" | sed '/^.*\/\([^/][^/]*\)\/*$/{ s//\1/ q @@ -3169,6 +3277,10 @@ as_cr_Letters=$as_cr_letters$as_cr_LETTERS as_cr_digits='0123456789' as_cr_alnum=$as_cr_Letters$as_cr_digits + +# Determine whether it's possible to make 'echo' print without a newline. +# These variables are no longer used directly by Autoconf, but are AC_SUBSTed +# for compatibility with existing Makefiles. ECHO_C= ECHO_N= ECHO_T= case `echo -n x` in #((((( -n*) @@ -3182,6 +3294,12 @@ case `echo -n x` in #((((( ECHO_N='-n';; esac +# For backward compatibility with old third-party macros, we provide +# the shell variables $as_echo and $as_echo_n. New code should use +# AS_ECHO(["message"]) and AS_ECHO_N(["message"]), respectively. +as_echo='printf %s\n' +as_echo_n='printf %s' + rm -f conf$$ conf$$.exe conf$$.file if test -d conf$$.dir; then rm -f conf$$.dir/conf$$.file @@ -3223,7 +3341,7 @@ as_fn_mkdir_p () as_dirs= while :; do case $as_dir in #( - *\'*) as_qdir=`$as_echo "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( + *\'*) as_qdir=`printf "%s\n" "$as_dir" | sed "s/'/'\\\\\\\\''/g"`;; #'( *) as_qdir=$as_dir;; esac as_dirs="'$as_qdir' $as_dirs" @@ -3232,7 +3350,7 @@ $as_expr X"$as_dir" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$as_dir" : 'X\(//\)[^/]' \| \ X"$as_dir" : 'X\(//\)$' \| \ X"$as_dir" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$as_dir" | +printf "%s\n" X"$as_dir" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -3295,7 +3413,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # values after options handling. ac_log=" This file was extended by xgboost $as_me 1.7.3, which was -generated by GNU Autoconf 2.69. Invocation command line was +generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES CONFIG_HEADERS = $CONFIG_HEADERS @@ -3344,14 +3462,16 @@ $config_files Report bugs to the package provider." 
_ACEOF +ac_cs_config=`printf "%s\n" "$ac_configure_args" | sed "$ac_safe_unquote"` +ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\''/g"` cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 -ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`" +ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ xgboost config.status 1.7.3 -configured by $0, generated by GNU Autoconf 2.69, +configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" -Copyright (C) 2012 Free Software Foundation, Inc. +Copyright (C) 2021 Free Software Foundation, Inc. This config.status script is free software; the Free Software Foundation gives unlimited permission to copy, distribute and modify it." @@ -3388,21 +3508,21 @@ do -recheck | --recheck | --rechec | --reche | --rech | --rec | --re | --r) ac_cs_recheck=: ;; --version | --versio | --versi | --vers | --ver | --ve | --v | -V ) - $as_echo "$ac_cs_version"; exit ;; + printf "%s\n" "$ac_cs_version"; exit ;; --config | --confi | --conf | --con | --co | --c ) - $as_echo "$ac_cs_config"; exit ;; + printf "%s\n" "$ac_cs_config"; exit ;; --debug | --debu | --deb | --de | --d | -d ) debug=: ;; --file | --fil | --fi | --f ) $ac_shift case $ac_optarg in - *\'*) ac_optarg=`$as_echo "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; + *\'*) ac_optarg=`printf "%s\n" "$ac_optarg" | sed "s/'/'\\\\\\\\''/g"` ;; '') as_fn_error $? 
"missing file argument" ;; esac as_fn_append CONFIG_FILES " '$ac_optarg'" ac_need_defaults=false;; --he | --h | --help | --hel | -h ) - $as_echo "$ac_cs_usage"; exit ;; + printf "%s\n" "$ac_cs_usage"; exit ;; -q | -quiet | --quiet | --quie | --qui | --qu | --q \ | -silent | --silent | --silen | --sile | --sil | --si | --s) ac_cs_silent=: ;; @@ -3430,7 +3550,7 @@ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 if \$ac_cs_recheck; then set X $SHELL '$0' $ac_configure_args \$ac_configure_extra_args --no-create --no-recursion shift - \$as_echo "running CONFIG_SHELL=$SHELL \$*" >&6 + \printf "%s\n" "running CONFIG_SHELL=$SHELL \$*" >&6 CONFIG_SHELL='$SHELL' export CONFIG_SHELL exec "\$@" @@ -3444,7 +3564,7 @@ exec 5>>config.log sed 'h;s/./-/g;s/^.../## /;s/...$/ ##/;p;x;p;x' <<_ASBOX ## Running $as_me. ## _ASBOX - $as_echo "$ac_log" + printf "%s\n" "$ac_log" } >&5 _ACEOF @@ -3469,7 +3589,7 @@ done # We use the long form for the default assignment because of an extremely # bizarre bug on SunOS 4.1.3. if $ac_need_defaults; then - test "${CONFIG_FILES+set}" = set || CONFIG_FILES=$config_files + test ${CONFIG_FILES+y} || CONFIG_FILES=$config_files fi # Have a temporary directory for convenience. Make it in the build tree @@ -3697,7 +3817,7 @@ do esac || as_fn_error 1 "cannot find input file: \`$ac_f'" "$LINENO" 5;; esac - case $ac_f in *\'*) ac_f=`$as_echo "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac + case $ac_f in *\'*) ac_f=`printf "%s\n" "$ac_f" | sed "s/'/'\\\\\\\\''/g"`;; esac as_fn_append ac_file_inputs " '$ac_f'" done @@ -3705,17 +3825,17 @@ do # use $as_me), people would be surprised to read: # /* config.h. Generated by config.status. */ configure_input='Generated from '` - $as_echo "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' + printf "%s\n" "$*" | sed 's|^[^:]*/||;s|:[^:]*/|, |g' `' by configure.' if test x"$ac_file" != x-; then configure_input="$ac_file. 
$configure_input" - { $as_echo "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 -$as_echo "$as_me: creating $ac_file" >&6;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: creating $ac_file" >&5 +printf "%s\n" "$as_me: creating $ac_file" >&6;} fi # Neutralize special characters interpreted by sed in replacement strings. case $configure_input in #( *\&* | *\|* | *\\* ) - ac_sed_conf_input=`$as_echo "$configure_input" | + ac_sed_conf_input=`printf "%s\n" "$configure_input" | sed 's/[\\\\&|]/\\\\&/g'`;; #( *) ac_sed_conf_input=$configure_input;; esac @@ -3732,7 +3852,7 @@ $as_expr X"$ac_file" : 'X\(.*[^/]\)//*[^/][^/]*/*$' \| \ X"$ac_file" : 'X\(//\)[^/]' \| \ X"$ac_file" : 'X\(//\)$' \| \ X"$ac_file" : 'X\(/\)' \| . 2>/dev/null || -$as_echo X"$ac_file" | +printf "%s\n" X"$ac_file" | sed '/^X\(.*[^/]\)\/\/*[^/][^/]*\/*$/{ s//\1/ q @@ -3756,9 +3876,9 @@ $as_echo X"$ac_file" | case "$ac_dir" in .) ac_dir_suffix= ac_top_builddir_sub=. ac_top_build_prefix= ;; *) - ac_dir_suffix=/`$as_echo "$ac_dir" | sed 's|^\.[\\/]||'` + ac_dir_suffix=/`printf "%s\n" "$ac_dir" | sed 's|^\.[\\/]||'` # A ".." for each directory in $ac_dir_suffix. - ac_top_builddir_sub=`$as_echo "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` + ac_top_builddir_sub=`printf "%s\n" "$ac_dir_suffix" | sed 's|/[^\\/]*|/..|g;s|/||'` case $ac_top_builddir_sub in "") ac_top_builddir_sub=. 
ac_top_build_prefix= ;; *) ac_top_build_prefix=$ac_top_builddir_sub/ ;; @@ -3811,8 +3931,8 @@ ac_sed_dataroot=' case `eval "sed -n \"\$ac_sed_dataroot\" $ac_file_inputs"` in *datarootdir*) ac_datarootdir_seen=yes;; *@datadir@*|*@docdir@*|*@infodir@*|*@localedir@*|*@mandir@*) - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 -$as_echo "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&5 +printf "%s\n" "$as_me: WARNING: $ac_file_inputs seems to ignore the --datarootdir setting" >&2;} _ACEOF cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_datarootdir_hack=' @@ -3854,9 +3974,9 @@ test -z "$ac_datarootdir_hack$ac_datarootdir_seen" && { ac_out=`sed -n '/\${datarootdir}/p' "$ac_tmp/out"`; test -n "$ac_out"; } && { ac_out=`sed -n '/^[ ]*datarootdir[ ]*:*=/p' \ "$ac_tmp/out"`; test -z "$ac_out"; } && - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. Please make sure it is defined" >&5 -$as_echo "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' +printf "%s\n" "$as_me: WARNING: $ac_file contains a reference to the variable \`datarootdir' which seems to be undefined. 
Please make sure it is defined" >&2;} rm -f "$ac_tmp/stdin" @@ -3903,7 +4023,8 @@ if test "$no_create" != yes; then $ac_cs_success || as_fn_exit 1 fi if test -n "$ac_unrecognized_opts" && test "$enable_option_checking" != no; then - { $as_echo "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 -$as_echo "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} + { printf "%s\n" "$as_me:${as_lineno-$LINENO}: WARNING: unrecognized options: $ac_unrecognized_opts" >&5 +printf "%s\n" "$as_me: WARNING: unrecognized options: $ac_unrecognized_opts" >&2;} fi + diff --git a/R-package/configure.ac b/R-package/configure.ac index e5d2396e2fbb..05a4a28e2296 100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -4,8 +4,23 @@ AC_PREREQ(2.69) AC_INIT([xgboost],[1.7.3],[],[xgboost],[]) -# Use this line to set CC variable to a C compiler -AC_PROG_CC +: ${R_HOME=`R RHOME`} +if test -z "${R_HOME}"; then + echo "could not determine R_HOME" + exit 1 +fi + +CXX14=`"${R_HOME}/bin/R" CMD config CXX14` +CXX14STD=`"${R_HOME}/bin/R" CMD config CXX14STD` +CXX="${CXX14} ${CXX14STD}" +CXXFLAGS=`"${R_HOME}/bin/R" CMD config CXXFLAGS` + +CC=`"${R_HOME}/bin/R" CMD config CC` +CFLAGS=`"${R_HOME}/bin/R" CMD config CFLAGS` +CPPFLAGS=`"${R_HOME}/bin/R" CMD config CPPFLAGS` + +LDFLAGS=`"${R_HOME}/bin/R" CMD config LDFLAGS` +AC_LANG(C++) ### Check whether backtrace() is part of libc or the external lib libexecinfo AC_MSG_CHECKING([Backtrace lib]) From 76bdca072a8a93e84cc4eaccb1cd4b268e38d69d Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sun, 15 Jan 2023 04:00:31 +0800 Subject: [PATCH 036/126] [R] Fix threads used to create DMatrix in predict. 
(#8681) (#8682) --- R-package/R/xgb.Booster.R | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/R-package/R/xgb.Booster.R b/R-package/R/xgb.Booster.R index 2f1f5091cdf2..f31c506466f9 100644 --- a/R-package/R/xgb.Booster.R +++ b/R-package/R/xgb.Booster.R @@ -328,8 +328,9 @@ predict.xgb.Booster <- function(object, newdata, missing = NA, outputmargin = FA predleaf = FALSE, predcontrib = FALSE, approxcontrib = FALSE, predinteraction = FALSE, reshape = FALSE, training = FALSE, iterationrange = NULL, strict_shape = FALSE, ...) { object <- xgb.Booster.complete(object, saveraw = FALSE) + if (!inherits(newdata, "xgb.DMatrix")) - newdata <- xgb.DMatrix(newdata, missing = missing) + newdata <- xgb.DMatrix(newdata, missing = missing, nthread = NVL(object$params[["nthread"]], -1)) if (!is.null(object[["feature_names"]]) && !is.null(colnames(newdata)) && !identical(object[["feature_names"]], colnames(newdata))) From 68d86336d7d40f85b007318fdbbd3b023b763f30 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sun, 29 Jan 2023 12:43:10 +0800 Subject: [PATCH 037/126] [backport] [R] fix OpenMP detection on macOS (#8684) (#8732) Co-authored-by: James Lamb --- R-package/configure | 2 +- R-package/configure.ac | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/R-package/configure b/R-package/configure index 2a4153f6f2fa..84452be04ece 100755 --- a/R-package/configure +++ b/R-package/configure @@ -2857,7 +2857,7 @@ main (void) return 0; } _ACEOF - ${CC} -o conftest conftest.c ${CPPFLAGS} ${LDFLAGS} ${OPENMP_LIB} ${OPENMP_CXXFLAGS} 2>/dev/null && ./conftest && ac_pkg_openmp=yes + ${CXX} -o conftest conftest.cpp ${CPPFLAGS} ${LDFLAGS} ${OPENMP_LIB} ${OPENMP_CXXFLAGS} 2>/dev/null && ./conftest && ac_pkg_openmp=yes { printf "%s\n" "$as_me:${as_lineno-$LINENO}: result: ${ac_pkg_openmp}" >&5 printf "%s\n" "${ac_pkg_openmp}" >&6; } if test "${ac_pkg_openmp}" = no; then diff --git a/R-package/configure.ac b/R-package/configure.ac index 05a4a28e2296..63cd02fafb2f 
100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -55,7 +55,7 @@ then ac_pkg_openmp=no AC_MSG_CHECKING([whether OpenMP will work in a package]) AC_LANG_CONFTEST([AC_LANG_PROGRAM([[#include ]], [[ return (omp_get_max_threads() <= 1); ]])]) - ${CC} -o conftest conftest.c ${CPPFLAGS} ${LDFLAGS} ${OPENMP_LIB} ${OPENMP_CXXFLAGS} 2>/dev/null && ./conftest && ac_pkg_openmp=yes + ${CXX} -o conftest conftest.cpp ${CPPFLAGS} ${LDFLAGS} ${OPENMP_LIB} ${OPENMP_CXXFLAGS} 2>/dev/null && ./conftest && ac_pkg_openmp=yes AC_MSG_RESULT([${ac_pkg_openmp}]) if test "${ac_pkg_openmp}" = no; then OPENMP_CXXFLAGS='' From 2f22f8d49b5c7797657db9c703b2a9c8ab8a5932 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 6 Feb 2023 16:58:15 +0800 Subject: [PATCH 038/126] [backport] Make sure input numpy array is aligned. (#8690) (#8696) (#8734) * [backport] Make sure input numpy array is aligned. (#8690) - use `np.require` to specify that the alignment is required. - scipy csr as well. - validate input pointer in `ArrayInterface`. * Workaround CUDA warning. (#8696) * backport from half type support for alignment. * fix import. --- python-package/xgboost/core.py | 17 +++++++------ python-package/xgboost/data.py | 33 +++++++++++++++++++------- src/data/array_interface.h | 25 ++++++++++++++----- tests/cpp/data/test_array_interface.cc | 14 +++++++++-- tests/python/test_dmatrix.py | 2 +- 5 files changed, 66 insertions(+), 25 deletions(-) diff --git a/python-package/xgboost/core.py b/python-package/xgboost/core.py index aa5014c29d11..8a877ec5a824 100644 --- a/python-package/xgboost/core.py +++ b/python-package/xgboost/core.py @@ -2172,6 +2172,7 @@ def assign_type(t: int) -> None: ) return _prediction_output(shape, dims, preds, False) + # pylint: disable=too-many-statements def inplace_predict( self, data: DataType, @@ -2192,10 +2193,10 @@ def inplace_predict( .. 
code-block:: python - booster.set_param({'predictor': 'gpu_predictor'}) + booster.set_param({"predictor": "gpu_predictor"}) booster.inplace_predict(cupy_array) - booster.set_param({'predictor': 'cpu_predictor}) + booster.set_param({"predictor": "cpu_predictor"}) booster.inplace_predict(numpy_array) .. versionadded:: 1.1.0 @@ -2301,14 +2302,16 @@ def inplace_predict( ) return _prediction_output(shape, dims, preds, False) if isinstance(data, scipy.sparse.csr_matrix): - csr = data + from .data import _transform_scipy_csr + + data = _transform_scipy_csr(data) _check_call( _LIB.XGBoosterPredictFromCSR( self.handle, - _array_interface(csr.indptr), - _array_interface(csr.indices), - _array_interface(csr.data), - c_bst_ulong(csr.shape[1]), + _array_interface(data.indptr), + _array_interface(data.indices), + _array_interface(data.data), + c_bst_ulong(data.shape[1]), from_pystr_to_cstr(json.dumps(args)), p_handle, ctypes.byref(shape), diff --git a/python-package/xgboost/data.py b/python-package/xgboost/data.py index 775eedd5776f..6afc27e156c9 100644 --- a/python-package/xgboost/data.py +++ b/python-package/xgboost/data.py @@ -30,6 +30,7 @@ c_array, c_str, from_pystr_to_cstr, + make_jcargs, ) DispatchedDataBackendReturnType = Tuple[ @@ -80,6 +81,21 @@ def _array_interface(data: np.ndarray) -> bytes: return interface_str +def _transform_scipy_csr(data: DataType) -> DataType: + from scipy.sparse import csr_matrix + + indptr, _ = _ensure_np_dtype(data.indptr, data.indptr.dtype) + indices, _ = _ensure_np_dtype(data.indices, data.indices.dtype) + values, _ = _ensure_np_dtype(data.data, data.data.dtype) + if ( + indptr is not data.indptr + or indices is not data.indices + or values is not data.data + ): + data = csr_matrix((values, indices, indptr), shape=data.shape) + return data + + def _from_scipy_csr( data: DataType, missing: FloatCompatible, @@ -93,18 +109,14 @@ def _from_scipy_csr( f"length mismatch: {len(data.indices)} vs {len(data.data)}" ) handle = ctypes.c_void_p() - args 
= { - "missing": float(missing), - "nthread": int(nthread), - } - config = bytes(json.dumps(args), "utf-8") + data = _transform_scipy_csr(data) _check_call( _LIB.XGDMatrixCreateFromCSR( _array_interface(data.indptr), _array_interface(data.indices), _array_interface(data.data), c_bst_ulong(data.shape[1]), - config, + make_jcargs(missing=float(missing), nthread=int(nthread)), ctypes.byref(handle), ) ) @@ -153,12 +165,13 @@ def _is_numpy_array(data: DataType) -> bool: def _ensure_np_dtype( - data: DataType, - dtype: Optional[NumpyDType] + data: DataType, dtype: Optional[NumpyDType] ) -> Tuple[np.ndarray, Optional[NumpyDType]]: if data.dtype.hasobject or data.dtype in [np.float16, np.bool_]: - data = data.astype(np.float32, copy=False) dtype = np.float32 + data = data.astype(dtype, copy=False) + if not data.flags.aligned: + data = np.require(data, requirements="A") return data, dtype @@ -1197,11 +1210,13 @@ def _proxy_transform( data, _ = _ensure_np_dtype(data, data.dtype) return data, None, feature_names, feature_types if _is_scipy_csr(data): + data = _transform_scipy_csr(data) return data, None, feature_names, feature_types if _is_pandas_df(data): arr, feature_names, feature_types = _transform_pandas_df( data, enable_categorical, feature_names, feature_types ) + arr, _ = _ensure_np_dtype(arr, arr.dtype) return arr, None, feature_names, feature_types raise TypeError("Value type is not supported for data iterator:" + str(type(data))) diff --git a/src/data/array_interface.h b/src/data/array_interface.h index e755108069dc..a833fe9e8e3d 100644 --- a/src/data/array_interface.h +++ b/src/data/array_interface.h @@ -1,5 +1,5 @@ -/*! 
- * Copyright 2019-2021 by Contributors +/** + * Copyright 2019-2023 by XGBoost Contributors * \file array_interface.h * \brief View of __array_interface__ */ @@ -7,9 +7,11 @@ #define XGBOOST_DATA_ARRAY_INTERFACE_H_ #include -#include +#include // std::size_t +#include #include #include +#include // std::alignment_of,std::remove_pointer_t #include #include @@ -394,6 +396,11 @@ class ArrayInterface { data = ArrayInterfaceHandler::ExtractData(array, n); static_assert(allow_mask ? D == 1 : D >= 1, "Masked ndarray is not supported."); + + auto alignment = this->ElementAlignment(); + auto ptr = reinterpret_cast(this->data); + CHECK_EQ(ptr % alignment, 0) << "Input pointer misalignment."; + if (allow_mask) { common::Span s_mask; size_t n_bits = ArrayInterfaceHandler::ExtractMask(array, &s_mask); @@ -512,9 +519,15 @@ class ArrayInterface { return func(reinterpret_cast(data)); } - XGBOOST_DEVICE size_t ElementSize() { - return this->DispatchCall( - [](auto *p_values) { return sizeof(std::remove_pointer_t); }); + XGBOOST_DEVICE std::size_t ElementSize() const { + return this->DispatchCall([](auto *typed_data_ptr) { + return sizeof(std::remove_pointer_t); + }); + } + XGBOOST_DEVICE std::size_t ElementAlignment() const { + return this->DispatchCall([](auto *typed_data_ptr) { + return std::alignment_of>::value; + }); } template diff --git a/tests/cpp/data/test_array_interface.cc b/tests/cpp/data/test_array_interface.cc index 5bd771ff08e2..9bf7010dc797 100644 --- a/tests/cpp/data/test_array_interface.cc +++ b/tests/cpp/data/test_array_interface.cc @@ -1,10 +1,12 @@ -/*! 
- * Copyright 2020-2021 by XGBoost Contributors +/** + * Copyright 2020-2023 by XGBoost Contributors */ #include #include #include "../helpers.h" #include "../../../src/data/array_interface.h" +#include "dmlc/logging.h" +#include "xgboost/json.h" namespace xgboost { TEST(ArrayInterface, Initialize) { @@ -71,6 +73,14 @@ TEST(ArrayInterface, Error) { column["mask"]["data"] = Null{}; common::Span s_mask; EXPECT_THROW(ArrayInterfaceHandler::ExtractMask(column_obj, &s_mask), dmlc::Error); + + get(column).erase("mask"); + // misaligned. + j_data = {Json(Integer(reinterpret_cast( + reinterpret_cast(storage.ConstHostPointer()) + 1))), + Json(Boolean(false))}; + column["data"] = j_data; + EXPECT_THROW({ ArrayInterface<1> arr{column}; }, dmlc::Error); } TEST(ArrayInterface, GetElement) { diff --git a/tests/python/test_dmatrix.py b/tests/python/test_dmatrix.py index b7933eac4c53..5c9232895fed 100644 --- a/tests/python/test_dmatrix.py +++ b/tests/python/test_dmatrix.py @@ -326,7 +326,7 @@ def test_sparse_dmatrix_csr(self): nrow = 100 ncol = 1000 x = rand(nrow, ncol, density=0.0005, format='csr', random_state=rng) - assert x.indices.max() < ncol - 1 + assert x.indices.max() < ncol x.data[:] = 1 dtrain = xgb.DMatrix(x, label=rng.binomial(1, 0.3, nrow)) assert (dtrain.num_row(), dtrain.num_col()) == (nrow, ncol) From df984f9c433061ebf3e97e0e3bd4b15c5577c9c8 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 9 Feb 2023 18:31:49 +0800 Subject: [PATCH 039/126] [backport] Fix different number of features in gpu_hist evaluator. 
(#8754) (#8769) Co-authored-by: Rory Mitchell --- src/tree/gpu_hist/evaluate_splits.cu | 49 ++++++++++++------- src/tree/gpu_hist/evaluate_splits.cuh | 13 +++-- src/tree/updater_gpu_hist.cu | 31 ++++++------ .../test_gpu_interaction_constraints.py | 39 +++++++++++++-- 4 files changed, 93 insertions(+), 39 deletions(-) diff --git a/src/tree/gpu_hist/evaluate_splits.cu b/src/tree/gpu_hist/evaluate_splits.cu index e471c8d361f5..781fff92aef8 100644 --- a/src/tree/gpu_hist/evaluate_splits.cu +++ b/src/tree/gpu_hist/evaluate_splits.cu @@ -248,8 +248,10 @@ class EvaluateSplitAgent { template __global__ __launch_bounds__(kBlockSize) void EvaluateSplitsKernel( - bst_feature_t number_active_features, common::Span d_inputs, - const EvaluateSplitSharedInputs shared_inputs, common::Span sorted_idx, + bst_feature_t max_active_features, + common::Span d_inputs, + const EvaluateSplitSharedInputs shared_inputs, + common::Span sorted_idx, const TreeEvaluator::SplitEvaluator evaluator, common::Span out_candidates) { // Aligned && shared storage for best_split @@ -263,11 +265,15 @@ __global__ __launch_bounds__(kBlockSize) void EvaluateSplitsKernel( __syncthreads(); // Allocate blocks to one feature of one node - const auto input_idx = blockIdx.x / number_active_features; + const auto input_idx = blockIdx.x / max_active_features; const EvaluateSplitInputs &inputs = d_inputs[input_idx]; // One block for each feature. 
Features are sampled, so fidx != blockIdx.x - - int fidx = inputs.feature_set[blockIdx.x % number_active_features]; + // Some blocks may not have any feature to work on, simply return + int feature_offset = blockIdx.x % max_active_features; + if (feature_offset >= inputs.feature_set.size()) { + return; + } + int fidx = inputs.feature_set[feature_offset]; using AgentT = EvaluateSplitAgent; __shared__ typename AgentT::TempStorage temp_storage; @@ -338,7 +344,8 @@ __device__ void SetCategoricalSplit(const EvaluateSplitSharedInputs &shared_inpu } void GPUHistEvaluator::LaunchEvaluateSplits( - bst_feature_t number_active_features, common::Span d_inputs, + bst_feature_t max_active_features, + common::Span d_inputs, EvaluateSplitSharedInputs shared_inputs, TreeEvaluator::SplitEvaluator evaluator, common::Span out_splits) { @@ -346,20 +353,25 @@ void GPUHistEvaluator::LaunchEvaluateSplits( this->SortHistogram(d_inputs, shared_inputs, evaluator); } - size_t combined_num_features = number_active_features * d_inputs.size(); - dh::TemporaryArray feature_best_splits(combined_num_features); + size_t combined_num_features = max_active_features * d_inputs.size(); + dh::TemporaryArray feature_best_splits( + combined_num_features, DeviceSplitCandidate()); // One block for each feature uint32_t constexpr kBlockThreads = 32; - dh::LaunchKernel {static_cast(combined_num_features), kBlockThreads, 0}( - EvaluateSplitsKernel, number_active_features, d_inputs, - shared_inputs, this->SortedIdx(d_inputs.size(), shared_inputs.feature_values.size()), + dh::LaunchKernel{static_cast(combined_num_features), kBlockThreads, + 0}( + EvaluateSplitsKernel, max_active_features, d_inputs, + shared_inputs, + this->SortedIdx(d_inputs.size(), shared_inputs.feature_values.size()), evaluator, dh::ToSpan(feature_best_splits)); // Reduce to get best candidate for left and right child over all features - auto reduce_offset = dh::MakeTransformIterator( - thrust::make_counting_iterator(0llu), - [=] 
__device__(size_t idx) -> size_t { return idx * number_active_features; }); + auto reduce_offset = + dh::MakeTransformIterator(thrust::make_counting_iterator(0llu), + [=] __device__(size_t idx) -> size_t { + return idx * max_active_features; + }); size_t temp_storage_bytes = 0; auto num_segments = out_splits.size(); cub::DeviceSegmentedReduce::Sum(nullptr, temp_storage_bytes, feature_best_splits.data(), @@ -386,15 +398,16 @@ void GPUHistEvaluator::CopyToHost(const std::vector &nidx) { } void GPUHistEvaluator::EvaluateSplits( - const std::vector &nidx, bst_feature_t number_active_features, - common::Span d_inputs, EvaluateSplitSharedInputs shared_inputs, + const std::vector &nidx, bst_feature_t max_active_features, + common::Span d_inputs, + EvaluateSplitSharedInputs shared_inputs, common::Span out_entries) { auto evaluator = this->tree_evaluator_.template GetEvaluator(); dh::TemporaryArray splits_out_storage(d_inputs.size()); auto out_splits = dh::ToSpan(splits_out_storage); - this->LaunchEvaluateSplits(number_active_features, d_inputs, shared_inputs, evaluator, - out_splits); + this->LaunchEvaluateSplits(max_active_features, d_inputs, shared_inputs, + evaluator, out_splits); auto d_sorted_idx = this->SortedIdx(d_inputs.size(), shared_inputs.feature_values.size()); auto d_entries = out_entries; diff --git a/src/tree/gpu_hist/evaluate_splits.cuh b/src/tree/gpu_hist/evaluate_splits.cuh index d3174c4df651..0b44f31aa5c0 100644 --- a/src/tree/gpu_hist/evaluate_splits.cuh +++ b/src/tree/gpu_hist/evaluate_splits.cuh @@ -170,13 +170,18 @@ class GPUHistEvaluator { TreeEvaluator::SplitEvaluator evaluator); // impl of evaluate splits, contains CUDA kernels so it's public - void LaunchEvaluateSplits(bst_feature_t number_active_features,common::Span d_inputs,EvaluateSplitSharedInputs shared_inputs, - TreeEvaluator::SplitEvaluator evaluator, - common::Span out_splits); + void LaunchEvaluateSplits( + bst_feature_t max_active_features, + common::Span d_inputs, + 
EvaluateSplitSharedInputs shared_inputs, + TreeEvaluator::SplitEvaluator evaluator, + common::Span out_splits); /** * \brief Evaluate splits for left and right nodes. */ - void EvaluateSplits(const std::vector &nidx,bst_feature_t number_active_features,common::Span d_inputs, + void EvaluateSplits(const std::vector &nidx, + bst_feature_t max_active_features, + common::Span d_inputs, EvaluateSplitSharedInputs shared_inputs, common::Span out_splits); /** diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index b90a7ce09f0f..3b01d74374e7 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -318,24 +318,27 @@ struct GPUHistMakerDevice { auto right_sampled_features = column_sampler.GetFeatureSet(tree.GetDepth(right_nidx)); right_sampled_features->SetDevice(ctx_->gpu_id); common::Span right_feature_set = - interaction_constraints.Query(right_sampled_features->DeviceSpan(), left_nidx); - h_node_inputs[i * 2] = {left_nidx, candidate.depth + 1, candidate.split.left_sum, - left_feature_set, hist.GetNodeHistogram(left_nidx)}; - h_node_inputs[i * 2 + 1] = {right_nidx, candidate.depth + 1, candidate.split.right_sum, - right_feature_set, hist.GetNodeHistogram(right_nidx)}; + interaction_constraints.Query(right_sampled_features->DeviceSpan(), + right_nidx); + h_node_inputs[i * 2] = {left_nidx, candidate.depth + 1, + candidate.split.left_sum, left_feature_set, + hist.GetNodeHistogram(left_nidx)}; + h_node_inputs[i * 2 + 1] = {right_nidx, candidate.depth + 1, + candidate.split.right_sum, right_feature_set, + hist.GetNodeHistogram(right_nidx)}; } - bst_feature_t number_active_features = h_node_inputs[0].feature_set.size(); + bst_feature_t max_active_features = 0; for (auto input : h_node_inputs) { - CHECK_EQ(input.feature_set.size(), number_active_features) - << "Current implementation assumes that the number of active features " - "(after sampling) in any node is the same"; + max_active_features = std::max(max_active_features, + 
bst_feature_t(input.feature_set.size())); } - dh::safe_cuda(cudaMemcpyAsync(d_node_inputs.data().get(), h_node_inputs.data(), - h_node_inputs.size() * sizeof(EvaluateSplitInputs), - cudaMemcpyDefault)); + dh::safe_cuda(cudaMemcpyAsync( + d_node_inputs.data().get(), h_node_inputs.data(), + h_node_inputs.size() * sizeof(EvaluateSplitInputs), cudaMemcpyDefault)); - this->evaluator_.EvaluateSplits(nidx, number_active_features, dh::ToSpan(d_node_inputs), - shared_inputs, dh::ToSpan(entries)); + this->evaluator_.EvaluateSplits(nidx, max_active_features, + dh::ToSpan(d_node_inputs), shared_inputs, + dh::ToSpan(entries)); dh::safe_cuda(cudaMemcpyAsync(pinned_candidates_out.data(), entries.data().get(), sizeof(GPUExpandEntry) * entries.size(), cudaMemcpyDeviceToHost)); diff --git a/tests/python-gpu/test_gpu_interaction_constraints.py b/tests/python-gpu/test_gpu_interaction_constraints.py index 885cf5bf9901..434cc15dacd5 100644 --- a/tests/python-gpu/test_gpu_interaction_constraints.py +++ b/tests/python-gpu/test_gpu_interaction_constraints.py @@ -1,8 +1,14 @@ -import numpy as np import sys + +import numpy as np +import pandas as pd + +import xgboost as xgb + sys.path.append("tests/python") # Don't import the test class, otherwise they will run twice. 
import test_interaction_constraints as test_ic # noqa + rng = np.random.RandomState(1994) @@ -10,7 +16,34 @@ class TestGPUInteractionConstraints: cputest = test_ic.TestInteractionConstraints() def test_interaction_constraints(self): - self.cputest.run_interaction_constraints(tree_method='gpu_hist') + self.cputest.run_interaction_constraints(tree_method="gpu_hist") def test_training_accuracy(self): - self.cputest.training_accuracy(tree_method='gpu_hist') + self.cputest.training_accuracy(tree_method="gpu_hist") + + # case where different number of features can occur in the evaluator + def test_issue_8730(self): + X = pd.DataFrame( + zip(range(0, 100), range(200, 300), range(300, 400), range(400, 500)), + columns=["A", "B", "C", "D"], + ) + y = np.array([*([0] * 50), *([1] * 50)]) + dm = xgb.DMatrix(X, label=y) + + params = { + "eta": 0.16095019509249486, + "min_child_weight": 1, + "subsample": 0.688567929338029, + "colsample_bynode": 0.7, + "gamma": 5.666579817418348e-06, + "lambda": 0.14943712232059794, + "grow_policy": "depthwise", + "max_depth": 3, + "tree_method": "gpu_hist", + "interaction_constraints": [["A", "B"], ["B", "D", "C"], ["C", "D"]], + "objective": "count:poisson", + "eval_metric": "poisson-nloglik", + "verbosity": 0, + } + + xgb.train(params, dm, num_boost_round=100) From 60303db2eeaf88cacfab6f2404f1379c383ee326 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 9 Feb 2023 20:16:39 +0800 Subject: [PATCH 040/126] [backport] Fix GPU L1 error. (#8749) (#8770) * [backport] Fix GPU L1 error. (#8749) * Fix backport. 
--- python-package/xgboost/compat.py | 2 +- src/tree/updater_gpu_hist.cu | 35 +++++++++++++++++-------- tests/cpp/tree/test_node_partition.cc | 24 +++++++++++++++++ tests/python-gpu/test_gpu_prediction.py | 10 ++++++- tests/python/test_updaters.py | 16 +++++++++++ 5 files changed, 74 insertions(+), 13 deletions(-) create mode 100644 tests/cpp/tree/test_node_partition.cc diff --git a/python-package/xgboost/compat.py b/python-package/xgboost/compat.py index fab734a01361..3be023abf5af 100644 --- a/python-package/xgboost/compat.py +++ b/python-package/xgboost/compat.py @@ -36,7 +36,6 @@ def lazy_isinstance(instance: Any, module: str, name: str) -> bool: PANDAS_INSTALLED = True except ImportError: - MultiIndex = object DataFrame = object Series = object @@ -161,6 +160,7 @@ def concat(value: Sequence[_T]) -> _T: # pylint: disable=too-many-return-statem # `importlib.utils`, except it's unclear from its document on how to use it. This one # seems to be easy to understand and works out of box. + # Copyright 2015 The TensorFlow Authors. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index 3b01d74374e7..c17252902ded 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -188,7 +188,8 @@ struct GPUHistMakerDevice { common::Span gpair; dh::device_vector monotone_constraints; - dh::device_vector update_predictions; + // node idx for each sample + dh::device_vector positions; TrainParam param; @@ -426,7 +427,7 @@ struct GPUHistMakerDevice { LOG(FATAL) << "Current objective function can not be used with external memory."; } p_out_position->Resize(0); - update_predictions.clear(); + positions.clear(); return; } @@ -461,8 +462,6 @@ struct GPUHistMakerDevice { HostDeviceVector* p_out_position) { auto d_matrix = page->GetDeviceAccessor(ctx_->gpu_id); auto d_gpair = this->gpair; - update_predictions.resize(row_partitioner->GetRows().size()); - auto d_update_predictions = dh::ToSpan(update_predictions); p_out_position->SetDevice(ctx_->gpu_id); p_out_position->Resize(row_partitioner->GetRows().size()); @@ -497,32 +496,45 @@ struct GPUHistMakerDevice { node = d_nodes[position]; } - d_update_predictions[row_id] = node.LeafValue(); return position; }; // NOLINT auto d_out_position = p_out_position->DeviceSpan(); row_partitioner->FinalisePosition(d_out_position, new_position_op); + auto s_position = p_out_position->ConstDeviceSpan(); + positions.resize(s_position.size()); + dh::safe_cuda(cudaMemcpyAsync(positions.data().get(), s_position.data(), + s_position.size_bytes(), cudaMemcpyDeviceToDevice)); + dh::LaunchN(row_partitioner->GetRows().size(), [=] __device__(size_t idx) { bst_node_t position = d_out_position[idx]; - d_update_predictions[idx] = d_nodes[position].LeafValue(); bool is_row_sampled = d_gpair[idx].GetHess() - .0f == 0.f; d_out_position[idx] = is_row_sampled ? 
~position : position; }); } bool UpdatePredictionCache(linalg::VectorView out_preds_d, RegTree const* p_tree) { - if (update_predictions.empty()) { + if (positions.empty()) { return false; } + CHECK(p_tree); dh::safe_cuda(cudaSetDevice(ctx_->gpu_id)); CHECK_EQ(out_preds_d.DeviceIdx(), ctx_->gpu_id); - auto d_update_predictions = dh::ToSpan(update_predictions); - CHECK_EQ(out_preds_d.Size(), d_update_predictions.size()); - dh::LaunchN(out_preds_d.Size(), [=] XGBOOST_DEVICE(size_t idx) mutable { - out_preds_d(idx) += d_update_predictions[idx]; + + auto d_position = dh::ToSpan(positions); + CHECK_EQ(out_preds_d.Size(), d_position.size()); + + auto const& h_nodes = p_tree->GetNodes(); + dh::caching_device_vector nodes(h_nodes.size()); + dh::safe_cuda(cudaMemcpyAsync(nodes.data().get(), h_nodes.data(), + h_nodes.size() * sizeof(RegTree::Node), cudaMemcpyHostToDevice)); + auto d_nodes = dh::ToSpan(nodes); + dh::LaunchN(d_position.size(), [=] XGBOOST_DEVICE(std::size_t idx) mutable { + bst_node_t nidx = d_position[idx]; + auto weight = d_nodes[nidx].LeafValue(); + out_preds_d(idx) += weight; }); return true; } @@ -865,6 +877,7 @@ class GPUHistMaker : public TreeUpdater { std::unique_ptr> maker; // NOLINT char const* Name() const override { return "grow_gpu_hist"; } + bool HasNodePosition() const override { return true; } private: bool initialised_{false}; diff --git a/tests/cpp/tree/test_node_partition.cc b/tests/cpp/tree/test_node_partition.cc new file mode 100644 index 000000000000..883c8e68ffa6 --- /dev/null +++ b/tests/cpp/tree/test_node_partition.cc @@ -0,0 +1,24 @@ +/** + * Copyright 2023 by XGBoost contributors + */ +#include +#include +#include + +namespace xgboost { +TEST(Updater, HasNodePosition) { + Context ctx; + ObjInfo task{ObjInfo::kRegression, true, true}; + std::unique_ptr up{TreeUpdater::Create("grow_histmaker", &ctx, task)}; + ASSERT_TRUE(up->HasNodePosition()); + + up.reset(TreeUpdater::Create("grow_quantile_histmaker", &ctx, task)); + 
ASSERT_TRUE(up->HasNodePosition()); + +#if defined(XGBOOST_USE_CUDA) + ctx.gpu_id = 0; + up.reset(TreeUpdater::Create("grow_gpu_hist", &ctx, task)); + ASSERT_TRUE(up->HasNodePosition()); +#endif // defined(XGBOOST_USE_CUDA) +} +} // namespace xgboost diff --git a/tests/python-gpu/test_gpu_prediction.py b/tests/python-gpu/test_gpu_prediction.py index 3dedb0637e7c..56f488f0cd66 100644 --- a/tests/python-gpu/test_gpu_prediction.py +++ b/tests/python-gpu/test_gpu_prediction.py @@ -338,13 +338,21 @@ def run_predict_leaf_booster(self, param, num_rounds, dataset): @given(predict_parameter_strategy, tm.dataset_strategy) @settings(deadline=None, max_examples=20, print_blob=True) def test_predict_leaf_gbtree(self, param, dataset): + # Unsupported for random forest + if param.get("num_parallel_tree", 1) > 1 and dataset.name.endswith("-l1"): + return + param['booster'] = 'gbtree' param['tree_method'] = 'gpu_hist' self.run_predict_leaf_booster(param, 10, dataset) @given(predict_parameter_strategy, tm.dataset_strategy) @settings(deadline=None, max_examples=20, print_blob=True) - def test_predict_leaf_dart(self, param, dataset): + def test_predict_leaf_dart(self, param: dict, dataset: tm.TestDataset) -> None: + # Unsupported for random forest + if param.get("num_parallel_tree", 1) > 1 and dataset.name.endswith("-l1"): + return + param['booster'] = 'dart' param['tree_method'] = 'gpu_hist' self.run_predict_leaf_booster(param, 10, dataset) diff --git a/tests/python/test_updaters.py b/tests/python/test_updaters.py index e28f173860e7..cd159df67dcb 100644 --- a/tests/python/test_updaters.py +++ b/tests/python/test_updaters.py @@ -458,6 +458,22 @@ def get_score(config: Dict) -> float: config_0 = json.loads(booster_0.save_config()) np.testing.assert_allclose(get_score(config_0), get_score(config_1) + 1) + evals_result: Dict[str, Dict[str, list]] = {} + xgb.train( + { + "tree_method": tree_method, + "objective": "reg:absoluteerror", + "subsample": 0.8 + }, + Xy, + num_boost_round=10, + 
evals=[(Xy, "Train")], + evals_result=evals_result, + ) + mae = evals_result["Train"]["mae"] + assert mae[-1] < 20.0 + assert tm.non_increasing(mae) + @pytest.mark.skipif(**tm.no_sklearn()) @pytest.mark.parametrize( "tree_method,weighted", [ From 08a547f5c24e89a390f7cc07ebd64eec3a546800 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 15 Feb 2023 01:39:20 +0800 Subject: [PATCH 041/126] [backport] Fix feature types param (#8772) (#8801) Signed-off-by: Weichen Xu Co-authored-by: WeichenXu --- python-package/xgboost/spark/core.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/python-package/xgboost/spark/core.py b/python-package/xgboost/spark/core.py index caa6e3cd0931..4f770e139a27 100644 --- a/python-package/xgboost/spark/core.py +++ b/python-package/xgboost/spark/core.py @@ -140,6 +140,13 @@ } +# TODO: supply hint message for all other unsupported params. +_unsupported_params_hint_message = { + "enable_categorical": "`xgboost.spark` estimators do not have 'enable_categorical' param, " + "but you can set `feature_types` param and mark categorical features with 'c' string." +} + + class _SparkXGBParams( HasFeaturesCol, HasLabelCol, @@ -523,7 +530,10 @@ def setParams(self, **kwargs): # pylint: disable=invalid-name or k in _unsupported_predict_params or k in _unsupported_train_params ): - raise ValueError(f"Unsupported param '{k}'.") + err_msg = _unsupported_params_hint_message.get( + k, f"Unsupported param '{k}'." 
+ ) + raise ValueError(err_msg) _extra_params[k] = v _existing_extra_params = self.getOrDefault(self.arbitrary_params_dict) self._set(arbitrary_params_dict={**_existing_extra_params, **_extra_params}) @@ -749,6 +759,8 @@ def _fit(self, dataset): "feature_weights": self.getOrDefault(self.feature_weights), "missing": float(self.getOrDefault(self.missing)), } + if dmatrix_kwargs["feature_types"] is not None: + dmatrix_kwargs["enable_categorical"] = True booster_params["nthread"] = cpu_per_task use_gpu = self.getOrDefault(self.use_gpu) From f15a6d2b1950808329a31ddef2707ad65610adea Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 15 Feb 2023 02:45:09 +0800 Subject: [PATCH 042/126] [backport] Fix ranking with quantile dmatrix and group weight. (#8762) (#8800) * [backport] Fix ranking with quantile dmatrix and group weight. (#8762) * backport test utilities. --- src/data/iterative_dmatrix.cc | 13 ++++++-- .../test_device_quantile_dmatrix.py | 14 +++++++++ tests/python/test_quantile_dmatrix.py | 12 ++++++++ tests/python/testing.py | 30 ++++++++++++++++++- 4 files changed, 65 insertions(+), 4 deletions(-) diff --git a/src/data/iterative_dmatrix.cc b/src/data/iterative_dmatrix.cc index 19dd3490d040..ebb9d1a98e20 100644 --- a/src/data/iterative_dmatrix.cc +++ b/src/data/iterative_dmatrix.cc @@ -58,6 +58,13 @@ void GetCutsFromRef(std::shared_ptr ref_, bst_feature_t n_features, Bat } }; auto ellpack = [&]() { + // workaround ellpack being initialized from CPU. 
+ if (p.gpu_id == Context::kCpuId) { + p.gpu_id = ref_->Ctx()->gpu_id; + } + if (p.gpu_id == Context::kCpuId) { + p.gpu_id = 0; + } for (auto const& page : ref_->GetBatches(p)) { GetCutsFromEllpack(page, p_cuts); break; @@ -172,9 +179,9 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing, size_t i = 0; while (iter.Next()) { if (!p_sketch) { - p_sketch.reset(new common::HostSketchContainer{batch_param_.max_bin, - proxy->Info().feature_types.ConstHostSpan(), - column_sizes, false, ctx_.Threads()}); + p_sketch.reset(new common::HostSketchContainer{ + batch_param_.max_bin, proxy->Info().feature_types.ConstHostSpan(), column_sizes, + !proxy->Info().group_ptr_.empty(), ctx_.Threads()}); } HostAdapterDispatch(proxy, [&](auto const& batch) { proxy->Info().num_nonzero_ = batch_nnz[i]; diff --git a/tests/python-gpu/test_device_quantile_dmatrix.py b/tests/python-gpu/test_device_quantile_dmatrix.py index 2c3a7a760a59..b3d7ef660fd4 100644 --- a/tests/python-gpu/test_device_quantile_dmatrix.py +++ b/tests/python-gpu/test_device_quantile_dmatrix.py @@ -139,3 +139,17 @@ def test_to_csr(self, n_samples, n_features, sparsity) -> None: booster.predict(xgb.DMatrix(d_m.get_data())), atol=1e-6, ) + + def test_ltr(self) -> None: + import cupy as cp + X, y, qid, w = tm.make_ltr(100, 3, 3, 5) + # make sure GPU is used to run sketching. 
+ cpX = cp.array(X) + Xy_qdm = xgb.QuantileDMatrix(cpX, y, qid=qid, weight=w) + Xy = xgb.DMatrix(X, y, qid=qid, weight=w) + xgb.train({"tree_method": "gpu_hist", "objective": "rank:ndcg"}, Xy) + + from_dm = xgb.QuantileDMatrix(X, weight=w, ref=Xy) + from_qdm = xgb.QuantileDMatrix(X, weight=w, ref=Xy_qdm) + + assert tm.predictor_equal(from_qdm, from_dm) diff --git a/tests/python/test_quantile_dmatrix.py b/tests/python/test_quantile_dmatrix.py index 65ccfa4e5e76..9000d99881d4 100644 --- a/tests/python/test_quantile_dmatrix.py +++ b/tests/python/test_quantile_dmatrix.py @@ -9,7 +9,9 @@ make_batches, make_batches_sparse, make_categorical, + make_ltr, make_sparse_regression, + predictor_equal, ) import xgboost as xgb @@ -218,6 +220,16 @@ def test_predict(self) -> None: b = booster.predict(qXy) np.testing.assert_allclose(a, b) + def test_ltr(self) -> None: + X, y, qid, w = make_ltr(100, 3, 3, 5) + Xy_qdm = xgb.QuantileDMatrix(X, y, qid=qid, weight=w) + Xy = xgb.DMatrix(X, y, qid=qid, weight=w) + xgb.train({"tree_method": "hist", "objective": "rank:ndcg"}, Xy) + + from_qdm = xgb.QuantileDMatrix(X, weight=w, ref=Xy_qdm) + from_dm = xgb.QuantileDMatrix(X, weight=w, ref=Xy) + assert predictor_equal(from_qdm, from_dm) + # we don't test empty Quantile DMatrix in single node construction. 
@given( strategies.integers(1, 1000), diff --git a/tests/python/testing.py b/tests/python/testing.py index 63d33de97e5a..f0f09911f078 100644 --- a/tests/python/testing.py +++ b/tests/python/testing.py @@ -466,7 +466,22 @@ def make_categorical( return df, label -def _cat_sampled_from(): +def make_ltr( + n_samples: int, n_features: int, n_query_groups: int, max_rel: int +) -> Tuple[np.ndarray, np.ndarray, np.ndarray, np.ndarray]: + """Make a dataset for testing LTR.""" + rng = np.random.default_rng(1994) + X = rng.normal(0, 1.0, size=n_samples * n_features).reshape(n_samples, n_features) + y = rng.integers(0, max_rel, size=n_samples) + qid = rng.integers(0, n_query_groups, size=n_samples) + w = rng.normal(0, 1.0, size=n_query_groups) + w -= np.min(w) + w /= np.max(w) + qid = np.sort(qid) + return X, y, qid, w + + +def _cat_sampled_from() -> strategies.SearchStrategy: @strategies.composite def _make_cat(draw): n_samples = draw(strategies.integers(2, 512)) @@ -775,6 +790,19 @@ def __exit__(self, *args): os.remove(f) +def predictor_equal(lhs: xgb.DMatrix, rhs: xgb.DMatrix) -> bool: + """Assert whether two DMatrices contain the same predictors.""" + lcsr = lhs.get_data() + rcsr = rhs.get_data() + return all( + ( + np.array_equal(lcsr.data, rcsr.data), + np.array_equal(lcsr.indices, rcsr.indices), + np.array_equal(lcsr.indptr, rcsr.indptr), + ) + ) + + @contextmanager def captured_output(): """Reassign stdout temporarily in order to test printed statements From c22f6db4bfae5a415213dca624b2f9a882b4e946 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 16 Feb 2023 06:39:25 +0800 Subject: [PATCH 043/126] [backport] Fix CPU bin compression with categorical data. (#8809) (#8810) * [backport] Fix CPU bin compression with categorical data. (#8809) * Fix CPU bin compression with categorical data. * The bug causes the maximum category to be lesser than 256 or the maximum number of bins when the input data is dense. * Avoid test symbol. 
--- src/common/column_matrix.cc | 2 +- src/data/gradient_index.cc | 25 +++++++++++++------------ src/data/gradient_index.cu | 2 +- src/data/gradient_index.h | 10 +++++++--- src/data/gradient_index_format.cc | 6 +++--- tests/cpp/data/test_gradient_index.cc | 24 ++++++++++++++++++++++++ 6 files changed, 49 insertions(+), 20 deletions(-) diff --git a/src/common/column_matrix.cc b/src/common/column_matrix.cc index 91977b96dcdf..d8acfa7a5570 100644 --- a/src/common/column_matrix.cc +++ b/src/common/column_matrix.cc @@ -46,7 +46,7 @@ void ColumnMatrix::InitStorage(GHistIndexMatrix const& gmat, double sparse_thres feature_offsets_[fid] = accum_index; } - SetTypeSize(gmat.max_num_bins); + SetTypeSize(gmat.MaxNumBinPerFeat()); auto storage_size = feature_offsets_.back() * static_cast>(bins_type_size_); index_.resize(storage_size, 0); diff --git a/src/data/gradient_index.cc b/src/data/gradient_index.cc index 2e9d38a1918c..a2d140a03edf 100644 --- a/src/data/gradient_index.cc +++ b/src/data/gradient_index.cc @@ -20,13 +20,13 @@ GHistIndexMatrix::GHistIndexMatrix() : columns_{std::make_unique hess) { + common::Span hess) + : max_numeric_bins_per_feat{max_bins_per_feat} { CHECK(p_fmat->SingleColBlock()); // We use sorted sketching for approx tree method since it's more efficient in // computation time (but higher memory usage). 
cut = common::SketchOnDMatrix(p_fmat, max_bins_per_feat, n_threads, sorted_sketch, hess); - max_num_bins = max_bins_per_feat; const uint32_t nbins = cut.Ptrs().back(); hit_count.resize(nbins, 0); hit_count_tloc_.resize(n_threads * nbins, 0); @@ -63,7 +63,7 @@ GHistIndexMatrix::GHistIndexMatrix(MetaInfo const &info, common::HistogramCuts & : row_ptr(info.num_row_ + 1, 0), hit_count(cuts.TotalBins(), 0), cut{std::forward(cuts)}, - max_num_bins(max_bin_per_feat), + max_numeric_bins_per_feat(max_bin_per_feat), isDense_{info.num_col_ * info.num_row_ == info.num_nonzero_} {} #if !defined(XGBOOST_USE_CUDA) @@ -86,13 +86,13 @@ void GHistIndexMatrix::PushBatch(SparsePage const &batch, common::Span ft, - common::HistogramCuts const &cuts, int32_t max_bins_per_feat, - bool isDense, double sparse_thresh, int32_t n_threads) { + common::HistogramCuts cuts, int32_t max_bins_per_feat, + bool isDense, double sparse_thresh, int32_t n_threads) + : cut{std::move(cuts)}, + max_numeric_bins_per_feat{max_bins_per_feat}, + base_rowid{batch.base_rowid}, + isDense_{isDense} { CHECK_GE(n_threads, 1); - base_rowid = batch.base_rowid; - isDense_ = isDense; - cut = cuts; - max_num_bins = max_bins_per_feat; CHECK_EQ(row_ptr.size(), 0); // The number of threads is pegged to the batch size. 
If the OMP // block is parallelized on anything other than the batch/block size, @@ -127,12 +127,13 @@ INSTANTIATION_PUSH(data::SparsePageAdapterBatch) #undef INSTANTIATION_PUSH void GHistIndexMatrix::ResizeIndex(const size_t n_index, const bool isDense) { - if ((max_num_bins - 1 <= static_cast(std::numeric_limits::max())) && isDense) { + if ((MaxNumBinPerFeat() - 1 <= static_cast(std::numeric_limits::max())) && + isDense) { // compress dense index to uint8 index.SetBinTypeSize(common::kUint8BinsTypeSize); index.Resize((sizeof(uint8_t)) * n_index); - } else if ((max_num_bins - 1 > static_cast(std::numeric_limits::max()) && - max_num_bins - 1 <= static_cast(std::numeric_limits::max())) && + } else if ((MaxNumBinPerFeat() - 1 > static_cast(std::numeric_limits::max()) && + MaxNumBinPerFeat() - 1 <= static_cast(std::numeric_limits::max())) && isDense) { // compress dense index to uint16 index.SetBinTypeSize(common::kUint16BinsTypeSize); diff --git a/src/data/gradient_index.cu b/src/data/gradient_index.cu index 42d935b3ca84..af5b0f67ba05 100644 --- a/src/data/gradient_index.cu +++ b/src/data/gradient_index.cu @@ -65,7 +65,7 @@ void GetRowPtrFromEllpack(Context const* ctx, EllpackPageImpl const* page, GHistIndexMatrix::GHistIndexMatrix(Context const* ctx, MetaInfo const& info, EllpackPage const& in_page, BatchParam const& p) - : max_num_bins{p.max_bin} { + : max_numeric_bins_per_feat{p.max_bin} { auto page = in_page.Impl(); isDense_ = page->is_dense; diff --git a/src/data/gradient_index.h b/src/data/gradient_index.h index 10d9e770dc13..0fc0daf9dee0 100644 --- a/src/data/gradient_index.h +++ b/src/data/gradient_index.h @@ -133,11 +133,15 @@ class GHistIndexMatrix { std::vector hit_count; /*! \brief The corresponding cuts */ common::HistogramCuts cut; - /*! \brief max_bin for each feature. */ - bst_bin_t max_num_bins; + /** \brief max_bin for each feature. */ + bst_bin_t max_numeric_bins_per_feat; /*! 
\brief base row index for current page (used by external memory) */ size_t base_rowid{0}; + bst_bin_t MaxNumBinPerFeat() const { + return std::max(static_cast(cut.MaxCategory() + 1), max_numeric_bins_per_feat); + } + ~GHistIndexMatrix(); /** * \brief Constrcutor for SimpleDMatrix. @@ -160,7 +164,7 @@ class GHistIndexMatrix { * \brief Constructor for external memory. */ GHistIndexMatrix(SparsePage const& page, common::Span ft, - common::HistogramCuts const& cuts, int32_t max_bins_per_feat, bool is_dense, + common::HistogramCuts cuts, int32_t max_bins_per_feat, bool is_dense, double sparse_thresh, int32_t n_threads); GHistIndexMatrix(); // also for ext mem, empty ctor so that we can read the cache back. diff --git a/src/data/gradient_index_format.cc b/src/data/gradient_index_format.cc index 4b3fd0ea0f77..204157682a94 100644 --- a/src/data/gradient_index_format.cc +++ b/src/data/gradient_index_format.cc @@ -35,7 +35,7 @@ class GHistIndexRawFormat : public SparsePageFormat { if (!fi->Read(&page->hit_count)) { return false; } - if (!fi->Read(&page->max_num_bins)) { + if (!fi->Read(&page->max_numeric_bins_per_feat)) { return false; } if (!fi->Read(&page->base_rowid)) { @@ -76,8 +76,8 @@ class GHistIndexRawFormat : public SparsePageFormat { page.hit_count.size() * sizeof(decltype(page.hit_count)::value_type) + sizeof(uint64_t); // max_bins, base row, is_dense - fo->Write(page.max_num_bins); - bytes += sizeof(page.max_num_bins); + fo->Write(page.max_numeric_bins_per_feat); + bytes += sizeof(page.max_numeric_bins_per_feat); fo->Write(page.base_rowid); bytes += sizeof(page.base_rowid); fo->Write(page.IsDense()); diff --git a/tests/cpp/data/test_gradient_index.cc b/tests/cpp/data/test_gradient_index.cc index 6233f1b25231..66e9cfe0c525 100644 --- a/tests/cpp/data/test_gradient_index.cc +++ b/tests/cpp/data/test_gradient_index.cc @@ -68,6 +68,30 @@ TEST(GradientIndex, FromCategoricalBasic) { } } +TEST(GradientIndex, FromCategoricalLarge) { + size_t constexpr kRows = 1000, kCats 
= 512, kCols = 1; + bst_bin_t max_bins = 8; + auto x = GenerateRandomCategoricalSingleColumn(kRows, kCats); + auto m = GetDMatrixFromData(x, kRows, 1); + Context ctx; + + auto &h_ft = m->Info().feature_types.HostVector(); + h_ft.resize(kCols, FeatureType::kCategorical); + + BatchParam p{max_bins, 0.8}; + { + GHistIndexMatrix gidx(m.get(), max_bins, p.sparse_thresh, false, Context{}.Threads(), {}); + ASSERT_TRUE(gidx.index.GetBinTypeSize() == common::kUint16BinsTypeSize); + } + { + for (auto const &page : m->GetBatches(p)) { + common::HistogramCuts cut = page.cut; + GHistIndexMatrix gidx{m->Info(), std::move(cut), max_bins}; + ASSERT_EQ(gidx.MaxNumBinPerFeat(), kCats); + } + } +} + TEST(GradientIndex, PushBatch) { size_t constexpr kRows = 64, kCols = 4; bst_bin_t max_bins = 64; From 36ad160501251336bfe69b602acc37ab3ec32d69 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 16 Feb 2023 06:40:01 +0800 Subject: [PATCH 044/126] Bump version to 1.7.4. (#8805) --- CMakeLists.txt | 2 +- R-package/DESCRIPTION | 4 ++-- R-package/configure | 18 +++++++++--------- R-package/configure.ac | 2 +- include/xgboost/version_config.h | 2 +- jvm-packages/pom.xml | 2 +- jvm-packages/xgboost4j-example/pom.xml | 8 ++++---- jvm-packages/xgboost4j-flink/pom.xml | 6 +++--- jvm-packages/xgboost4j-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark/pom.xml | 4 ++-- jvm-packages/xgboost4j/pom.xml | 4 ++-- python-package/xgboost/VERSION | 2 +- 13 files changed, 31 insertions(+), 31 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 089e8a2a4dcf..8a8c2e788354 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.14 FATAL_ERROR) -project(xgboost LANGUAGES CXX C VERSION 1.7.3) +project(xgboost LANGUAGES CXX C VERSION 1.7.4) include(cmake/Utils.cmake) list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") cmake_policy(SET CMP0022 NEW) diff --git a/R-package/DESCRIPTION 
b/R-package/DESCRIPTION index 8c372642968a..20c45c0ae0f8 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -1,8 +1,8 @@ Package: xgboost Type: Package Title: Extreme Gradient Boosting -Version: 1.7.3.1 -Date: 2023-01-06 +Version: 1.7.4.1 +Date: 2023-02-15 Authors@R: c( person("Tianqi", "Chen", role = c("aut"), email = "tianqi.tchen@gmail.com"), diff --git a/R-package/configure b/R-package/configure index 84452be04ece..6c157ca17e9e 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for xgboost 1.7.3. +# Generated by GNU Autoconf 2.71 for xgboost 1.7.4. # # # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, @@ -607,8 +607,8 @@ MAKEFLAGS= # Identity of this package. PACKAGE_NAME='xgboost' PACKAGE_TARNAME='xgboost' -PACKAGE_VERSION='1.7.3' -PACKAGE_STRING='xgboost 1.7.3' +PACKAGE_VERSION='1.7.4' +PACKAGE_STRING='xgboost 1.7.4' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1225,7 +1225,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures xgboost 1.7.3 to adapt to many kinds of systems. +\`configure' configures xgboost 1.7.4 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1287,7 +1287,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of xgboost 1.7.3:";; + short | recursive ) echo "Configuration of xgboost 1.7.4:";; esac cat <<\_ACEOF @@ -1367,7 +1367,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -xgboost configure 1.7.3 +xgboost configure 1.7.4 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. 
@@ -1533,7 +1533,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by xgboost $as_me 1.7.3, which was +It was created by xgboost $as_me 1.7.4, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -3412,7 +3412,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by xgboost $as_me 1.7.3, which was +This file was extended by xgboost $as_me 1.7.4, which was generated by GNU Autoconf 2.71. Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -3467,7 +3467,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -xgboost config.status 1.7.3 +xgboost config.status 1.7.4 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/R-package/configure.ac b/R-package/configure.ac index 63cd02fafb2f..98dfd9782ba1 100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -2,7 +2,7 @@ AC_PREREQ(2.69) -AC_INIT([xgboost],[1.7.3],[],[xgboost],[]) +AC_INIT([xgboost],[1.7.4],[],[xgboost],[]) : ${R_HOME=`R RHOME`} if test -z "${R_HOME}"; then diff --git a/include/xgboost/version_config.h b/include/xgboost/version_config.h index 68e748daec6e..f8e7ef976242 100644 --- a/include/xgboost/version_config.h +++ b/include/xgboost/version_config.h @@ -6,6 +6,6 @@ #define XGBOOST_VER_MAJOR 1 #define XGBOOST_VER_MINOR 7 -#define XGBOOST_VER_PATCH 3 +#define XGBOOST_VER_PATCH 4 #endif // XGBOOST_VERSION_CONFIG_H_ diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index 81ada460db7b..9e25b45734b0 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.3 + 
1.7.4 pom XGBoost JVM Package JVM Package for XGBoost diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index 71a7f3ca8572..8a9f50dbbbd6 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.3 + 1.7.4 xgboost4j-example_2.12 - 1.7.3 + 1.7.4 jar @@ -26,7 +26,7 @@ ml.dmlc xgboost4j-spark_${scala.binary.version} - 1.7.3 + 1.7.4 org.apache.spark @@ -37,7 +37,7 @@ ml.dmlc xgboost4j-flink_${scala.binary.version} - 1.7.3 + 1.7.4 org.apache.commons diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index 8fb31cca0e31..83840d00cc0d 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.3 + 1.7.4 xgboost4j-flink_2.12 - 1.7.3 + 1.7.4 @@ -26,7 +26,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.3 + 1.7.4 org.apache.commons diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index 6d440e8a10f1..c67c9e729bb6 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.3 + 1.7.4 xgboost4j-gpu_2.12 - 1.7.3 + 1.7.4 jar diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index 84fa5ad3d47c..7d0c150ae38f 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.3 + 1.7.4 xgboost4j-spark-gpu_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j-gpu_${scala.binary.version} - 1.7.3 + 1.7.4 org.apache.spark diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index f41e643cdaee..55502a2029f2 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.3 + 1.7.4 xgboost4j-spark_2.12 @@ 
-24,7 +24,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.3 + 1.7.4 org.apache.spark diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index 9f2a2a86969b..76a9ad808199 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.3 + 1.7.4 xgboost4j_2.12 - 1.7.3 + 1.7.4 jar diff --git a/python-package/xgboost/VERSION b/python-package/xgboost/VERSION index 661e7aeadf36..10c088013f87 100644 --- a/python-package/xgboost/VERSION +++ b/python-package/xgboost/VERSION @@ -1 +1 @@ -1.7.3 +1.7.4 From b9f79dce5bf104653fd81cc65f5b0cb82e9de18e Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Fri, 17 Mar 2023 08:41:13 -0600 Subject: [PATCH 045/126] version config, this auto-changed during steps I followed --- include/xgboost/version_config.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/xgboost/version_config.h b/include/xgboost/version_config.h index b8b4517f0468..b91107f068b5 100644 --- a/include/xgboost/version_config.h +++ b/include/xgboost/version_config.h @@ -6,6 +6,6 @@ #define XGBOOST_VER_MAJOR 1 #define XGBOOST_VER_MINOR 7 -#define XGBOOST_VER_PATCH 0 +#define XGBOOST_VER_PATCH 1 #endif // XGBOOST_VERSION_CONFIG_H_ From e2f04dd0d9c255effbc4e6396a1a97f5a87bfb1a Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Tue, 21 Mar 2023 14:21:33 -0600 Subject: [PATCH 046/126] Changes in branch --- include/xgboost/c_api.h | 31 ++ jvm-packages/pom.xml | 2 +- jvm-packages/xgboost4j-example/pom.xml | 4 +- jvm-packages/xgboost4j-flink/pom.xml | 4 +- jvm-packages/xgboost4j-gpu/pom.xml | 4 +- jvm-packages/xgboost4j-spark-gpu/pom.xml | 4 +- jvm-packages/xgboost4j-spark/pom.xml | 4 +- jvm-packages/xgboost4j/pom.xml | 4 +- .../java/ml/dmlc/xgboost4j/java/Booster.java | 148 ++++++++++ .../ml/dmlc/xgboost4j/java/XGBoostJNI.java | 4 + .../xgboost4j/src/native/xgboost4j.cpp | 26 ++ jvm-packages/xgboost4j/src/native/xgboost4j.h | 7 + 
.../dmlc/xgboost4j/java/BoosterImplTest.java | 274 ++++++++++++++++++ src/c_api/c_api.cc | 79 +++-- 14 files changed, 557 insertions(+), 38 deletions(-) diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index 2daa4203878b..4a2f7defdd0c 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -984,6 +984,37 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle, bst_ulong *out_len, const float **out_result); +/*! + * \brief make prediction based on dmat (deprecated, use `XGBoosterPredictFromDMatrix` instead) + * \param handle handle + * \param dmat data matrix + * \param option_mask bit-mask of options taken in prediction, possible values + * 0:normal prediction + * 1:output margin instead of transformed value + * 2:output leaf index of trees instead of leaf value, note leaf index is unique per tree + * 4:output feature contributions to individual predictions + * \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees + * when the parameter is set to 0, we will use all the trees + * \param training Whether the prediction function is used as part of a training loop. + * Prediction can be run in 2 scenarios: + * 1. Given data matrix X, obtain prediction y_pred from the model. + * 2. Obtain the prediction for computing gradients. For example, DART booster performs dropout + * during training, and the prediction result will be different from the one obtained by normal + * inference step due to dropped trees. + * Set training=false for the first scenario. Set training=true for the second scenario. + * The second scenario applies when you are defining a custom objective function. 
+ * \param out_len used to store length of returning result + * \param out_result used to set a pointer to array + * \return 0 when success, -1 when failure happens + */ +XGB_DLL int XGBoosterPredict(BoosterHandle handle, + DMatrixHandle dmat, + int option_mask, + unsigned ntree_limit, + int training, + bst_ulong *out_len, + const float **out_result); + /*! * \brief Make prediction from DMatrix, replacing \ref XGBoosterPredict. * diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index f39b5dcfa47f..5e88472aaa06 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1 + 1.7.1-sovrn pom XGBoost JVM Package JVM Package for XGBoost diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index 2f27ae924242..3cd694868654 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1 + 1.7.1-sovrn xgboost4j-example_2.12 - 1.7.1 + 1.7.1-sovrn jar diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index e90b74d27e95..25613407a189 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1 + 1.7.1-sovrn xgboost4j-flink_2.12 - 1.7.1 + 1.7.1-sovrn/version> diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index 5d97e8681d39..43e748a4d342 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1 + 1.7.1-sovrn xgboost4j-gpu_2.12 - 1.7.1 + 1.7.1-sovrn jar diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index f5a3587fe1e6..6e2f119e2d76 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1 + 1.7.1-sovrn 
xgboost4j-spark-gpu_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j-gpu_${scala.binary.version} - 1.7.1 + 1.7.1-sovrn org.apache.spark diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index 66f9859926a7..7a50b5be1725 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1 + 1.7.1-sovrn xgboost4j-spark_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.1 + 1.7.1-sovrn org.apache.spark diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index bc9654f36dd0..31c966d8ddb6 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1 + 1.7.1-sovrn xgboost4j_2.12 - 1.7.1 + 1.7.1-sovrn jar diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java index ed1a3f5c9f92..7735896e772d 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java @@ -315,6 +315,154 @@ private synchronized float[][] predict(DMatrix data, } return predicts; } +/** + * Perform thread-safe prediction. Calls + * inplace_predict(data, num_rows, num_features, Float.NaN, false, 0, false, false). 
+ * + * @param data Flattened input matrix of features for prediction + * @param num_rows The number of preditions to make (count of input matrix rows) + * @param num_features The number of features in the model (count of input matrix columns) + * + * @return predict Result matrix + * + * @see #inplace_predict(float[] data, int num_rows, int num_features, float missing, + * boolean outputMargin, int treeLimit, boolean predLeaf, + * boolean predContribs) + */ + public float[][] inplace_predict(float[] data, + int num_rows, + int num_features) throws XGBoostError { + return this.inplace_predict(data, num_rows, num_features, Float.NaN, false, 0, false, false); + } + + /** + * Perform thread-safe prediction. Calls + * inplace_predict(data, num_rows, num_features, missing, false, 0, false, false). + * + * @param data Flattened input matrix of features for prediction + * @param num_rows The number of preditions to make (count of input matrix rows) + * @param num_features The number of features in the model (count of input matrix columns) + * @param missing Value indicating missing element in the data input matrix + * + * @return predict Result matrix + * + * @see #inplace_predict(float[] data, int num_rows, int num_features, float missing, + * boolean outputMargin, int treeLimit, boolean predLeaf, + * boolean predContribs) + */ + public float[][] inplace_predict(float[] data, + int num_rows, + int num_features, + float missing) throws XGBoostError { + return this.inplace_predict(data, num_rows, num_features, missing, false, 0, false, false); + } + + /** + * Perform thread-safe prediction. Calls + * inplace_predict(data, num_rows, num_features, missing, + * outputMargin, 0, false, false). 
+ * + * @param data Flattened input matrix of features for prediction + * @param num_rows The number of preditions to make (count of input matrix rows) + * @param num_features The number of features in the model (count of input matrix columns) + * @param missing Value indicating missing element in the data input matrix + * @param outputMargin Whether to only predict margin value instead of transformed prediction + * + * @return predict Result matrix + * + * @see #inplace_predict(float[] data, int num_rows, int num_features, float missing, + * boolean outputMargin, int treeLimit, boolean predLeaf, + * boolean predContribs) + */ + + public float[][] inplace_predict(float[] data, + int num_rows, + int num_features, + float missing, + boolean outputMargin) throws XGBoostError { + return this.inplace_predict(data, num_rows, num_features, missing, outputMargin, + 0, false, false); + } + + /** + * Perform thread-safe prediction. Calls + * inplace_predict(data, num_rows, num_features, missing, + * outputMargin, treeLimit, false, false). + * + * @param data Flattened input matrix of features for prediction + * @param num_rows The number of preditions to make (count of input matrix rows) + * @param num_features The number of features in the model (count of input matrix columns) + * @param missing Value indicating missing element in the data input matrix + * @param outputMargin Whether to only predict margin value instead of transformed prediction + * @param treeLimit limit number of trees, 0 means all trees. 
+ * + * @return predict Result matrix + * + * @see #inplace_predict(float[] data, int num_rows, int num_features, float missing, + * boolean outputMargin, int treeLimit, boolean predLeaf, + * boolean predContribs) + */ + public float[][] inplace_predict(float[] data, + int num_rows, + int num_features, + float missing, + boolean outputMargin, + int treeLimit) throws XGBoostError { + return this.inplace_predict(data, num_rows, num_features, missing, outputMargin, + treeLimit, false, false); + } + + /** + * Perform thread-safe prediction. + * + * @param data Flattened input matrix of features for prediction + * @param num_rows The number of preditions to make (count of input matrix rows) + * @param num_features The number of features in the model (count of input matrix columns) + * @param missing Value indicating missing element in the data input matrix + * @param outputMargin Whether to only predict margin value instead of transformed prediction + * @param treeLimit limit number of trees, 0 means all trees. + * @param predLeaf prediction minimum to keep leafs + * @param predContribs prediction feature contributions + * + * @return predict Result matrix + */ + public float[][] inplace_predict(float[] data, + int num_rows, + int num_features, + float missing, + boolean outputMargin, + int treeLimit, + boolean predLeaf, + boolean predContribs) throws XGBoostError { + int optionMask = 0; + if (outputMargin) { + optionMask |= 1; + } + if (predLeaf) { + optionMask = 2; + } + if (predContribs) { + optionMask = 4; + } + + float[][] rawPredicts = new float[1][]; + XGBoostJNI.checkCall(XGBoostJNI.XGBoosterInplacePredict(handle, data, num_rows, num_features, + missing, optionMask, treeLimit, rawPredicts)); // pass missing and treelimit here? 
+ + // System.out.println("Booster.inplace_predict rawPredicts[0].length = " + + // rawPredicts[0].length); + + int row = num_rows; + int col = rawPredicts[0].length / row; + float[][] predicts = new float[row][col]; + int r, c; + for (int i = 0; i < rawPredicts[0].length; i++) { + r = i / col; + c = i % col; + predicts[r][c] = rawPredicts[0][i]; + } + return predicts; + } /** * Predict leaf indices given the data diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoostJNI.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoostJNI.java index afe576598956..f73112dad216 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoostJNI.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoostJNI.java @@ -114,6 +114,10 @@ public final static native int XGBoosterEvalOneIter(long handle, int iter, long[ public final static native int XGBoosterPredict(long handle, long dmat, int option_mask, int ntree_limit, float[][] predicts); + public final static native int XGBoosterInplacePredict(long handle, float[] data, int num_rows, int num_features, + float missing, int option_mask, int ntree_limit, + float[][] predicts); + public final static native int XGBoosterLoadModel(long handle, String fname); public final static native int XGBoosterSaveModel(long handle, String fname); diff --git a/jvm-packages/xgboost4j/src/native/xgboost4j.cpp b/jvm-packages/xgboost4j/src/native/xgboost4j.cpp index 749fa5b40cdb..f2e6559d50a7 100644 --- a/jvm-packages/xgboost4j/src/native/xgboost4j.cpp +++ b/jvm-packages/xgboost4j/src/native/xgboost4j.cpp @@ -608,6 +608,32 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterPredict return ret; } +/* + * Class: ml_dmlc_xgboost4j_java_XGBoostJNI + * Method: XGBoosterInplacePredict + * Signature: (J[FIII[[F)I + */ +JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterInplacePredict + (JNIEnv *jenv, jclass jcls, jlong jhandle, 
jfloatArray jdata, jint num_rows, jint num_features, + jfloat missing, jint option_mask, jint treeLimit, jobjectArray jout) { + BoosterHandle handle = (BoosterHandle) jhandle; + jfloat* data = jenv->GetFloatArrayElements(jdata, 0); + const bst_ulong *len; + float *result; + int ret = XGBoosterInplacePredict(handle, data, num_rows, num_features, missing, option_mask, treeLimit, + &len, (const float **) &result); + JVM_CHECK_CALL(ret); + jenv->ReleaseFloatArrayElements(jdata, data, 0); + if (*len) { +// printf("JNI XGBoosterInplacePredict len = %u\n", *len); + jsize jlen = (jsize) *len; + jfloatArray jarray = jenv->NewFloatArray(jlen); + jenv->SetFloatArrayRegion(jarray, 0, jlen, (jfloat *) result); + jenv->SetObjectArrayElement(jout, 0, jarray); + } + return ret; +} + /* * Class: ml_dmlc_xgboost4j_java_XGBoostJNI * Method: XGBoosterLoadModel diff --git a/jvm-packages/xgboost4j/src/native/xgboost4j.h b/jvm-packages/xgboost4j/src/native/xgboost4j.h index 5afe92b524ab..20bc51a88357 100644 --- a/jvm-packages/xgboost4j/src/native/xgboost4j.h +++ b/jvm-packages/xgboost4j/src/native/xgboost4j.h @@ -183,6 +183,13 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterEvalOneIt JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterPredict (JNIEnv *, jclass, jlong, jlong, jint, jint, jobjectArray); +/* + * Class: ml_dmlc_xgboost4j_java_XGBoostJNI + * Method: XGBoosterInplacePredict + * Signature: (J[FIII[[F)I + */ +JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterInplacePredict + (JNIEnv *, jclass, jlong, jfloatArray, jint, jint, jfloat, jint, jint, jobjectArray); /* * Class: ml_dmlc_xgboost4j_java_XGBoostJNI * Method: XGBoosterLoadModel diff --git a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java index cce1254d0f87..4041db945a70 100644 --- 
a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java +++ b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java @@ -24,6 +24,166 @@ import junit.framework.TestCase; import org.junit.Test; +import java.util.Random; +import java.io.PrintStream; +import java.util.*; +import java.util.concurrent.*; + +// +// Utility class for comparing array contents +// +class ArrayComparator { + public static boolean compare(float[][] one, float[][] two) { + + // Test that both arrays are the same size in the first dimension + if (one.length != two.length) { + System.err.println("Array first dimensions are not identical in size"); + return false; + } + + // Test that both arrays are the same size in the second dimension + if (one[0].length != two[0].length) { + System.err.println("Array second dimensions are not identical in size"); + return false; + } + + // Test that all the array elements are identical + for (int i=0; i { + int task_num; + float[][] testX; + int test_rows; + int features; + float[][] true_predicts; + Booster booster; + Random rng = new Random(); + int n_preds = 100; + + public InplacePredictionTask(int n, Booster booster, float[][] testX, int test_rows, int features, float[][] true_predicts) { + this.task_num = n; + this.booster = booster; + this.testX = testX; + this.test_rows = test_rows; + this.features = features; + this.true_predicts = true_predicts; + } + + @Override + public Boolean call() throws Exception { +// System.err.println("Task #" + task_num + " started."); + + // Perform n_preds number of single-vector predictions + for (int i=0; i params = new HashMap() { + { + put("eta", 1.0); + put("max_depth", 2); + put("silent", 1); + put("tree_method", "hist"); + } + }; + + Map watches = new HashMap() { + { + put("train", trainMat); + put("test", testMat); + } + }; + + Booster booster = XGBoost.train(trainMat, params, 10, watches, null, null); + + + // Prediction + + // standard prediction + float[][] 
predicts = booster.predict(testMat); + + // inplace prediction + float[][] inplace_predicts = booster.inplace_predict(testX, test_rows, features); + + // Confirm that the two prediction results are identical + TestCase.assertTrue(ArrayComparator.compare(predicts, inplace_predicts)); + + + // Multi-thread prediction + + // Reformat the test matrix as 2D array + float[][] testX2 = new float[test_rows][features]; + + int k=0; + for (int i=0; i> result = new ArrayList(n_tasks); + ExecutorService executorService = Executors.newFixedThreadPool(5); // Create pool of 5 threads + + // Submit all the tasks + for (int i=0; i p_m, char const *c_json_config, Learner *learner, - xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim, - const float **out_result) { - xgboost_CHECK_C_ARG_PTR(c_json_config); - auto config = Json::Load(StringView{c_json_config}); - CHECK_EQ(get(config["cache_id"]), 0) << "Cache ID is not supported yet"; - - HostDeviceVector *p_predt{nullptr}; - auto type = PredictionType(RequiredArg(config, "type", __func__)); - float missing = GetMissing(config); - learner->InplacePredict(p_m, type, missing, &p_predt, - RequiredArg(config, "iteration_begin", __func__), - RequiredArg(config, "iteration_end", __func__)); +template +void InplacePredictImplCore(std::shared_ptr x, std::shared_ptr p_m, + Learner *learner, + xgboost::PredictionType type, + float missing, + size_t n_rows, size_t n_cols, + size_t iteration_begin, size_t iteration_end, + bool strict_shape, + xgboost::bst_ulong const **out_shape, + xgboost::bst_ulong *out_dim, const float **out_result) { + HostDeviceVector* p_predt { nullptr }; + learner->InplacePredict(x, p_m, type, missing, &p_predt, iteration_begin, iteration_end); CHECK(p_predt); auto &shape = learner->GetThreadLocal().prediction_shape; - auto const &info = p_m->Info(); - auto n_samples = info.num_row_; - auto n_features = info.num_col_; - auto chunksize = n_samples == 0 ? 
0 : p_predt->Size() / n_samples; - bool strict_shape = RequiredArg(config, "strict_shape", __func__); - - xgboost_CHECK_C_ARG_PTR(out_dim); - CalcPredictShape(strict_shape, type, n_samples, n_features, chunksize, learner->Groups(), + auto chunksize = n_rows == 0 ? 0 : p_predt->Size() / n_rows; + CalcPredictShape(strict_shape, type, n_rows, n_cols, chunksize, learner->Groups(), learner->BoostedRounds(), &shape, out_dim); - - xgboost_CHECK_C_ARG_PTR(out_result); - xgboost_CHECK_C_ARG_PTR(out_shape); - *out_result = dmlc::BeginPtr(p_predt->HostVector()); *out_shape = dmlc::BeginPtr(shape); +// printf("InplacePredictImplCore shape = %u, dim = %u\n", **out_shape, *out_dim); +} + +template +void InplacePredictImpl(std::shared_ptr x, std::shared_ptr p_m, + char const *c_json_config, Learner *learner, + size_t n_rows, size_t n_cols, + xgboost::bst_ulong const **out_shape, + xgboost::bst_ulong *out_dim, const float **out_result) { + auto config = Json::Load(StringView{c_json_config}); + CHECK_EQ(get(config["cache_id"]), 0) << "Cache ID is not supported yet"; + + auto type = PredictionType(get(config["type"])); + float missing = GetMissing(config); + int iteration_begin = get(config["iteration_begin"]); + int iteration_end = get(config["iteration_end"]); + bool strict_shape = get(config["strict_shape"]); + InplacePredictImplCore(x, p_m, learner, type, missing, n_rows, n_cols, + iteration_begin, iteration_end, strict_shape, out_shape, out_dim, out_result); +} + +XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, + const float *data, + size_t num_rows, + size_t num_features, + float missing, + int option_mask, + int ntree_limit, + const xgboost::bst_ulong **len, + const bst_float **out_result) { + API_BEGIN(); + CHECK_HANDLE(); + xgboost::bst_ulong out_dim; + std::shared_ptr x{new xgboost::data::DenseAdapter(data, num_rows, num_features)}; + auto *learner = static_cast(handle); + auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); + 
InplacePredictImplCore(x, nullptr, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, + 0, iteration_end, true, len, &out_dim, out_result); +// printf("XGBoosterInplacePredict len = %u, dim = %u\n", **len, out_dim); + API_END(); } XGB_DLL int XGBoosterPredictFromDense(BoosterHandle handle, char const *array_interface, From 09249377f08c78f99c646c2caa12983bdd4f98e8 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Tue, 21 Mar 2023 14:29:29 -0600 Subject: [PATCH 047/126] python 3 --- jvm-packages/xgboost4j/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index 31c966d8ddb6..ff58f67cc0e5 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -81,7 +81,7 @@ exec - python + python3 create_jni.py --log-capi-invocation From a980b9afd7bbfdeee606cccd83903d0e286ab23c Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Tue, 21 Mar 2023 14:32:32 -0600 Subject: [PATCH 048/126] fix pom --- jvm-packages/xgboost4j-flink/pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index 25613407a189..c9f4e0461780 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -9,7 +9,7 @@ 1.7.1-sovrn xgboost4j-flink_2.12 - 1.7.1-sovrn/version> + 1.7.1-sovrn @@ -26,7 +26,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.1 + 1.7.1-sovrn org.apache.commons From 434bff4c24044d28d875f41820b69b28fb06da0c Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Tue, 21 Mar 2023 14:35:37 -0600 Subject: [PATCH 049/126] indent --- .../xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java index 7735896e772d..ae44feb847f4 
100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java @@ -315,7 +315,7 @@ private synchronized float[][] predict(DMatrix data, } return predicts; } -/** + /** * Perform thread-safe prediction. Calls * inplace_predict(data, num_rows, num_features, Float.NaN, false, 0, false, false). * From 3dcf1f2540300569075205881c9a6c02cc133fc9 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Tue, 21 Mar 2023 14:45:40 -0600 Subject: [PATCH 050/126] remove template from function --- src/c_api/c_api.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 20bbfafc56ec..e43556cfa603 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -990,7 +990,6 @@ void InplacePredictImplCore(std::shared_ptr x, std::shared_ptr p_m, // printf("InplacePredictImplCore shape = %u, dim = %u\n", **out_shape, *out_dim); } -template void InplacePredictImpl(std::shared_ptr x, std::shared_ptr p_m, char const *c_json_config, Learner *learner, size_t n_rows, size_t n_cols, From 754158e39119a83812ac182eaf724aa9dd8c38ad Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Tue, 21 Mar 2023 14:47:16 -0600 Subject: [PATCH 051/126] Revert "remove template from function" This reverts commit 3dcf1f2540300569075205881c9a6c02cc133fc9. 
--- src/c_api/c_api.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index e43556cfa603..20bbfafc56ec 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -990,6 +990,7 @@ void InplacePredictImplCore(std::shared_ptr x, std::shared_ptr p_m, // printf("InplacePredictImplCore shape = %u, dim = %u\n", **out_shape, *out_dim); } +template void InplacePredictImpl(std::shared_ptr x, std::shared_ptr p_m, char const *c_json_config, Learner *learner, size_t n_rows, size_t n_cols, From 4fea48cd5278c341a81f86b87af49bc641f714e7 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Tue, 21 Mar 2023 14:58:21 -0600 Subject: [PATCH 052/126] Changes to inplace predict --- src/c_api/c_api.cc | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 20bbfafc56ec..f88013591566 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -990,22 +990,36 @@ void InplacePredictImplCore(std::shared_ptr x, std::shared_ptr p_m, // printf("InplacePredictImplCore shape = %u, dim = %u\n", **out_shape, *out_dim); } -template -void InplacePredictImpl(std::shared_ptr x, std::shared_ptr p_m, - char const *c_json_config, Learner *learner, - size_t n_rows, size_t n_cols, - xgboost::bst_ulong const **out_shape, - xgboost::bst_ulong *out_dim, const float **out_result) { +void InplacePredictImpl(std::shared_ptr p_m, char const *c_json_config, Learner *learner, + xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim, + const float **out_result) { + xgboost_CHECK_C_ARG_PTR(c_json_config); auto config = Json::Load(StringView{c_json_config}); CHECK_EQ(get(config["cache_id"]), 0) << "Cache ID is not supported yet"; - auto type = PredictionType(get(config["type"])); + HostDeviceVector *p_predt{nullptr}; + auto type = PredictionType(RequiredArg(config, "type", __func__)); float missing = GetMissing(config); - int iteration_begin = 
get(config["iteration_begin"]); - int iteration_end = get(config["iteration_end"]); - bool strict_shape = get(config["strict_shape"]); - InplacePredictImplCore(x, p_m, learner, type, missing, n_rows, n_cols, - iteration_begin, iteration_end, strict_shape, out_shape, out_dim, out_result); + learner->InplacePredict(p_m, type, missing, &p_predt, + RequiredArg(config, "iteration_begin", __func__), + RequiredArg(config, "iteration_end", __func__)); + CHECK(p_predt); + auto &shape = learner->GetThreadLocal().prediction_shape; + auto const &info = p_m->Info(); + auto n_samples = info.num_row_; + auto n_features = info.num_col_; + auto chunksize = n_samples == 0 ? 0 : p_predt->Size() / n_samples; + bool strict_shape = RequiredArg(config, "strict_shape", __func__); + + xgboost_CHECK_C_ARG_PTR(out_dim); + CalcPredictShape(strict_shape, type, n_samples, n_features, chunksize, learner->Groups(), + learner->BoostedRounds(), &shape, out_dim); + + xgboost_CHECK_C_ARG_PTR(out_result); + xgboost_CHECK_C_ARG_PTR(out_shape); + + *out_result = dmlc::BeginPtr(p_predt->HostVector()); + *out_shape = dmlc::BeginPtr(shape); } XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, From b5297da902d8045e9325ebd8b3eb513db4a97a43 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Tue, 21 Mar 2023 15:05:13 -0600 Subject: [PATCH 053/126] changes to c method --- src/c_api/c_api.cc | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index f88013591566..18a588231367 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -997,29 +997,22 @@ void InplacePredictImpl(std::shared_ptr p_m, char const *c_json_config, auto config = Json::Load(StringView{c_json_config}); CHECK_EQ(get(config["cache_id"]), 0) << "Cache ID is not supported yet"; - HostDeviceVector *p_predt{nullptr}; auto type = PredictionType(RequiredArg(config, "type", __func__)); float missing = GetMissing(config); - learner->InplacePredict(p_m, type, missing, 
&p_predt, - RequiredArg(config, "iteration_begin", __func__), - RequiredArg(config, "iteration_end", __func__)); - CHECK(p_predt); - auto &shape = learner->GetThreadLocal().prediction_shape; auto const &info = p_m->Info(); auto n_samples = info.num_row_; auto n_features = info.num_col_; - auto chunksize = n_samples == 0 ? 0 : p_predt->Size() / n_samples; + int iteration_begin = get(config["iteration_begin"]); + int iteration_end = get(config["iteration_end"]); bool strict_shape = RequiredArg(config, "strict_shape", __func__); xgboost_CHECK_C_ARG_PTR(out_dim); - CalcPredictShape(strict_shape, type, n_samples, n_features, chunksize, learner->Groups(), - learner->BoostedRounds(), &shape, out_dim); xgboost_CHECK_C_ARG_PTR(out_result); xgboost_CHECK_C_ARG_PTR(out_shape); - *out_result = dmlc::BeginPtr(p_predt->HostVector()); - *out_shape = dmlc::BeginPtr(shape); + InplacePredictImplCore(x, p_m, learner, type, missing, n_rows, n_cols, + iteration_begin, iteration_end, strict_shape, out_shape, out_dim, out_result); } XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, From 3ffd646ea70ee802e2cc3bde34290b8987f072d2 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Tue, 21 Mar 2023 15:29:38 -0600 Subject: [PATCH 054/126] remove x --- src/c_api/c_api.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 18a588231367..5f14a789ea86 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -969,7 +969,7 @@ XGB_DLL int XGBoosterPredictFromDMatrix(BoosterHandle handle, } template -void InplacePredictImplCore(std::shared_ptr x, std::shared_ptr p_m, +void InplacePredictImplCore(std::shared_ptr p_m, Learner *learner, xgboost::PredictionType type, float missing, @@ -979,7 +979,7 @@ void InplacePredictImplCore(std::shared_ptr x, std::shared_ptr p_m, xgboost::bst_ulong const **out_shape, xgboost::bst_ulong *out_dim, const float **out_result) { HostDeviceVector* p_predt { nullptr }; - learner->InplacePredict(x, 
p_m, type, missing, &p_predt, iteration_begin, iteration_end); + learner->InplacePredict(p_m, type, missing, &p_predt, iteration_begin, iteration_end); CHECK(p_predt); auto &shape = learner->GetThreadLocal().prediction_shape; auto chunksize = n_rows == 0 ? 0 : p_predt->Size() / n_rows; @@ -1011,7 +1011,7 @@ void InplacePredictImpl(std::shared_ptr p_m, char const *c_json_config, xgboost_CHECK_C_ARG_PTR(out_result); xgboost_CHECK_C_ARG_PTR(out_shape); - InplacePredictImplCore(x, p_m, learner, type, missing, n_rows, n_cols, + InplacePredictImplCore(p_m, learner, type, missing, n_rows, n_cols, iteration_begin, iteration_end, strict_shape, out_shape, out_dim, out_result); } @@ -1030,7 +1030,7 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, std::shared_ptr x{new xgboost::data::DenseAdapter(data, num_rows, num_features)}; auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); - InplacePredictImplCore(x, nullptr, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, + InplacePredictImplCore(nullptr, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, 0, iteration_end, true, len, &out_dim, out_result); // printf("XGBoosterInplacePredict len = %u, dim = %u\n", **len, out_dim); API_END(); From d7867d492eaa74f67ae554b9dacc12a06a0473e3 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Tue, 21 Mar 2023 15:33:10 -0600 Subject: [PATCH 055/126] try renaming for rows and columns --- src/c_api/c_api.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 5f14a789ea86..37be407bdbc2 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1011,7 +1011,7 @@ void InplacePredictImpl(std::shared_ptr p_m, char const *c_json_config, xgboost_CHECK_C_ARG_PTR(out_result); xgboost_CHECK_C_ARG_PTR(out_shape); - InplacePredictImplCore(p_m, learner, type, missing, n_rows, n_cols, + InplacePredictImplCore(p_m, learner, type, missing, 
n_samples, n_features, iteration_begin, iteration_end, strict_shape, out_shape, out_dim, out_result); } From 7278f7931b61fefb48f009fbbc5a6bc94d7ace1d Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Tue, 21 Mar 2023 15:53:15 -0600 Subject: [PATCH 056/126] Remove template from coreImpl --- src/c_api/c_api.cc | 1 - 1 file changed, 1 deletion(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 37be407bdbc2..4386ec6f33a6 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -968,7 +968,6 @@ XGB_DLL int XGBoosterPredictFromDMatrix(BoosterHandle handle, API_END(); } -template void InplacePredictImplCore(std::shared_ptr p_m, Learner *learner, xgboost::PredictionType type, From c9facbd5d6658d10d347225dd2597454496cf951 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Tue, 21 Mar 2023 16:18:54 -0600 Subject: [PATCH 057/126] InplacePredict instead of a dupe --- include/xgboost/c_api.h | 29 +++++++++++------------------ 1 file changed, 11 insertions(+), 18 deletions(-) diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index 4a2f7defdd0c..91b030ee3152 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -993,27 +993,20 @@ XGB_DLL int XGBoosterPredict(BoosterHandle handle, * 1:output margin instead of transformed value * 2:output leaf index of trees instead of leaf value, note leaf index is unique per tree * 4:output feature contributions to individual predictions - * \param ntree_limit limit number of trees used for prediction, this is only valid for boosted trees - * when the parameter is set to 0, we will use all the trees - * \param training Whether the prediction function is used as part of a training loop. - * Prediction can be run in 2 scenarios: - * 1. Given data matrix X, obtain prediction y_pred from the model. - * 2. Obtain the prediction for computing gradients. 
For example, DART booster performs dropout - * during training, and the prediction result will be different from the one obtained by normal - * inference step due to dropped trees. - * Set training=false for the first scenario. Set training=true for the second scenario. - * The second scenario applies when you are defining a custom objective function. * \param out_len used to store length of returning result * \param out_result used to set a pointer to array * \return 0 when success, -1 when failure happens - */ -XGB_DLL int XGBoosterPredict(BoosterHandle handle, - DMatrixHandle dmat, - int option_mask, - unsigned ntree_limit, - int training, - bst_ulong *out_len, - const float **out_result); +*/ +//TODO: Fix documentation above +XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, + const float *data, + size_t num_rows, + size_t num_features, + float missing, + int option_mask, + int ntree_limit, + const bst_ulong **len, + const float **out_result); /*! * \brief Make prediction from DMatrix, replacing \ref XGBoosterPredict. 
From 70609e6e23c6a548a36142ff75c90441bd56de63 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Wed, 22 Mar 2023 15:11:48 -0600 Subject: [PATCH 058/126] Copy some lines from python-c api --- src/c_api/c_api.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 4386ec6f33a6..578c0d7bc0da 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1027,6 +1027,10 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, CHECK_HANDLE(); xgboost::bst_ulong out_dim; std::shared_ptr x{new xgboost::data::DenseAdapter(data, num_rows, num_features)}; + std::shared_ptr p_m{nullptr}; + p_m.reset(new data::DMatrixProxy); + auto proxy = dynamic_cast(p_m.get()); + CHECK(proxy) << "Invalid input type for inplace predict."; auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); InplacePredictImplCore(nullptr, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, From 43112947bc07bef6ebf03afbe3bfab036a6ab12d Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Wed, 22 Mar 2023 15:19:22 -0600 Subject: [PATCH 059/126] remove nullptr --- src/c_api/c_api.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 578c0d7bc0da..5a5e1dfb5908 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1033,7 +1033,7 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, CHECK(proxy) << "Invalid input type for inplace predict."; auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); - InplacePredictImplCore(nullptr, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, + InplacePredictImplCore(p_m, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, 0, iteration_end, true, len, &out_dim, out_result); // printf("XGBoosterInplacePredict len = %u, dim = %u\n", **len, out_dim); API_END(); From 
1568ebc84164bb3d34882c2ac51623cdc408de9a Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Wed, 22 Mar 2023 16:04:25 -0600 Subject: [PATCH 060/126] Try making a dMatrix in api --- src/c_api/c_api.cc | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 5a5e1dfb5908..1ece500ce825 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1026,11 +1026,8 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, API_BEGIN(); CHECK_HANDLE(); xgboost::bst_ulong out_dim; - std::shared_ptr x{new xgboost::data::DenseAdapter(data, num_rows, num_features)}; - std::shared_ptr p_m{nullptr}; - p_m.reset(new data::DMatrixProxy); - auto proxy = dynamic_cast(p_m.get()); - CHECK(proxy) << "Invalid input type for inplace predict."; + //std::shared_ptr x{new xgboost::data::DenseAdapter(data, num_rows, num_features)}; + std::shared_ptr p_m{new xgboost::data::DMatrix(data, num_rows, num_features)}; auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); InplacePredictImplCore(p_m, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, From 85f6ce07bee835d6d11b5ba83d78d2460f767a64 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Wed, 22 Mar 2023 16:05:33 -0600 Subject: [PATCH 061/126] Revert "Try making a dMatrix in api" This reverts commit 1568ebc84164bb3d34882c2ac51623cdc408de9a. 
--- src/c_api/c_api.cc | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 1ece500ce825..5a5e1dfb5908 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1026,8 +1026,11 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, API_BEGIN(); CHECK_HANDLE(); xgboost::bst_ulong out_dim; - //std::shared_ptr x{new xgboost::data::DenseAdapter(data, num_rows, num_features)}; - std::shared_ptr p_m{new xgboost::data::DMatrix(data, num_rows, num_features)}; + std::shared_ptr x{new xgboost::data::DenseAdapter(data, num_rows, num_features)}; + std::shared_ptr p_m{nullptr}; + p_m.reset(new data::DMatrixProxy); + auto proxy = dynamic_cast(p_m.get()); + CHECK(proxy) << "Invalid input type for inplace predict."; auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); InplacePredictImplCore(p_m, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, From fa71168c21899eb9502948532abfe79d05343310 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Wed, 22 Mar 2023 16:25:02 -0600 Subject: [PATCH 062/126] another dMatrix attempt --- src/c_api/c_api.cc | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 5a5e1dfb5908..93b7cb5220e1 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1027,10 +1027,7 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, CHECK_HANDLE(); xgboost::bst_ulong out_dim; std::shared_ptr x{new xgboost::data::DenseAdapter(data, num_rows, num_features)}; - std::shared_ptr p_m{nullptr}; - p_m.reset(new data::DMatrixProxy); - auto proxy = dynamic_cast(p_m.get()); - CHECK(proxy) << "Invalid input type for inplace predict."; + std::shared_ptr p_m{xgboost::DMatrix::Create(data, num_rows, num_features)}; auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); InplacePredictImplCore(p_m, learner, 
(xgboost::PredictionType)0, missing, num_rows, num_features, From bd41c9b053245a4e14ee39afefedebbdc449a917 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Wed, 22 Mar 2023 16:27:37 -0600 Subject: [PATCH 063/126] Revert "another dMatrix attempt" This reverts commit fa71168c21899eb9502948532abfe79d05343310. --- src/c_api/c_api.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 93b7cb5220e1..5a5e1dfb5908 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1027,7 +1027,10 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, CHECK_HANDLE(); xgboost::bst_ulong out_dim; std::shared_ptr x{new xgboost::data::DenseAdapter(data, num_rows, num_features)}; - std::shared_ptr p_m{xgboost::DMatrix::Create(data, num_rows, num_features)}; + std::shared_ptr p_m{nullptr}; + p_m.reset(new data::DMatrixProxy); + auto proxy = dynamic_cast(p_m.get()); + CHECK(proxy) << "Invalid input type for inplace predict."; auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); InplacePredictImplCore(p_m, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, From c869a1478ca8ef2b93544f1a77298b9aac80684c Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Thu, 23 Mar 2023 13:14:51 -0600 Subject: [PATCH 064/126] First try at getting dMatrix logic into changes --- include/xgboost/c_api.h | 1 + .../java/ml/dmlc/xgboost4j/java/Booster.java | 19 +++++++++++++------ .../ml/dmlc/xgboost4j/java/XGBoostJNI.java | 4 ++-- .../xgboost4j/src/native/xgboost4j.cpp | 5 +++-- jvm-packages/xgboost4j/src/native/xgboost4j.h | 2 +- .../dmlc/xgboost4j/java/BoosterImplTest.java | 2 +- src/c_api/c_api.cc | 8 ++++++-- 7 files changed, 27 insertions(+), 14 deletions(-) diff --git a/include/xgboost/c_api.h b/include/xgboost/c_api.h index 91b030ee3152..0d91c56df5c5 100644 --- a/include/xgboost/c_api.h +++ b/include/xgboost/c_api.h @@ -1002,6 +1002,7 @@ XGB_DLL int 
XGBoosterInplacePredict(BoosterHandle handle, const float *data, size_t num_rows, size_t num_features, + DMatrixHandle d_matrix_handle, float missing, int option_mask, int ntree_limit, diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java index ae44feb847f4..bf2856675aa6 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java @@ -315,6 +315,7 @@ private synchronized float[][] predict(DMatrix data, } return predicts; } + // TODO - ANYTHING /** * Perform thread-safe prediction. Calls * inplace_predict(data, num_rows, num_features, Float.NaN, false, 0, false, false). @@ -331,8 +332,9 @@ private synchronized float[][] predict(DMatrix data, */ public float[][] inplace_predict(float[] data, int num_rows, - int num_features) throws XGBoostError { - return this.inplace_predict(data, num_rows, num_features, Float.NaN, false, 0, false, false); + int num_features, + DMatrix d_matrix) throws XGBoostError { + return this.inplace_predict(data, num_rows, num_features, d_matrix.getHandle(), Float.NaN, false, 0, false, false); } /** @@ -353,8 +355,9 @@ public float[][] inplace_predict(float[] data, public float[][] inplace_predict(float[] data, int num_rows, int num_features, + long d_matrix_handle, float missing) throws XGBoostError { - return this.inplace_predict(data, num_rows, num_features, missing, false, 0, false, false); + return this.inplace_predict(data, num_rows, num_features, d_matrix_handle, missing, false, 0, false, false); } /** @@ -378,9 +381,10 @@ public float[][] inplace_predict(float[] data, public float[][] inplace_predict(float[] data, int num_rows, int num_features, + long d_matrix_handle, float missing, boolean outputMargin) throws XGBoostError { - return this.inplace_predict(data, num_rows, num_features, missing, outputMargin, + return 
this.inplace_predict(data, num_rows, num_features, d_matrix_handle, missing, outputMargin, 0, false, false); } @@ -405,10 +409,11 @@ public float[][] inplace_predict(float[] data, public float[][] inplace_predict(float[] data, int num_rows, int num_features, + long d_matrix_handle, float missing, boolean outputMargin, int treeLimit) throws XGBoostError { - return this.inplace_predict(data, num_rows, num_features, missing, outputMargin, + return this.inplace_predict(data, num_rows, num_features, d_matrix_handle, missing, outputMargin, treeLimit, false, false); } @@ -418,6 +423,7 @@ public float[][] inplace_predict(float[] data, * @param data Flattened input matrix of features for prediction * @param num_rows The number of preditions to make (count of input matrix rows) * @param num_features The number of features in the model (count of input matrix columns) + * @param d_matrix_h The handle for a dmatrix * @param missing Value indicating missing element in the data input matrix * @param outputMargin Whether to only predict margin value instead of transformed prediction * @param treeLimit limit number of trees, 0 means all trees. @@ -429,6 +435,7 @@ public float[][] inplace_predict(float[] data, public float[][] inplace_predict(float[] data, int num_rows, int num_features, + long d_matrix_handle, float missing, boolean outputMargin, int treeLimit, @@ -446,7 +453,7 @@ public float[][] inplace_predict(float[] data, } float[][] rawPredicts = new float[1][]; - XGBoostJNI.checkCall(XGBoostJNI.XGBoosterInplacePredict(handle, data, num_rows, num_features, + XGBoostJNI.checkCall(XGBoostJNI.XGBoosterInplacePredict(handle, data, num_rows, num_features, d_matrix_handle missing, optionMask, treeLimit, rawPredicts)); // pass missing and treelimit here? 
// System.out.println("Booster.inplace_predict rawPredicts[0].length = " + diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoostJNI.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoostJNI.java index f73112dad216..49921983d461 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoostJNI.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/XGBoostJNI.java @@ -113,8 +113,8 @@ public final static native int XGBoosterEvalOneIter(long handle, int iter, long[ public final static native int XGBoosterPredict(long handle, long dmat, int option_mask, int ntree_limit, float[][] predicts); - - public final static native int XGBoosterInplacePredict(long handle, float[] data, int num_rows, int num_features, + // TODO - anything + public final static native int XGBoosterInplacePredict(long handle, float[] data, int num_rows, int num_features, long d_matrix_handle, float missing, int option_mask, int ntree_limit, float[][] predicts); diff --git a/jvm-packages/xgboost4j/src/native/xgboost4j.cpp b/jvm-packages/xgboost4j/src/native/xgboost4j.cpp index f2e6559d50a7..ae11d8864827 100644 --- a/jvm-packages/xgboost4j/src/native/xgboost4j.cpp +++ b/jvm-packages/xgboost4j/src/native/xgboost4j.cpp @@ -608,19 +608,20 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterPredict return ret; } +// TODO - ANYTHING /* * Class: ml_dmlc_xgboost4j_java_XGBoostJNI * Method: XGBoosterInplacePredict * Signature: (J[FIII[[F)I */ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterInplacePredict - (JNIEnv *jenv, jclass jcls, jlong jhandle, jfloatArray jdata, jint num_rows, jint num_features, + (JNIEnv *jenv, jclass jcls, jlong jhandle, jfloatArray jdata, jint num_rows, jint num_features, jlong d_matrix_handle, jfloat missing, jint option_mask, jint treeLimit, jobjectArray jout) { BoosterHandle handle = (BoosterHandle) jhandle; jfloat* data = jenv->GetFloatArrayElements(jdata, 
0); const bst_ulong *len; float *result; - int ret = XGBoosterInplacePredict(handle, data, num_rows, num_features, missing, option_mask, treeLimit, + int ret = XGBoosterInplacePredict(handle, data, num_rows, num_features, d_matrix_handle, missing, option_mask, treeLimit, &len, (const float **) &result); JVM_CHECK_CALL(ret); jenv->ReleaseFloatArrayElements(jdata, data, 0); diff --git a/jvm-packages/xgboost4j/src/native/xgboost4j.h b/jvm-packages/xgboost4j/src/native/xgboost4j.h index 20bc51a88357..dc8c9f5ee56b 100644 --- a/jvm-packages/xgboost4j/src/native/xgboost4j.h +++ b/jvm-packages/xgboost4j/src/native/xgboost4j.h @@ -189,7 +189,7 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterPredict * Signature: (J[FIII[[F)I */ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterInplacePredict - (JNIEnv *, jclass, jlong, jfloatArray, jint, jint, jfloat, jint, jint, jobjectArray); + (JNIEnv *, jclass, jlong, jfloatArray, jint, jint, jlong, jfloat, jint, jint, jobjectArray); /* * Class: ml_dmlc_xgboost4j_java_XGBoostJNI * Method: XGBoosterLoadModel diff --git a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java index 4041db945a70..cbc26313358a 100644 --- a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java +++ b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java @@ -331,7 +331,7 @@ public void testBoosterInplacePredict() throws XGBoostError, IOException { float[][] predicts = booster.predict(testMat); // inplace prediction - float[][] inplace_predicts = booster.inplace_predict(testX, test_rows, features); + float[][] inplace_predicts = booster.inplace_predict(testX, test_rows, features, testMat); // Confirm that the two prediction results are identical TestCase.assertTrue(ArrayComparator.compare(predicts, inplace_predicts)); diff --git a/src/c_api/c_api.cc 
b/src/c_api/c_api.cc index 5a5e1dfb5908..31ae4f3342cf 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1018,6 +1018,7 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, const float *data, size_t num_rows, size_t num_features, + DMatrixHandle dMatrixHandle, float missing, int option_mask, int ntree_limit, @@ -1026,9 +1027,12 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, API_BEGIN(); CHECK_HANDLE(); xgboost::bst_ulong out_dim; - std::shared_ptr x{new xgboost::data::DenseAdapter(data, num_rows, num_features)}; std::shared_ptr p_m{nullptr}; - p_m.reset(new data::DMatrixProxy); + if (!dMatrixHandle) { + p_m.reset(new data::DMatrixProxy); + } else { + p_m = *static_cast *>(dMatrixHandle); + } auto proxy = dynamic_cast(p_m.get()); CHECK(proxy) << "Invalid input type for inplace predict."; auto *learner = static_cast(handle); From 163bfcace3c9052cf6feabcacc6adad4ec235bc7 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Thu, 23 Mar 2023 13:18:57 -0600 Subject: [PATCH 065/126] missing comma --- .../xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java index bf2856675aa6..dbe8a3810353 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java @@ -453,7 +453,7 @@ public float[][] inplace_predict(float[] data, } float[][] rawPredicts = new float[1][]; - XGBoostJNI.checkCall(XGBoostJNI.XGBoosterInplacePredict(handle, data, num_rows, num_features, d_matrix_handle + XGBoostJNI.checkCall(XGBoostJNI.XGBoosterInplacePredict(handle, data, num_rows, num_features, d_matrix_handle, missing, optionMask, treeLimit, rawPredicts)); // pass missing and treelimit here? 
// System.out.println("Booster.inplace_predict rawPredicts[0].length = " + From 14f2243e0cd23bd6e67f0c2701ba55c7903f46c1 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Thu, 23 Mar 2023 13:23:35 -0600 Subject: [PATCH 066/126] Checkstyle failure --- .../java/ml/dmlc/xgboost4j/java/Booster.java | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java index dbe8a3810353..763f2bb394cf 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java @@ -334,7 +334,8 @@ public float[][] inplace_predict(float[] data, int num_rows, int num_features, DMatrix d_matrix) throws XGBoostError { - return this.inplace_predict(data, num_rows, num_features, d_matrix.getHandle(), Float.NaN, false, 0, false, false); + return this.inplace_predict(data, num_rows, num_features, d_matrix.getHandle(), + Float.NaN, false, 0, false, false); } /** @@ -357,7 +358,8 @@ public float[][] inplace_predict(float[] data, int num_features, long d_matrix_handle, float missing) throws XGBoostError { - return this.inplace_predict(data, num_rows, num_features, d_matrix_handle, missing, false, 0, false, false); + return this.inplace_predict(data, num_rows, num_features, d_matrix_handle, + missing, false, 0, false, false); } /** @@ -384,8 +386,8 @@ public float[][] inplace_predict(float[] data, long d_matrix_handle, float missing, boolean outputMargin) throws XGBoostError { - return this.inplace_predict(data, num_rows, num_features, d_matrix_handle, missing, outputMargin, - 0, false, false); + return this.inplace_predict(data, num_rows, num_features, d_matrix_handle, missing, + outputMargin, 0, false, false); } /** @@ -413,8 +415,8 @@ public float[][] inplace_predict(float[] data, float missing, boolean outputMargin, int treeLimit) 
throws XGBoostError { - return this.inplace_predict(data, num_rows, num_features, d_matrix_handle, missing, outputMargin, - treeLimit, false, false); + return this.inplace_predict(data, num_rows, num_features, d_matrix_handle, missing, + outputMargin, treeLimit, false, false); } /** @@ -453,8 +455,8 @@ public float[][] inplace_predict(float[] data, } float[][] rawPredicts = new float[1][]; - XGBoostJNI.checkCall(XGBoostJNI.XGBoosterInplacePredict(handle, data, num_rows, num_features, d_matrix_handle, - missing, optionMask, treeLimit, rawPredicts)); // pass missing and treelimit here? + XGBoostJNI.checkCall(XGBoostJNI.XGBoosterInplacePredict(handle, data, num_rows, num_features, + d_matrix_handle, missing, optionMask, treeLimit, rawPredicts)); // pass missing and treelimit here? // System.out.println("Booster.inplace_predict rawPredicts[0].length = " + // rawPredicts[0].length); From 66ca8c34212e8f5c7c258c4bc491af83e2576125 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Thu, 23 Mar 2023 13:27:33 -0600 Subject: [PATCH 067/126] c h e c k s t y l e --- .../src/main/java/ml/dmlc/xgboost4j/java/Booster.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java index 763f2bb394cf..dc05f618b431 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java @@ -456,7 +456,8 @@ public float[][] inplace_predict(float[] data, float[][] rawPredicts = new float[1][]; XGBoostJNI.checkCall(XGBoostJNI.XGBoosterInplacePredict(handle, data, num_rows, num_features, - d_matrix_handle, missing, optionMask, treeLimit, rawPredicts)); // pass missing and treelimit here? + d_matrix_handle, missing, + optionMask, treeLimit, rawPredicts)); // pass missing and treelimit here? 
// System.out.println("Booster.inplace_predict rawPredicts[0].length = " + // rawPredicts[0].length); From e9952286068cfb1eeed77620b2c6a50f2b7b20f8 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Thu, 23 Mar 2023 13:34:42 -0600 Subject: [PATCH 068/126] Convert long to handle --- jvm-packages/xgboost4j/src/native/xgboost4j.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/jvm-packages/xgboost4j/src/native/xgboost4j.cpp b/jvm-packages/xgboost4j/src/native/xgboost4j.cpp index ae11d8864827..79932678cf5a 100644 --- a/jvm-packages/xgboost4j/src/native/xgboost4j.cpp +++ b/jvm-packages/xgboost4j/src/native/xgboost4j.cpp @@ -618,10 +618,11 @@ JNIEXPORT jint JNICALL Java_ml_dmlc_xgboost4j_java_XGBoostJNI_XGBoosterInplacePr (JNIEnv *jenv, jclass jcls, jlong jhandle, jfloatArray jdata, jint num_rows, jint num_features, jlong d_matrix_handle, jfloat missing, jint option_mask, jint treeLimit, jobjectArray jout) { BoosterHandle handle = (BoosterHandle) jhandle; + DMatrixHandle dmat = (DMatrixHandle) d_matrix_handle; jfloat* data = jenv->GetFloatArrayElements(jdata, 0); const bst_ulong *len; float *result; - int ret = XGBoosterInplacePredict(handle, data, num_rows, num_features, d_matrix_handle, missing, option_mask, treeLimit, + int ret = XGBoosterInplacePredict(handle, data, num_rows, num_features, dmat, missing, option_mask, treeLimit, &len, (const float **) &result); JVM_CHECK_CALL(ret); jenv->ReleaseFloatArrayElements(jdata, data, 0); From b3796b74b64c70862b1f4f64e3e902ce70e28e18 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Thu, 23 Mar 2023 13:40:37 -0600 Subject: [PATCH 069/126] a dMatrix for testing --- .../test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java index cbc26313358a..bd9d26b160b6 100644 --- 
a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java +++ b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java @@ -95,16 +95,18 @@ class InplacePredictThread extends Thread { float[][] testX; int test_rows; int features; + DMatrix dMatrix; float[][] true_predicts; Booster booster; Random rng = new Random(); int n_preds = 100; - public InplacePredictThread(int n, Booster booster, float[][] testX, int test_rows, int features, float[][] true_predicts) { + public InplacePredictThread(int n, Booster booster, float[][] testX, int test_rows, int features, DMatrix dMatrix, float[][] true_predicts) { this.thread_num = n; this.booster = booster; this.testX = testX; this.test_rows = test_rows; + this.dMatrix = dMatrix; this.features = features; this.true_predicts = true_predicts; } @@ -120,7 +122,7 @@ public void run() { int r = this.rng.nextInt(this.test_rows); // In-place predict a single random row - float[][] predictions = booster.inplace_predict(this.testX[r], 1, this.features); + float[][] predictions = booster.inplace_predict(this.testX[r], 1, this.features, this.dMatrix); // Confirm results as expected if (predictions[0][0] != this.true_predicts[r][0]) { From 737b293bcd84929db1d12fab937d236e40028614 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Thu, 23 Mar 2023 13:46:43 -0600 Subject: [PATCH 070/126] some more changes to test --- .../test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java index bd9d26b160b6..de2a668febfc 100644 --- a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java +++ b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java @@ -151,12 +151,13 @@ class InplacePredictionTask implements Callable { Random 
rng = new Random(); int n_preds = 100; - public InplacePredictionTask(int n, Booster booster, float[][] testX, int test_rows, int features, float[][] true_predicts) { + public InplacePredictionTask(int n, Booster booster, float[][] testX, int test_rows, int features, DMatrix dMatrix, float[][] true_predicts) { this.task_num = n; this.booster = booster; this.testX = testX; this.test_rows = test_rows; this.features = features; + this.dMatrix = dMatrix; this.true_predicts = true_predicts; } @@ -170,7 +171,7 @@ public Boolean call() throws Exception { int r = this.rng.nextInt(this.test_rows); // In-place predict a single random row - float[][] predictions = booster.inplace_predict(this.testX[r], 1, this.features); + float[][] predictions = booster.inplace_predict(this.testX[r], 1, this.features, this.dMatrix); // Confirm results as expected if (predictions[0][0] != this.true_predicts[r][0]) { From 66f0d8c756f8086963de2b1479a2c119365bef6b Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Thu, 23 Mar 2023 13:54:15 -0600 Subject: [PATCH 071/126] Changes to tests --- .../src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java index de2a668febfc..0a15f93dde2c 100644 --- a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java +++ b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java @@ -146,6 +146,7 @@ class InplacePredictionTask implements Callable { float[][] testX; int test_rows; int features; + DMatrix dMatrix; float[][] true_predicts; Booster booster; Random rng = new Random(); @@ -359,7 +360,7 @@ public void testBoosterInplacePredict() throws XGBoostError, IOException { // Submit all the tasks for (int i=0; i Date: Thu, 23 Mar 2023 14:07:32 -0600 Subject: [PATCH 072/126] comment out 
for testing --- src/c_api/c_api.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 31ae4f3342cf..e6103370d5ba 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1028,11 +1028,11 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, CHECK_HANDLE(); xgboost::bst_ulong out_dim; std::shared_ptr p_m{nullptr}; - if (!dMatrixHandle) { + //if (!dMatrixHandle) { p_m.reset(new data::DMatrixProxy); - } else { + /*} else { p_m = *static_cast *>(dMatrixHandle); - } + }*/ auto proxy = dynamic_cast(p_m.get()); CHECK(proxy) << "Invalid input type for inplace predict."; auto *learner = static_cast(handle); From a367ca20be626f82fd728c1374b3b05910424573 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Thu, 23 Mar 2023 14:14:57 -0600 Subject: [PATCH 073/126] uncomment for test, pass dMatrix through Booster.java --- .../java/ml/dmlc/xgboost4j/java/Booster.java | 18 +++++++++--------- src/c_api/c_api.cc | 6 +++--- 2 files changed, 12 insertions(+), 12 deletions(-) diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java index dc05f618b431..cebe288820f7 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java @@ -334,7 +334,7 @@ public float[][] inplace_predict(float[] data, int num_rows, int num_features, DMatrix d_matrix) throws XGBoostError { - return this.inplace_predict(data, num_rows, num_features, d_matrix.getHandle(), + return this.inplace_predict(data, num_rows, num_features, d_matrix, Float.NaN, false, 0, false, false); } @@ -356,9 +356,9 @@ public float[][] inplace_predict(float[] data, public float[][] inplace_predict(float[] data, int num_rows, int num_features, - long d_matrix_handle, + DMatrix d_matrix, float missing) throws XGBoostError { - return this.inplace_predict(data, 
num_rows, num_features, d_matrix_handle, + return this.inplace_predict(data, num_rows, num_features, d_matrix, missing, false, 0, false, false); } @@ -383,10 +383,10 @@ public float[][] inplace_predict(float[] data, public float[][] inplace_predict(float[] data, int num_rows, int num_features, - long d_matrix_handle, + DMatrix d_matrix, float missing, boolean outputMargin) throws XGBoostError { - return this.inplace_predict(data, num_rows, num_features, d_matrix_handle, missing, + return this.inplace_predict(data, num_rows, num_features, d_matrix, missing, outputMargin, 0, false, false); } @@ -411,11 +411,11 @@ public float[][] inplace_predict(float[] data, public float[][] inplace_predict(float[] data, int num_rows, int num_features, - long d_matrix_handle, + DMatrix d_matrix, float missing, boolean outputMargin, int treeLimit) throws XGBoostError { - return this.inplace_predict(data, num_rows, num_features, d_matrix_handle, missing, + return this.inplace_predict(data, num_rows, num_features, d_matrix, missing, outputMargin, treeLimit, false, false); } @@ -437,7 +437,7 @@ public float[][] inplace_predict(float[] data, public float[][] inplace_predict(float[] data, int num_rows, int num_features, - long d_matrix_handle, + DMatrix d_matrix, float missing, boolean outputMargin, int treeLimit, @@ -456,7 +456,7 @@ public float[][] inplace_predict(float[] data, float[][] rawPredicts = new float[1][]; XGBoostJNI.checkCall(XGBoostJNI.XGBoosterInplacePredict(handle, data, num_rows, num_features, - d_matrix_handle, missing, + d_matrix.getHandle(), missing, optionMask, treeLimit, rawPredicts)); // pass missing and treelimit here? 
// System.out.println("Booster.inplace_predict rawPredicts[0].length = " + diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index e6103370d5ba..31ae4f3342cf 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1028,11 +1028,11 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, CHECK_HANDLE(); xgboost::bst_ulong out_dim; std::shared_ptr p_m{nullptr}; - //if (!dMatrixHandle) { + if (!dMatrixHandle) { p_m.reset(new data::DMatrixProxy); - /*} else { + } else { p_m = *static_cast *>(dMatrixHandle); - }*/ + } auto proxy = dynamic_cast(p_m.get()); CHECK(proxy) << "Invalid input type for inplace predict."; auto *learner = static_cast(handle); From 74c4376e6e22d8ff638e05e0ccd423118f693ac3 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Thu, 23 Mar 2023 14:59:54 -0600 Subject: [PATCH 074/126] maybe log something --- src/c_api/c_api.cc | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 31ae4f3342cf..4556a5a92bb6 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1027,12 +1027,14 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, API_BEGIN(); CHECK_HANDLE(); xgboost::bst_ulong out_dim; + //std::shared_ptr p_m(dMatrixHandle); std::shared_ptr p_m{nullptr}; if (!dMatrixHandle) { p_m.reset(new data::DMatrixProxy); } else { p_m = *static_cast *>(dMatrixHandle); } + std::cout << p_m; auto proxy = dynamic_cast(p_m.get()); CHECK(proxy) << "Invalid input type for inplace predict."; auto *learner = static_cast(handle); From eb4cd9a79b9ed0c14e1cf0a995658d0363122e10 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Thu, 23 Mar 2023 15:06:16 -0600 Subject: [PATCH 075/126] See if we can log what the type for proxy is --- src/c_api/c_api.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 4556a5a92bb6..3b6010ccd0cc 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1034,9 +1034,8 @@ XGB_DLL int 
XGBoosterInplacePredict(BoosterHandle handle, } else { p_m = *static_cast *>(dMatrixHandle); } - std::cout << p_m; auto proxy = dynamic_cast(p_m.get()); - CHECK(proxy) << "Invalid input type for inplace predict."; + CHECK(proxy) << "Invalid input type for inplace predict. :" << typeid(proxy).name(); auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); InplacePredictImplCore(p_m, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, From abd64ffe2be139a37ffcf6c71e0dfed7535926d1 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Thu, 23 Mar 2023 15:10:08 -0600 Subject: [PATCH 076/126] dereference for logging --- src/c_api/c_api.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 3b6010ccd0cc..2e28a1e6b678 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1035,7 +1035,7 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, p_m = *static_cast *>(dMatrixHandle); } auto proxy = dynamic_cast(p_m.get()); - CHECK(proxy) << "Invalid input type for inplace predict. :" << typeid(proxy).name(); + CHECK(proxy) << "Invalid input type for inplace predict. 
:" << typeid(*proxy).name(); auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); InplacePredictImplCore(p_m, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, From 7dfa3c08997766bd96626b522fe2d45df7fc1198 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Thu, 23 Mar 2023 15:25:17 -0600 Subject: [PATCH 077/126] No more check, do it live --- src/c_api/c_api.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 2e28a1e6b678..5efd7c70c51e 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1035,7 +1035,7 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, p_m = *static_cast *>(dMatrixHandle); } auto proxy = dynamic_cast(p_m.get()); - CHECK(proxy) << "Invalid input type for inplace predict. :" << typeid(*proxy).name(); + // CHECK(proxy) << "Invalid input type for inplace predict. :" << typeid(proxy).name(); auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); InplacePredictImplCore(p_m, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, From ad5972d4b1ed2804d63bbfb43fc1fe3e64e54383 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Thu, 23 Mar 2023 15:26:25 -0600 Subject: [PATCH 078/126] time to break the cpu predictor --- src/predictor/cpu_predictor.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index 444d1b089d21..10078c51870e 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -397,7 +397,7 @@ class CPUPredictor : public Predictor { PredictionCacheEntry *out_preds, uint32_t tree_begin, unsigned tree_end) const override { auto proxy = dynamic_cast(p_m.get()); - CHECK(proxy)<< "Inplace predict accepts only DMatrixProxy as input."; + // CHECK(proxy)<< "Inplace predict accepts only DMatrixProxy as input."; auto x = proxy->Adapter(); 
if (x.type() == typeid(std::shared_ptr)) { this->DispatchedInplacePredict( From 489c337c05b1c44c0a6ef75598d00e522324fbb3 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Fri, 24 Mar 2023 09:16:54 -0600 Subject: [PATCH 079/126] Put checks back in, change check in c_api --- src/c_api/c_api.cc | 2 +- src/predictor/cpu_predictor.cc | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 5efd7c70c51e..9e62333dedd0 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1035,7 +1035,7 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, p_m = *static_cast *>(dMatrixHandle); } auto proxy = dynamic_cast(p_m.get()); - // CHECK(proxy) << "Invalid input type for inplace predict. :" << typeid(proxy).name(); + CHECK(proxy!=NULL) << "Invalid input type for inplace predict." auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); InplacePredictImplCore(p_m, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index 10078c51870e..444d1b089d21 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -397,7 +397,7 @@ class CPUPredictor : public Predictor { PredictionCacheEntry *out_preds, uint32_t tree_begin, unsigned tree_end) const override { auto proxy = dynamic_cast(p_m.get()); - // CHECK(proxy)<< "Inplace predict accepts only DMatrixProxy as input."; + CHECK(proxy)<< "Inplace predict accepts only DMatrixProxy as input."; auto x = proxy->Adapter(); if (x.type() == typeid(std::shared_ptr)) { this->DispatchedInplacePredict( From 710dae5a3cbc8536ee61d0c94229fa6549713dc8 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Fri, 24 Mar 2023 09:27:25 -0600 Subject: [PATCH 080/126] missing semicolon --- src/c_api/c_api.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 
9e62333dedd0..afaa0a86c401 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1035,7 +1035,7 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, p_m = *static_cast *>(dMatrixHandle); } auto proxy = dynamic_cast(p_m.get()); - CHECK(proxy!=NULL) << "Invalid input type for inplace predict." + CHECK(proxy!=NULL) << "Invalid input type for inplace predict."; auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); InplacePredictImplCore(p_m, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, From 3e376885df917dd2d30b4ed1934709c232e9a646 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Fri, 24 Mar 2023 10:09:50 -0600 Subject: [PATCH 081/126] change error logging --- src/c_api/c_api.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index afaa0a86c401..4eb92fe64b85 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1035,7 +1035,10 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, p_m = *static_cast *>(dMatrixHandle); } auto proxy = dynamic_cast(p_m.get()); - CHECK(proxy!=NULL) << "Invalid input type for inplace predict."; + if (!proxy) { + fprintf (stderr, "proxy is null"); + exit(1); + } auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); InplacePredictImplCore(p_m, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, From 688fc737a46c9f79890596d47c0ee9d2a542fc9b Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Fri, 24 Mar 2023 10:17:33 -0600 Subject: [PATCH 082/126] More null checks --- src/c_api/c_api.cc | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 4eb92fe64b85..b681c987f30a 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1031,13 +1031,18 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, std::shared_ptr p_m{nullptr}; if 
(!dMatrixHandle) { p_m.reset(new data::DMatrixProxy); + if (!p_m) { + fprintf (stderr, "p_m 1 is null"); + } } else { p_m = *static_cast *>(dMatrixHandle); + if (!p_m) { + fprintf (stderr, "p_m 2 is null"); + } } auto proxy = dynamic_cast(p_m.get()); if (!proxy) { fprintf (stderr, "proxy is null"); - exit(1); } auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); From 2e6df6f0e76dc9fc58c90a068ad23d8a1a90c55d Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Fri, 24 Mar 2023 10:19:26 -0600 Subject: [PATCH 083/126] exit on entering ifs --- src/c_api/c_api.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index b681c987f30a..923ab2cd26ae 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1033,16 +1033,19 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, p_m.reset(new data::DMatrixProxy); if (!p_m) { fprintf (stderr, "p_m 1 is null"); + exit(1); } } else { p_m = *static_cast *>(dMatrixHandle); if (!p_m) { fprintf (stderr, "p_m 2 is null"); + exit(1); } } auto proxy = dynamic_cast(p_m.get()); if (!proxy) { fprintf (stderr, "proxy is null"); + exit(1); } auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); From 2f81ff3857de990b96240a011765751213f14a7a Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Fri, 24 Mar 2023 10:22:43 -0600 Subject: [PATCH 084/126] dMatrix Handle null check and more statments --- src/c_api/c_api.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 923ab2cd26ae..1d1dd7f5c2e5 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1029,14 +1029,20 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, xgboost::bst_ulong out_dim; //std::shared_ptr p_m(dMatrixHandle); std::shared_ptr p_m{nullptr}; + if (!dMatrixHandle) { + fprintf (stderr, "dMatrixHandle is null"); + exit(1); + } if (!dMatrixHandle) { p_m.reset(new 
data::DMatrixProxy); + fprintf (stdout, "dmatrix handle is null"); if (!p_m) { fprintf (stderr, "p_m 1 is null"); exit(1); } } else { p_m = *static_cast *>(dMatrixHandle); + fprintf (stdout, "dmatrix handle is not null"); if (!p_m) { fprintf (stderr, "p_m 2 is null"); exit(1); From c92d2dfc1201e65815695bb90403e45d3e24d657 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Fri, 24 Mar 2023 10:27:01 -0600 Subject: [PATCH 085/126] try only one path --- src/c_api/c_api.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 1d1dd7f5c2e5..6da2f826cdf9 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1041,7 +1041,8 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, exit(1); } } else { - p_m = *static_cast *>(dMatrixHandle); + //p_m = *static_cast *>(dMatrixHandle); + p_m.reset(new data::DMatrixProxy); fprintf (stdout, "dmatrix handle is not null"); if (!p_m) { fprintf (stderr, "p_m 2 is null"); From 2aa500b85173256f0da1624030f2ac125dbc025a Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Fri, 24 Mar 2023 11:07:27 -0600 Subject: [PATCH 086/126] Try a different way of creating the proxy --- src/c_api/c_api.cc | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 6da2f826cdf9..eeb7369c405b 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -322,7 +322,7 @@ XGB_DLL int XGQuantileDMatrixCreateFromCallback(DataIterHandle iter, DMatrixHand XGB_DLL int XGProxyDMatrixCreate(DMatrixHandle* out) { API_BEGIN(); xgboost_CHECK_C_ARG_PTR(out); - *out = new std::shared_ptr(new xgboost::data::DMatrixProxy);; + *out = new std::shared_ptr(new xgboost::data::DMatrixProxy); API_END(); } @@ -1041,15 +1041,15 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, exit(1); } } else { - //p_m = *static_cast *>(dMatrixHandle); - p_m.reset(new data::DMatrixProxy); + p_m = *static_cast *>(dMatrixHandle); + fprintf (stdout, "dmatrix 
handle is not null"); if (!p_m) { fprintf (stderr, "p_m 2 is null"); exit(1); } } - auto proxy = dynamic_cast(p_m.get()); + auto proxy = new std::shared_ptr(new xgboost::data::DMatrixProxy); if (!proxy) { fprintf (stderr, "proxy is null"); exit(1); From 95f2386c0886d36b1a0b54457e327a0bedaede69 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Fri, 24 Mar 2023 11:12:41 -0600 Subject: [PATCH 087/126] Logging in cpu predictor --- src/predictor/cpu_predictor.cc | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index 444d1b089d21..62ec0b18cce3 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -397,7 +397,10 @@ class CPUPredictor : public Predictor { PredictionCacheEntry *out_preds, uint32_t tree_begin, unsigned tree_end) const override { auto proxy = dynamic_cast(p_m.get()); - CHECK(proxy)<< "Inplace predict accepts only DMatrixProxy as input."; + if (!proxy) { + fprintf (stderr, "InplacePredict: proxy is null cpu variant"); + exit(1); + } auto x = proxy->Adapter(); if (x.type() == typeid(std::shared_ptr)) { this->DispatchedInplacePredict( From ba50e6eb627994011bc9f849e7d4b930910d426e Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Sun, 26 Mar 2023 00:10:03 +0800 Subject: [PATCH 088/126] [backport] [CI] Require C++17 + CMake 3.18; Use CUDA 11.8 in CI (#8853) (#8971) Co-authored-by: Philip Hyunsu Cho --- CMakeLists.txt | 5 +- R-package/CMakeLists.txt | 2 +- cmake/Utils.cmake | 30 ++---- demo/c-api/CMakeLists.txt | 2 +- demo/c-api/external-memory/CMakeLists.txt | 2 +- demo/c-api/inference/CMakeLists.txt | 2 +- doc/tutorials/c_api_tutorial.rst | 2 +- plugin/CMakeLists.txt | 2 +- rabit/CMakeLists.txt | 2 +- tests/buildkite/build-containers.sh | 5 + tests/buildkite/build-cuda-with-rmm.sh | 3 +- tests/buildkite/build-cuda.sh | 3 +- tests/buildkite/build-jvm-packages-gpu.sh | 4 +- tests/buildkite/build-win64-gpu.ps1 | 6 +- tests/buildkite/conftest.sh | 7 +- 
tests/buildkite/deploy-jvm-packages.sh | 1 + .../aws-stack-creator/create_stack.py | 87 +++-------------- .../aws-stack-creator/metadata.py | 14 +-- .../infrastructure/common_blocks/utils.py | 97 +++++++++++++++++++ .../create_worker_image_pipelines.py | 33 +++---- .../ec2-image-builder-pipeline-template.yml | 10 +- .../worker-image-pipeline/metadata.py | 2 +- .../windows-gpu-bootstrap.yml | 20 ++-- tests/buildkite/pipeline.yml | 10 +- tests/buildkite/test-cpp-gpu.sh | 1 + tests/ci_build/Dockerfile.clang_tidy | 4 +- tests/ci_build/Dockerfile.cpu | 4 +- tests/ci_build/Dockerfile.gpu | 4 +- tests/ci_build/Dockerfile.gpu_build_centos7 | 3 +- tests/ci_build/Dockerfile.gpu_build_r_centos7 | 4 +- tests/ci_build/Dockerfile.jvm | 4 +- tests/ci_build/Dockerfile.jvm_gpu_build | 7 +- tests/ci_build/Dockerfile.rmm | 7 +- tests/ci_build/build_r_pkg_with_cuda_win64.sh | 2 +- tests/python-gpu/test_gpu_prediction.py | 1 + 35 files changed, 215 insertions(+), 177 deletions(-) create mode 100644 tests/buildkite/infrastructure/common_blocks/utils.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 8a8c2e788354..af12a49484e5 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.14 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18 FATAL_ERROR) project(xgboost LANGUAGES CXX C VERSION 1.7.4) include(cmake/Utils.cmake) list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") @@ -168,9 +168,6 @@ find_package(Threads REQUIRED) if (USE_OPENMP) if (APPLE) - # Require CMake 3.16+ on Mac OSX, as previous versions of CMake had trouble locating - # OpenMP on Mac. 
See https://github.com/dmlc/xgboost/pull/5146#issuecomment-568312706 - cmake_minimum_required(VERSION 3.16) find_package(OpenMP) if (NOT OpenMP_FOUND) # Try again with extra path info; required for libomp 15+ from Homebrew diff --git a/R-package/CMakeLists.txt b/R-package/CMakeLists.txt index bf72bebde114..2c428cf32af6 100644 --- a/R-package/CMakeLists.txt +++ b/R-package/CMakeLists.txt @@ -31,7 +31,7 @@ if (USE_OPENMP) endif (USE_OPENMP) set_target_properties( xgboost-r PROPERTIES - CXX_STANDARD 14 + CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON POSITION_INDEPENDENT_CODE ON) diff --git a/cmake/Utils.cmake b/cmake/Utils.cmake index f28c1f2703e6..3a66735fe56f 100644 --- a/cmake/Utils.cmake +++ b/cmake/Utils.cmake @@ -178,17 +178,10 @@ function(xgboost_set_cuda_flags target) $<$:-Xcompiler=/utf-8>) endif (MSVC) - if (PLUGIN_RMM) - set_target_properties(${target} PROPERTIES - CUDA_STANDARD 17 - CUDA_STANDARD_REQUIRED ON - CUDA_SEPARABLE_COMPILATION OFF) - else () - set_target_properties(${target} PROPERTIES - CUDA_STANDARD 14 - CUDA_STANDARD_REQUIRED ON - CUDA_SEPARABLE_COMPILATION OFF) - endif (PLUGIN_RMM) + set_target_properties(${target} PROPERTIES + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + CUDA_SEPARABLE_COMPILATION OFF) endfunction(xgboost_set_cuda_flags) macro(xgboost_link_nccl target) @@ -205,17 +198,10 @@ endmacro(xgboost_link_nccl) # compile options macro(xgboost_target_properties target) - if (PLUGIN_RMM) - set_target_properties(${target} PROPERTIES - CXX_STANDARD 17 - CXX_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON) - else () - set_target_properties(${target} PROPERTIES - CXX_STANDARD 14 - CXX_STANDARD_REQUIRED ON - POSITION_INDEPENDENT_CODE ON) - endif (PLUGIN_RMM) + set_target_properties(${target} PROPERTIES + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + POSITION_INDEPENDENT_CODE ON) if (HIDE_CXX_SYMBOLS) #-- Hide all C++ symbols diff --git a/demo/c-api/CMakeLists.txt b/demo/c-api/CMakeLists.txt index 25764c12ae46..9764267aa7c9 100644 --- 
a/demo/c-api/CMakeLists.txt +++ b/demo/c-api/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.13) +cmake_minimum_required(VERSION 3.18) project(xgboost-c-examples) add_subdirectory(basic) diff --git a/demo/c-api/external-memory/CMakeLists.txt b/demo/c-api/external-memory/CMakeLists.txt index 0c21acb3ced8..5e68e9918013 100644 --- a/demo/c-api/external-memory/CMakeLists.txt +++ b/demo/c-api/external-memory/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.13) +cmake_minimum_required(VERSION 3.18) project(external-memory-demo LANGUAGES C VERSION 0.0.1) find_package(xgboost REQUIRED) diff --git a/demo/c-api/inference/CMakeLists.txt b/demo/c-api/inference/CMakeLists.txt index 4d0f3cd6e52a..6aa8f1dd2dd5 100644 --- a/demo/c-api/inference/CMakeLists.txt +++ b/demo/c-api/inference/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.13) +cmake_minimum_required(VERSION 3.18) project(inference-demo LANGUAGES C VERSION 0.0.1) find_package(xgboost REQUIRED) diff --git a/doc/tutorials/c_api_tutorial.rst b/doc/tutorials/c_api_tutorial.rst index 5d4cb68cf7da..ad881bb5c68a 100644 --- a/doc/tutorials/c_api_tutorial.rst +++ b/doc/tutorials/c_api_tutorial.rst @@ -45,7 +45,7 @@ Use ``find_package()`` and ``target_link_libraries()`` in your application's CMa .. 
code-block:: cmake - cmake_minimum_required(VERSION 3.13) + cmake_minimum_required(VERSION 3.18) project(your_project_name LANGUAGES C CXX VERSION your_project_version) find_package(xgboost REQUIRED) add_executable(your_project_name /path/to/project_file.c) diff --git a/plugin/CMakeLists.txt b/plugin/CMakeLists.txt index 485f1cc3c33b..7026238e30cf 100644 --- a/plugin/CMakeLists.txt +++ b/plugin/CMakeLists.txt @@ -15,7 +15,7 @@ if (PLUGIN_UPDATER_ONEAPI) target_link_libraries(oneapi_plugin PUBLIC -fsycl) set_target_properties(oneapi_plugin PROPERTIES COMPILE_FLAGS -fsycl - CXX_STANDARD 14 + CXX_STANDARD 17 CXX_STANDARD_REQUIRED ON POSITION_INDEPENDENT_CODE ON) if (USE_OPENMP) diff --git a/rabit/CMakeLists.txt b/rabit/CMakeLists.txt index ad39fb249791..ab8171b2b0b9 100644 --- a/rabit/CMakeLists.txt +++ b/rabit/CMakeLists.txt @@ -1,4 +1,4 @@ -cmake_minimum_required(VERSION 3.3) +cmake_minimum_required(VERSION 3.18) find_package(Threads REQUIRED) diff --git a/tests/buildkite/build-containers.sh b/tests/buildkite/build-containers.sh index 41a13eaea5fb..899976a7ddf7 100755 --- a/tests/buildkite/build-containers.sh +++ b/tests/buildkite/build-containers.sh @@ -23,10 +23,15 @@ case "${container}" in gpu|rmm) BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION" BUILD_ARGS="$BUILD_ARGS --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION" + if [[ $container == "rmm" ]] + then + BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION" + fi ;; gpu_build_centos7|jvm_gpu_build) BUILD_ARGS="$BUILD_ARGS --build-arg CUDA_VERSION_ARG=$CUDA_VERSION" + BUILD_ARGS="$BUILD_ARGS --build-arg NCCL_VERSION_ARG=$NCCL_VERSION" ;; *) diff --git a/tests/buildkite/build-cuda-with-rmm.sh b/tests/buildkite/build-cuda-with-rmm.sh index ae704ce662ab..f474f318b675 100755 --- a/tests/buildkite/build-cuda-with-rmm.sh +++ b/tests/buildkite/build-cuda-with-rmm.sh @@ -15,7 +15,8 @@ fi command_wrapper="tests/ci_build/ci_build.sh rmm docker --build-arg "` `"CUDA_VERSION_ARG=$CUDA_VERSION 
--build-arg "` - `"RAPIDS_VERSION_ARG=$RAPIDS_VERSION" + `"RAPIDS_VERSION_ARG=$RAPIDS_VERSION --build-arg "` + `"NCCL_VERSION_ARG=$NCCL_VERSION" echo "--- Build libxgboost from the source" $command_wrapper tests/ci_build/build_via_cmake.sh --conda-env=gpu_test -DUSE_CUDA=ON \ diff --git a/tests/buildkite/build-cuda.sh b/tests/buildkite/build-cuda.sh index a50963f7c7fc..b25345b1bbb1 100755 --- a/tests/buildkite/build-cuda.sh +++ b/tests/buildkite/build-cuda.sh @@ -16,7 +16,8 @@ else fi command_wrapper="tests/ci_build/ci_build.sh gpu_build_centos7 docker --build-arg "` - `"CUDA_VERSION_ARG=$CUDA_VERSION" + `"CUDA_VERSION_ARG=$CUDA_VERSION --build-arg "` + `"NCCL_VERSION_ARG=$NCCL_VERSION" echo "--- Build libxgboost from the source" $command_wrapper tests/ci_build/prune_libnccl.sh diff --git a/tests/buildkite/build-jvm-packages-gpu.sh b/tests/buildkite/build-jvm-packages-gpu.sh index 30e73eb37386..6a9a29cb39fb 100755 --- a/tests/buildkite/build-jvm-packages-gpu.sh +++ b/tests/buildkite/build-jvm-packages-gpu.sh @@ -14,5 +14,7 @@ else fi tests/ci_build/ci_build.sh jvm_gpu_build nvidia-docker \ - --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} tests/ci_build/build_jvm_packages.sh \ + --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \ + --build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \ + tests/ci_build/build_jvm_packages.sh \ ${SPARK_VERSION} -Duse.cuda=ON ${arch_flag} diff --git a/tests/buildkite/build-win64-gpu.ps1 b/tests/buildkite/build-win64-gpu.ps1 index 6ee723abbec0..05d7aefb9048 100644 --- a/tests/buildkite/build-win64-gpu.ps1 +++ b/tests/buildkite/build-win64-gpu.ps1 @@ -12,10 +12,10 @@ if ( $is_release_branch -eq 0 ) { } mkdir build cd build -cmake .. -G"Visual Studio 15 2017 Win64" -DUSE_CUDA=ON -DCMAKE_VERBOSE_MAKEFILE=ON ` - -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON -DCMAKE_UNITY_BUILD=ON ${arch_flag} +cmake .. 
-G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DCMAKE_VERBOSE_MAKEFILE=ON ` + -DGOOGLE_TEST=ON -DUSE_DMLC_GTEST=ON ${arch_flag} $msbuild = -join @( - "C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Community\\MSBuild\\15.0" + "C:\\Program Files\\Microsoft Visual Studio\\2022\\Community\\MSBuild\\Current" "\\Bin\\MSBuild.exe" ) & $msbuild xgboost.sln /m /p:Configuration=Release /nodeReuse:false diff --git a/tests/buildkite/conftest.sh b/tests/buildkite/conftest.sh index 30ef4aeab3bc..cf9270c1162c 100755 --- a/tests/buildkite/conftest.sh +++ b/tests/buildkite/conftest.sh @@ -22,9 +22,10 @@ function set_buildkite_env_vars_in_container { set -x -CUDA_VERSION=11.0.3 -RAPIDS_VERSION=22.10 -SPARK_VERSION=3.0.1 +CUDA_VERSION=11.8.0 +NCCL_VERSION=2.16.5-1 +RAPIDS_VERSION=23.02 +SPARK_VERSION=3.1.1 JDK_VERSION=8 if [[ -z ${BUILDKITE:-} ]] diff --git a/tests/buildkite/deploy-jvm-packages.sh b/tests/buildkite/deploy-jvm-packages.sh index 6ae5a719db47..a3410b294bd6 100755 --- a/tests/buildkite/deploy-jvm-packages.sh +++ b/tests/buildkite/deploy-jvm-packages.sh @@ -9,5 +9,6 @@ then echo "--- Deploy JVM packages to xgboost-maven-repo S3 repo" tests/ci_build/ci_build.sh jvm_gpu_build docker \ --build-arg CUDA_VERSION_ARG=${CUDA_VERSION} \ + --build-arg NCCL_VERSION_ARG=${NCCL_VERSION} \ tests/ci_build/deploy_jvm_packages.sh ${SPARK_VERSION} fi diff --git a/tests/buildkite/infrastructure/aws-stack-creator/create_stack.py b/tests/buildkite/infrastructure/aws-stack-creator/create_stack.py index b9409de4cdcf..4277eed53318 100644 --- a/tests/buildkite/infrastructure/aws-stack-creator/create_stack.py +++ b/tests/buildkite/infrastructure/aws-stack-creator/create_stack.py @@ -2,12 +2,16 @@ import copy import os import re +import sys import boto3 import botocore from metadata import AMI_ID, COMMON_STACK_PARAMS, STACK_PARAMS current_dir = os.path.dirname(__file__) +sys.path.append(os.path.join(current_dir, "..")) + +from common_blocks.utils import create_or_update_stack, wait 
TEMPLATE_URL = "https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml" @@ -68,72 +72,7 @@ def get_full_stack_id(stack_id): return f"buildkite-{stack_id}-autoscaling-group" -def stack_exists(args, *, stack_name): - client = boto3.client("cloudformation", region_name=args.aws_region) - waiter = client.get_waiter("stack_exists") - try: - waiter.wait(StackName=stack_name, WaiterConfig={"MaxAttempts": 1}) - return True - except botocore.exceptions.WaiterError as e: - return False - - -def create_or_update_stack( - args, *, stack_name, template_url=None, template_body=None, params=None -): - kwargs = { - "StackName": stack_name, - "Capabilities": [ - "CAPABILITY_IAM", - "CAPABILITY_NAMED_IAM", - "CAPABILITY_AUTO_EXPAND", - ], - } - if template_url: - kwargs["TemplateURL"] = template_url - if template_body: - kwargs["TemplateBody"] = template_body - if params: - kwargs["Parameters"] = params - - client = boto3.client("cloudformation", region_name=args.aws_region) - - if stack_exists(args, stack_name=stack_name): - print(f"Stack {stack_name} already exists. 
Updating...") - try: - response = client.update_stack(**kwargs) - return {"StackName": stack_name, "Action": "update"} - except botocore.exceptions.ClientError as e: - if e.response["Error"]["Code"] == "ValidationError" and re.search( - "No updates are to be performed", e.response["Error"]["Message"] - ): - print(f"No update was made to {stack_name}") - return {"StackName": stack_name, "Action": "noop"} - else: - raise e - else: - kwargs.update({"OnFailure": "ROLLBACK", "EnableTerminationProtection": False}) - response = client.create_stack(**kwargs) - return {"StackName": stack_name, "Action": "create"} - - -def wait(promise): - client = boto3.client("cloudformation", region_name=args.aws_region) - stack_name = promise["StackName"] - print(f"Waiting for {stack_name}...") - if promise["Action"] == "create": - waiter = client.get_waiter("stack_create_complete") - waiter.wait(StackName=stack_name) - print(f"Finished creating stack {stack_name}") - elif promise["Action"] == "update": - waiter = client.get_waiter("stack_update_complete") - waiter.wait(StackName=stack_name) - print(f"Finished updating stack {stack_name}") - elif promise["Action"] != "noop": - raise ValueError(f"Invalid promise {promise}") - - -def create_agent_iam_policy(args): +def create_agent_iam_policy(args, *, client): policy_stack_name = "buildkite-agent-iam-policy" print(f"Creating stack {policy_stack_name} for agent IAM policy...") with open( @@ -142,9 +81,9 @@ def create_agent_iam_policy(args): ) as f: policy_template = f.read() promise = create_or_update_stack( - args, stack_name=policy_stack_name, template_body=policy_template + args, client=client, stack_name=policy_stack_name, template_body=policy_template ) - wait(promise) + wait(promise, client=client) cf = boto3.resource("cloudformation", region_name=args.aws_region) policy = cf.StackResource(policy_stack_name, "BuildkiteAgentManagedPolicy") @@ -152,10 +91,10 @@ def create_agent_iam_policy(args): def main(args): - agent_iam_policy = 
create_agent_iam_policy(args) - client = boto3.client("cloudformation", region_name=args.aws_region) + agent_iam_policy = create_agent_iam_policy(args, client=client) + promises = [] for stack_id in AMI_ID: @@ -167,13 +106,17 @@ def main(args): ) promise = create_or_update_stack( - args, stack_name=stack_id_full, template_url=TEMPLATE_URL, params=params + args, + client=client, + stack_name=stack_id_full, + template_url=TEMPLATE_URL, + params=params, ) promises.append(promise) print(f"CI stack {stack_id_full} is in progress in the background") for promise in promises: - wait(promise) + wait(promise, client=client) if __name__ == "__main__": diff --git a/tests/buildkite/infrastructure/aws-stack-creator/metadata.py b/tests/buildkite/infrastructure/aws-stack-creator/metadata.py index edb4cc036f9c..30aa20a09500 100644 --- a/tests/buildkite/infrastructure/aws-stack-creator/metadata.py +++ b/tests/buildkite/infrastructure/aws-stack-creator/metadata.py @@ -1,27 +1,27 @@ AMI_ID = { # Managed by XGBoost team "linux-amd64-gpu": { - "us-west-2": "ami-00ed92bd37f77bc33", + "us-west-2": "ami-094271bed4788ddb5", }, "linux-amd64-mgpu": { - "us-west-2": "ami-00ed92bd37f77bc33", + "us-west-2": "ami-094271bed4788ddb5", }, "windows-gpu": { - "us-west-2": "ami-0a1a2ea551a07ad5f", + "us-west-2": "ami-0839681594a1d7627", }, "windows-cpu": { - "us-west-2": "ami-0a1a2ea551a07ad5f", + "us-west-2": "ami-0839681594a1d7627", }, # Managed by BuildKite # from https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml "linux-amd64-cpu": { - "us-west-2": "ami-075d4c25d5f0c17c1", + "us-west-2": "ami-00f2127550cf03658", }, "pipeline-loader": { - "us-west-2": "ami-075d4c25d5f0c17c1", + "us-west-2": "ami-00f2127550cf03658", }, "linux-arm64-cpu": { - "us-west-2": "ami-0952c6fb6db9a9891", + "us-west-2": "ami-0c5789068f4a2d1b5", }, } diff --git a/tests/buildkite/infrastructure/common_blocks/utils.py b/tests/buildkite/infrastructure/common_blocks/utils.py new file mode 100644 index 
000000000000..27a0835e8dc0 --- /dev/null +++ b/tests/buildkite/infrastructure/common_blocks/utils.py @@ -0,0 +1,97 @@ +import re + +import boto3 +import botocore + + +def stack_exists(args, *, stack_name): + client = boto3.client("cloudformation", region_name=args.aws_region) + waiter = client.get_waiter("stack_exists") + try: + waiter.wait(StackName=stack_name, WaiterConfig={"MaxAttempts": 1}) + return True + except botocore.exceptions.WaiterError as e: + return False + + +def create_or_update_stack( + args, *, client, stack_name, template_url=None, template_body=None, params=None +): + kwargs = { + "StackName": stack_name, + "Capabilities": [ + "CAPABILITY_IAM", + "CAPABILITY_NAMED_IAM", + "CAPABILITY_AUTO_EXPAND", + ], + } + if template_url: + kwargs["TemplateURL"] = template_url + if template_body: + kwargs["TemplateBody"] = template_body + if params: + kwargs["Parameters"] = params + + if stack_exists(args, stack_name=stack_name): + print(f"Stack {stack_name} already exists. Updating...") + try: + response = client.update_stack(**kwargs) + return {"StackName": stack_name, "Action": "update"} + except botocore.exceptions.ClientError as e: + if e.response["Error"]["Code"] == "ValidationError" and re.search( + "No updates are to be performed", e.response["Error"]["Message"] + ): + print(f"No update was made to {stack_name}") + return {"StackName": stack_name, "Action": "noop"} + else: + raise e + else: + kwargs.update({"OnFailure": "ROLLBACK", "EnableTerminationProtection": False}) + response = client.create_stack(**kwargs) + return {"StackName": stack_name, "Action": "create"} + + +def replace_stack( + args, *, client, stack_name, template_url=None, template_body=None, params=None +): + """Delete an existing stack and create a new stack with identical name""" + + if not stack_exists(args, stack_name=stack_name): + raise ValueError(f"Stack {stack_name} does not exist") + r = client.delete_stack(StackName=stack_name) + delete_waiter = 
client.get_waiter("stack_delete_complete") + delete_waiter.wait(StackName=stack_name) + + kwargs = { + "StackName": stack_name, + "Capabilities": [ + "CAPABILITY_IAM", + "CAPABILITY_NAMED_IAM", + "CAPABILITY_AUTO_EXPAND", + ], + "OnFailure": "ROLLBACK", + "EnableTerminationProtection": False, + } + if template_url: + kwargs["TemplateURL"] = template_url + if template_body: + kwargs["TemplateBody"] = template_body + if params: + kwargs["Parameters"] = params + response = client.create_stack(**kwargs) + return {"StackName": stack_name, "Action": "create"} + + +def wait(promise, *, client): + stack_name = promise["StackName"] + print(f"Waiting for {stack_name}...") + if promise["Action"] == "create": + waiter = client.get_waiter("stack_create_complete") + waiter.wait(StackName=stack_name) + print(f"Finished creating stack {stack_name}") + elif promise["Action"] == "update": + waiter = client.get_waiter("stack_update_complete") + waiter.wait(StackName=stack_name) + print(f"Finished updating stack {stack_name}") + elif promise["Action"] != "noop": + raise ValueError(f"Invalid promise {promise}") diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/create_worker_image_pipelines.py b/tests/buildkite/infrastructure/worker-image-pipeline/create_worker_image_pipelines.py index 0c71d5e772f2..8051b991da51 100644 --- a/tests/buildkite/infrastructure/worker-image-pipeline/create_worker_image_pipelines.py +++ b/tests/buildkite/infrastructure/worker-image-pipeline/create_worker_image_pipelines.py @@ -2,6 +2,7 @@ import copy import json import os +import sys from urllib.request import urlopen import boto3 @@ -9,6 +10,9 @@ from metadata import IMAGE_PARAMS current_dir = os.path.dirname(__file__) +sys.path.append(os.path.join(current_dir, "..")) + +from common_blocks.utils import replace_stack, wait BUILDKITE_CF_TEMPLATE_URL = ( "https://s3.amazonaws.com/buildkite-aws-stack/latest/aws-stack.yml" @@ -47,6 +51,9 @@ def main(args): ami_mapping = get_ami_mapping() + client = 
boto3.client("cloudformation", region_name=args.aws_region) + promises = [] + for stack_id in IMAGE_PARAMS: stack_id_full = get_full_stack_id(stack_id) print(f"Creating EC2 image builder stack {stack_id_full}...") @@ -55,28 +62,20 @@ def main(args): stack_id=stack_id, aws_region=args.aws_region, ami_mapping=ami_mapping ) - client = boto3.client("cloudformation", region_name=args.aws_region) - response = client.create_stack( - StackName=stack_id_full, - TemplateBody=ec2_image_pipeline_template, - Capabilities=[ - "CAPABILITY_IAM", - "CAPABILITY_NAMED_IAM", - "CAPABILITY_AUTO_EXPAND", - ], - OnFailure="ROLLBACK", - EnableTerminationProtection=False, - Parameters=params, + promise = replace_stack( + args, + client=client, + stack_name=stack_id_full, + template_body=ec2_image_pipeline_template, + params=params, ) + promises.append(promise) print( f"EC2 image builder stack {stack_id_full} is in progress in the background" ) - for stack_id in IMAGE_PARAMS: - stack_id_full = get_full_stack_id(stack_id) - waiter = client.get_waiter("stack_create_complete") - waiter.wait(StackName=stack_id_full) - print(f"EC2 image builder stack {stack_id_full} is now finished.") + for promise in promises: + wait(promise, client=client) if __name__ == "__main__": diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/ec2-image-builder-pipeline-template.yml b/tests/buildkite/infrastructure/worker-image-pipeline/ec2-image-builder-pipeline-template.yml index 478adf3d4dab..8d3bafa72f08 100644 --- a/tests/buildkite/infrastructure/worker-image-pipeline/ec2-image-builder-pipeline-template.yml +++ b/tests/buildkite/infrastructure/worker-image-pipeline/ec2-image-builder-pipeline-template.yml @@ -58,7 +58,7 @@ Resources: BootstrapComponent: Type: AWS::ImageBuilder::Component Properties: - Name: !Sub "${AWS::StackName}-bootstrap-component" + Name: !Join ["-", [!Ref AWS::StackName, "bootstrap-component", !Select [2, !Split ['/', !Ref AWS::StackId]]]] Platform: !Ref InstanceOperatingSystem 
Version: "1.0.0" Description: Execute a bootstrap script. @@ -67,7 +67,7 @@ Resources: Recipe: Type: AWS::ImageBuilder::ImageRecipe Properties: - Name: !Sub "${AWS::StackName}-image" + Name: !Join ["-", [!Ref AWS::StackName, "image", !Select [2, !Split ['/', !Ref AWS::StackId]]]] Components: - ComponentArn: !Ref BootstrapComponent ParentImage: !Ref BaseImageId @@ -83,7 +83,7 @@ Resources: Infrastructure: Type: AWS::ImageBuilder::InfrastructureConfiguration Properties: - Name: !Sub "${AWS::StackName}-image-pipeline-infrastructure" + Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline-infrastructure", !Select [2, !Split ['/', !Ref AWS::StackId]]]] InstanceProfileName: !Ref InstanceProfile InstanceTypes: - !Ref InstanceType @@ -93,7 +93,7 @@ Resources: Distribution: Type: AWS::ImageBuilder::DistributionConfiguration Properties: - Name: !Sub "${AWS::StackName}-image-pipeline-distribution-config" + Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline-distribution-config", !Select [2, !Split ['/', !Ref AWS::StackId]]]] Distributions: - Region: !Ref AWS::Region AmiDistributionConfiguration: {} @@ -102,7 +102,7 @@ Resources: Pipeline: Type: AWS::ImageBuilder::ImagePipeline Properties: - Name: !Sub "${AWS::StackName}-image-pipeline" + Name: !Join ["-", [!Ref AWS::StackName, "image-pipeline", !Select [2, !Split ['/', !Ref AWS::StackId]]]] DistributionConfigurationArn: !Ref Distribution ImageRecipeArn: !Ref Recipe InfrastructureConfigurationArn: !Ref Infrastructure diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/metadata.py b/tests/buildkite/infrastructure/worker-image-pipeline/metadata.py index c74914e54840..37100209fe2e 100644 --- a/tests/buildkite/infrastructure/worker-image-pipeline/metadata.py +++ b/tests/buildkite/infrastructure/worker-image-pipeline/metadata.py @@ -13,6 +13,6 @@ "BootstrapScript": "windows-gpu-bootstrap.yml", "InstanceType": "g4dn.2xlarge", "InstanceOperatingSystem": "Windows", - "VolumeSize": "80", # in GiBs + "VolumeSize": 
"120", # in GiBs }, } diff --git a/tests/buildkite/infrastructure/worker-image-pipeline/windows-gpu-bootstrap.yml b/tests/buildkite/infrastructure/worker-image-pipeline/windows-gpu-bootstrap.yml index ef3fade44f16..03fb105a7011 100644 --- a/tests/buildkite/infrastructure/worker-image-pipeline/windows-gpu-bootstrap.yml +++ b/tests/buildkite/infrastructure/worker-image-pipeline/windows-gpu-bootstrap.yml @@ -15,9 +15,9 @@ phases: choco --version choco feature enable -n=allowGlobalConfirmation - # CMake 3.18 - Write-Host '>>> Installing CMake 3.18...' - choco install cmake --version 3.18.0 --installargs "ADD_CMAKE_TO_PATH=System" + # CMake 3.25 + Write-Host '>>> Installing CMake 3.25...' + choco install cmake --version 3.25.2 --installargs "ADD_CMAKE_TO_PATH=System" if ($LASTEXITCODE -ne 0) { throw "Last command failed" } # Notepad++ @@ -45,18 +45,18 @@ phases: choco install graphviz if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - # Install Visual Studio Community 2017 (15.9) - Write-Host '>>> Installing Visual Studio 2017 Community (15.9)...' - choco install visualstudio2017community --version 15.9.23.0 ` + # Install Visual Studio 2022 Community + Write-Host '>>> Installing Visual Studio 2022 Community...' + choco install visualstudio2022community ` --params "--wait --passive --norestart" if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - choco install visualstudio2017-workload-nativedesktop --params ` + choco install visualstudio2022-workload-nativedesktop --params ` "--wait --passive --norestart --includeOptional" if ($LASTEXITCODE -ne 0) { throw "Last command failed" } - # Install CUDA 11.0 - Write-Host '>>> Installing CUDA 11.0...' - choco install cuda --version 11.0.3 + # Install CUDA 11.8 + Write-Host '>>> Installing CUDA 11.8...' 
+ choco install cuda --version=11.8.0.52206 if ($LASTEXITCODE -ne 0) { throw "Last command failed" } # Install Python packages diff --git a/tests/buildkite/pipeline.yml b/tests/buildkite/pipeline.yml index e2a4fcaf2405..2f01c36dbc04 100644 --- a/tests/buildkite/pipeline.yml +++ b/tests/buildkite/pipeline.yml @@ -22,11 +22,11 @@ steps: queue: linux-amd64-cpu - wait #### -------- BUILD -------- - - label: ":console: Run clang-tidy" - command: "tests/buildkite/run-clang-tidy.sh" - key: run-clang-tidy - agents: - queue: linux-amd64-cpu + # - label: ":console: Run clang-tidy" + # command: "tests/buildkite/run-clang-tidy.sh" + # key: run-clang-tidy + # agents: + # queue: linux-amd64-cpu - wait - label: ":console: Build CPU" command: "tests/buildkite/build-cpu.sh" diff --git a/tests/buildkite/test-cpp-gpu.sh b/tests/buildkite/test-cpp-gpu.sh index f1ddf9d5f5e0..75a600d7a855 100755 --- a/tests/buildkite/test-cpp-gpu.sh +++ b/tests/buildkite/test-cpp-gpu.sh @@ -20,4 +20,5 @@ tests/ci_build/ci_build.sh gpu nvidia-docker \ # tests/ci_build/ci_build.sh rmm nvidia-docker \ # --build-arg CUDA_VERSION_ARG=$CUDA_VERSION \ # --build-arg RAPIDS_VERSION_ARG=$RAPIDS_VERSION bash -c \ +# --build-arg NCCL_VERSION_ARG=$NCCL_VERSION bash -c \ # "source activate gpu_test && build/testxgboost --use-rmm-pool" diff --git a/tests/ci_build/Dockerfile.clang_tidy b/tests/ci_build/Dockerfile.clang_tidy index b0166f240b8b..3a33a080cd8b 100644 --- a/tests/ci_build/Dockerfile.clang_tidy +++ b/tests/ci_build/Dockerfile.clang_tidy @@ -15,8 +15,8 @@ RUN \ add-apt-repository -u 'deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-11 main' && \ apt-get update && \ apt-get install -y llvm-11 clang-tidy-11 clang-11 && \ - wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ - bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr + wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ + bash 
cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr # Set default clang-tidy version RUN \ diff --git a/tests/ci_build/Dockerfile.cpu b/tests/ci_build/Dockerfile.cpu index 54c3c3af4ec7..4e56d2bf5c98 100644 --- a/tests/ci_build/Dockerfile.cpu +++ b/tests/ci_build/Dockerfile.cpu @@ -12,8 +12,8 @@ RUN \ apt-get update && \ apt-get install -y tar unzip wget git build-essential doxygen graphviz llvm libasan2 libidn11 ninja-build gcc-8 g++-8 openjdk-8-jdk-headless && \ # CMake - wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ - bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ + wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ + bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ # Python wget -nv https://github.com/conda-forge/miniforge/releases/latest/download/Mambaforge-Linux-x86_64.sh && \ bash Mambaforge-Linux-x86_64.sh -b -p /opt/python diff --git a/tests/ci_build/Dockerfile.gpu b/tests/ci_build/Dockerfile.gpu index d149638acc92..48830722d33c 100644 --- a/tests/ci_build/Dockerfile.gpu +++ b/tests/ci_build/Dockerfile.gpu @@ -22,10 +22,10 @@ ENV PATH=/opt/python/bin:$PATH RUN \ conda install -c conda-forge mamba && \ mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \ - python=3.9 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \ + python=3.10 cudf=$RAPIDS_VERSION_ARG* rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG \ dask dask-cuda=$RAPIDS_VERSION_ARG* dask-cudf=$RAPIDS_VERSION_ARG* cupy \ numpy pytest pytest-timeout scipy scikit-learn pandas matplotlib wheel python-kubernetes urllib3 graphviz hypothesis \ - pyspark cloudpickle cuda-python=11.7.0 && \ + pyspark cloudpickle cuda-python && \ mamba clean --all && \ conda run --no-capture-output -n gpu_test pip install buildkite-test-collector diff --git 
a/tests/ci_build/Dockerfile.gpu_build_centos7 b/tests/ci_build/Dockerfile.gpu_build_centos7 index 4168e430d91e..bfe967d02312 100644 --- a/tests/ci_build/Dockerfile.gpu_build_centos7 +++ b/tests/ci_build/Dockerfile.gpu_build_centos7 @@ -1,6 +1,7 @@ ARG CUDA_VERSION_ARG FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7 ARG CUDA_VERSION_ARG +ARG NCCL_VERSION_ARG # Install all basic requirements RUN \ @@ -21,7 +22,7 @@ RUN \ # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) RUN \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \ - export NCCL_VERSION=2.13.4-1 && \ + export NCCL_VERSION=$NCCL_VERSION_ARG && \ wget -nv -nc https://developer.download.nvidia.com/compute/machine-learning/repos/rhel7/x86_64/nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ rpm -i nvidia-machine-learning-repo-rhel7-1.0.0-1.x86_64.rpm && \ yum -y update && \ diff --git a/tests/ci_build/Dockerfile.gpu_build_r_centos7 b/tests/ci_build/Dockerfile.gpu_build_r_centos7 index 54a63a242202..675e50af3a44 100644 --- a/tests/ci_build/Dockerfile.gpu_build_r_centos7 +++ b/tests/ci_build/Dockerfile.gpu_build_r_centos7 @@ -36,8 +36,8 @@ RUN \ bash Miniconda3.sh -b -p /opt/python && \ /opt/python/bin/python -m pip install auditwheel awscli && \ # CMake - wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ - bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr + wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ + bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr ENV GOSU_VERSION 1.10 diff --git a/tests/ci_build/Dockerfile.jvm b/tests/ci_build/Dockerfile.jvm index 9c7001adebf4..0b2c29597f8b 100644 --- a/tests/ci_build/Dockerfile.jvm +++ b/tests/ci_build/Dockerfile.jvm @@ -12,8 +12,8 @@ RUN \ wget -nv -nc -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ bash Miniconda3.sh -b -p 
/opt/python && \ # CMake - wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ - bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ + wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ + bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ # Maven wget -nv -nc https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \ tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \ diff --git a/tests/ci_build/Dockerfile.jvm_gpu_build b/tests/ci_build/Dockerfile.jvm_gpu_build index cddbb1f65729..f214052ae061 100644 --- a/tests/ci_build/Dockerfile.jvm_gpu_build +++ b/tests/ci_build/Dockerfile.jvm_gpu_build @@ -1,6 +1,7 @@ ARG CUDA_VERSION_ARG FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-centos7 ARG CUDA_VERSION_ARG +ARG NCCL_VERSION_ARG # Install all basic requirements RUN \ @@ -14,8 +15,8 @@ RUN \ wget -nv -nc -O Miniconda3.sh https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh && \ bash Miniconda3.sh -b -p /opt/python && \ # CMake - wget -nv -nc https://cmake.org/files/v3.14/cmake-3.14.0-Linux-x86_64.sh --no-check-certificate && \ - bash cmake-3.14.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ + wget -nv -nc https://cmake.org/files/v3.18/cmake-3.18.0-Linux-x86_64.sh --no-check-certificate && \ + bash cmake-3.18.0-Linux-x86_64.sh --skip-license --prefix=/usr && \ # Maven wget -nv -nc https://archive.apache.org/dist/maven/maven-3/3.6.1/binaries/apache-maven-3.6.1-bin.tar.gz && \ tar xvf apache-maven-3.6.1-bin.tar.gz -C /opt && \ @@ -24,7 +25,7 @@ RUN \ # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) RUN \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \ - export NCCL_VERSION=2.13.4-1 && \ + export NCCL_VERSION=$NCCL_VERSION_ARG && \ yum-config-manager --add-repo 
http://developer.download.nvidia.com/compute/cuda/repos/rhel7/x86_64/cuda-rhel7.repo && \ yum -y update && \ yum install -y libnccl-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-devel-${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-static-${NCCL_VERSION}+cuda${CUDA_SHORT} diff --git a/tests/ci_build/Dockerfile.rmm b/tests/ci_build/Dockerfile.rmm index 0fbe44865456..0d3dfeca93b2 100644 --- a/tests/ci_build/Dockerfile.rmm +++ b/tests/ci_build/Dockerfile.rmm @@ -1,7 +1,8 @@ ARG CUDA_VERSION_ARG -FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu18.04 +FROM nvidia/cuda:$CUDA_VERSION_ARG-devel-ubuntu20.04 ARG CUDA_VERSION_ARG ARG RAPIDS_VERSION_ARG +ARG NCCL_VERSION_ARG # Environment ENV DEBIAN_FRONTEND noninteractive @@ -19,7 +20,7 @@ RUN \ # NCCL2 (License: https://docs.nvidia.com/deeplearning/sdk/nccl-sla/index.html) RUN \ export CUDA_SHORT=`echo $CUDA_VERSION_ARG | grep -o -E '[0-9]+\.[0-9]'` && \ - export NCCL_VERSION=2.13.4-1 && \ + export NCCL_VERSION=$NCCL_VERSION_ARG && \ apt-get update && \ apt-get install -y --allow-downgrades --allow-change-held-packages libnccl2=${NCCL_VERSION}+cuda${CUDA_SHORT} libnccl-dev=${NCCL_VERSION}+cuda${CUDA_SHORT} @@ -29,7 +30,7 @@ ENV PATH=/opt/python/bin:$PATH RUN \ conda install -c conda-forge mamba && \ mamba create -n gpu_test -c rapidsai-nightly -c rapidsai -c nvidia -c conda-forge -c defaults \ - python=3.9 rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG cmake && \ + python=3.10 rmm=$RAPIDS_VERSION_ARG* cudatoolkit=$CUDA_VERSION_ARG cmake && \ mamba clean --all ENV GOSU_VERSION 1.10 diff --git a/tests/ci_build/build_r_pkg_with_cuda_win64.sh b/tests/ci_build/build_r_pkg_with_cuda_win64.sh index f83795775b2c..042ac2329ad8 100644 --- a/tests/ci_build/build_r_pkg_with_cuda_win64.sh +++ b/tests/ci_build/build_r_pkg_with_cuda_win64.sh @@ -15,7 +15,7 @@ mv xgboost/ xgboost_rpack/ mkdir build cd build -cmake .. -G"Visual Studio 15 2017 Win64" -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-3.6.3" +cmake .. 
-G"Visual Studio 17 2022" -A x64 -DUSE_CUDA=ON -DR_LIB=ON -DLIBR_HOME="c:\\Program Files\\R\\R-3.6.3" cmake --build . --config Release --parallel cd .. diff --git a/tests/python-gpu/test_gpu_prediction.py b/tests/python-gpu/test_gpu_prediction.py index 56f488f0cd66..6bd33d0d640f 100644 --- a/tests/python-gpu/test_gpu_prediction.py +++ b/tests/python-gpu/test_gpu_prediction.py @@ -216,6 +216,7 @@ def predict_dense(x): def test_inplace_predict_cupy(self): self.run_inplace_predict_cupy(0) + @pytest.mark.xfail @pytest.mark.skipif(**tm.no_cupy()) @pytest.mark.mgpu def test_inplace_predict_cupy_specified_device(self): From 4be75d852c3aa1644256d491e82e388cc1a5bb32 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 27 Mar 2023 00:33:08 +0800 Subject: [PATCH 089/126] [backport] Fix scope of feature set pointers (#8850) (#8972) --------- Co-authored-by: Rory Mitchell --- src/tree/updater_gpu_hist.cu | 4 ++++ tests/python-gpu/test_gpu_updaters.py | 31 +++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/src/tree/updater_gpu_hist.cu b/src/tree/updater_gpu_hist.cu index c17252902ded..8625395c2ae4 100644 --- a/src/tree/updater_gpu_hist.cu +++ b/src/tree/updater_gpu_hist.cu @@ -306,6 +306,8 @@ struct GPUHistMakerDevice { matrix.is_dense }; dh::TemporaryArray entries(2 * candidates.size()); + // Store the feature set ptrs so they dont go out of scope before the kernel is called + std::vector>> feature_sets; for (size_t i = 0; i < candidates.size(); i++) { auto candidate = candidates.at(i); int left_nidx = tree[candidate.nid].LeftChild(); @@ -314,10 +316,12 @@ struct GPUHistMakerDevice { nidx[i * 2 + 1] = right_nidx; auto left_sampled_features = column_sampler.GetFeatureSet(tree.GetDepth(left_nidx)); left_sampled_features->SetDevice(ctx_->gpu_id); + feature_sets.emplace_back(left_sampled_features); common::Span left_feature_set = interaction_constraints.Query(left_sampled_features->DeviceSpan(), left_nidx); auto right_sampled_features = 
column_sampler.GetFeatureSet(tree.GetDepth(right_nidx)); right_sampled_features->SetDevice(ctx_->gpu_id); + feature_sets.emplace_back(right_sampled_features); common::Span right_feature_set = interaction_constraints.Query(right_sampled_features->DeviceSpan(), right_nidx); diff --git a/tests/python-gpu/test_gpu_updaters.py b/tests/python-gpu/test_gpu_updaters.py index e86152327711..10610d1a8255 100644 --- a/tests/python-gpu/test_gpu_updaters.py +++ b/tests/python-gpu/test_gpu_updaters.py @@ -215,3 +215,34 @@ def test_specified_gpu_id_gpu_update(self, dataset, gpu_id): @pytest.mark.parametrize("weighted", [True, False]) def test_adaptive(self, weighted) -> None: self.cputest.run_adaptive("gpu_hist", weighted) + + @pytest.mark.skipif(**tm.no_pandas()) + def test_issue8824(self): + # column sampling by node crashes because shared pointers go out of scope + import pandas as pd + + data = pd.DataFrame(np.random.rand(1024, 8)) + data.columns = "x" + data.columns.astype(str) + features = data.columns + data["y"] = data.sum(axis=1) < 4 + dtrain = xgb.DMatrix(data[features], label=data["y"]) + model = xgb.train( + dtrain=dtrain, + params={ + "max_depth": 5, + "learning_rate": 0.05, + "objective": "binary:logistic", + "tree_method": "gpu_hist", + "colsample_bytree": 0.5, + "colsample_bylevel": 0.5, + "colsample_bynode": 0.5, # Causes issues + "reg_alpha": 0.05, + "reg_lambda": 0.005, + "seed": 66, + "subsample": 0.5, + "gamma": 0.2, + "predictor": "auto", + "eval_metric": "auc", + }, + num_boost_round=150, + ) From a1c209182d142fa896c8412c0f32343503d77955 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 27 Mar 2023 18:24:25 +0800 Subject: [PATCH 090/126] [backport] Update c++ requirement to 17 for the R package. 
(#8860) (#8983) --- R-package/DESCRIPTION | 5 +++-- R-package/configure | 6 +++--- R-package/configure.ac | 6 +++--- R-package/src/Makevars.in | 2 +- R-package/src/Makevars.win | 2 +- 5 files changed, 11 insertions(+), 10 deletions(-) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 20c45c0ae0f8..8f8b23d763c0 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -66,5 +66,6 @@ Imports: methods, data.table (>= 1.9.6), jsonlite (>= 1.0), -RoxygenNote: 7.2.2 -SystemRequirements: GNU make, C++14 +RoxygenNote: 7.2.3 +Encoding: UTF-8 +SystemRequirements: GNU make, C++17 diff --git a/R-package/configure b/R-package/configure index 6c157ca17e9e..761f2cf19ef5 100755 --- a/R-package/configure +++ b/R-package/configure @@ -2096,9 +2096,9 @@ if test -z "${R_HOME}"; then exit 1 fi -CXX14=`"${R_HOME}/bin/R" CMD config CXX14` -CXX14STD=`"${R_HOME}/bin/R" CMD config CXX14STD` -CXX="${CXX14} ${CXX14STD}" +CXX17=`"${R_HOME}/bin/R" CMD config CXX17` +CXX17STD=`"${R_HOME}/bin/R" CMD config CXX17STD` +CXX="${CXX17} ${CXX17STD}" CXXFLAGS=`"${R_HOME}/bin/R" CMD config CXXFLAGS` CC=`"${R_HOME}/bin/R" CMD config CC` diff --git a/R-package/configure.ac b/R-package/configure.ac index 98dfd9782ba1..f56303c7a3a6 100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -10,9 +10,9 @@ if test -z "${R_HOME}"; then exit 1 fi -CXX14=`"${R_HOME}/bin/R" CMD config CXX14` -CXX14STD=`"${R_HOME}/bin/R" CMD config CXX14STD` -CXX="${CXX14} ${CXX14STD}" +CXX17=`"${R_HOME}/bin/R" CMD config CXX17` +CXX17STD=`"${R_HOME}/bin/R" CMD config CXX17STD` +CXX="${CXX17} ${CXX17STD}" CXXFLAGS=`"${R_HOME}/bin/R" CMD config CXXFLAGS` CC=`"${R_HOME}/bin/R" CMD config CC` diff --git a/R-package/src/Makevars.in b/R-package/src/Makevars.in index 9b2610638366..6902b6c1d927 100644 --- a/R-package/src/Makevars.in +++ b/R-package/src/Makevars.in @@ -3,7 +3,7 @@ PKGROOT=../../ ENABLE_STD_THREAD=1 # _*_ mode: Makefile; _*_ -CXX_STD = CXX14 +CXX_STD = CXX17 XGB_RFLAGS = 
-DXGBOOST_STRICT_R_MODE=1 -DDMLC_LOG_BEFORE_THROW=0\ -DDMLC_ENABLE_STD_THREAD=$(ENABLE_STD_THREAD) -DDMLC_DISABLE_STDIN=1\ diff --git a/R-package/src/Makevars.win b/R-package/src/Makevars.win index 05399ad4d17b..4a209258d458 100644 --- a/R-package/src/Makevars.win +++ b/R-package/src/Makevars.win @@ -3,7 +3,7 @@ PKGROOT=../../ ENABLE_STD_THREAD=0 # _*_ mode: Makefile; _*_ -CXX_STD = CXX14 +CXX_STD = CXX17 XGB_RFLAGS = -DXGBOOST_STRICT_R_MODE=1 -DDMLC_LOG_BEFORE_THROW=0\ -DDMLC_ENABLE_STD_THREAD=$(ENABLE_STD_THREAD) -DDMLC_DISABLE_STDIN=1\ From f5f03dfb61dd7860cc9520e945e04e6b7fd8cd9b Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Mon, 27 Mar 2023 21:31:30 +0800 Subject: [PATCH 091/126] [backport] Update dmlc-core to get C++17 deprecation warning (#8855) (#8982) Co-authored-by: Rong Ou --- dmlc-core | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dmlc-core b/dmlc-core index dfd9365264a0..81db539486ce 160000 --- a/dmlc-core +++ b/dmlc-core @@ -1 +1 @@ -Subproject commit dfd9365264a060a5096734b7d892e1858b6d2722 +Subproject commit 81db539486ce6525b31b971545edffee2754aced From 2b8cca655dc3a347b5a5ffbba2cb929ce855454e Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Mon, 27 Mar 2023 11:34:09 -0600 Subject: [PATCH 092/126] Print result of p_m.get() --- src/c_api/c_api.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index eeb7369c405b..9aa622690517 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1049,6 +1049,7 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, exit(1); } } + fprintf (stdout, reinterpret_cast(p_m.get())); auto proxy = new std::shared_ptr(new xgboost::data::DMatrixProxy); if (!proxy) { fprintf (stderr, "proxy is null"); From 9e79c2ef0d8731974a48a94de7c33d2ab611bc88 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Mon, 27 Mar 2023 13:55:26 -0600 Subject: [PATCH 093/126] Try some stuff with DMatrix --- src/c_api/c_api.cc | 5 +++++ src/predictor/cpu_predictor.cc | 3 ++- 2 
files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 9aa622690517..77517c438dd8 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1050,11 +1050,16 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, } } fprintf (stdout, reinterpret_cast(p_m.get())); + DMatrixProxy* stuff = dynamic_cast(p_m.get()); auto proxy = new std::shared_ptr(new xgboost::data::DMatrixProxy); if (!proxy) { fprintf (stderr, "proxy is null"); exit(1); } + if (!stuff) { + fprintf (stderr, "stuff is null"); + exit(1); + } auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); InplacePredictImplCore(p_m, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index 62ec0b18cce3..7c18d2981e43 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -396,7 +396,8 @@ class CPUPredictor : public Predictor { bool InplacePredict(std::shared_ptr p_m, const gbm::GBTreeModel &model, float missing, PredictionCacheEntry *out_preds, uint32_t tree_begin, unsigned tree_end) const override { - auto proxy = dynamic_cast(p_m.get()); + //auto proxy = dynamic_cast(p_m.get()); + auto proxy = new std::shared_ptr(new xgboost::data::DMatrixProxy); if (!proxy) { fprintf (stderr, "InplacePredict: proxy is null cpu variant"); exit(1); From 7968bca98dc42cd79dee684b2d88ea29a99f34da Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Mon, 27 Mar 2023 14:09:51 -0600 Subject: [PATCH 094/126] Set dense data --- src/c_api/c_api.cc | 5 +++-- src/data/proxy_dmatrix.cc | 8 ++++++++ src/data/proxy_dmatrix.h | 1 + 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 77517c438dd8..03e54425ac9c 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1027,6 +1027,7 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, API_BEGIN(); 
CHECK_HANDLE(); xgboost::bst_ulong out_dim; + std::shared_ptr x{new xgboost::data::DenseAdapter(data, num_rows, num_features)}; //std::shared_ptr p_m(dMatrixHandle); std::shared_ptr p_m{nullptr}; if (!dMatrixHandle) { @@ -1042,7 +1043,6 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, } } else { p_m = *static_cast *>(dMatrixHandle); - fprintf (stdout, "dmatrix handle is not null"); if (!p_m) { fprintf (stderr, "p_m 2 is null"); @@ -1050,7 +1050,7 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, } } fprintf (stdout, reinterpret_cast(p_m.get())); - DMatrixProxy* stuff = dynamic_cast(p_m.get()); + // DMatrixProxy* stuff = dynamic_cast(p_m.get()); auto proxy = new std::shared_ptr(new xgboost::data::DMatrixProxy); if (!proxy) { fprintf (stderr, "proxy is null"); @@ -1062,6 +1062,7 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, } auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); + proxy->SetDenseData(data) InplacePredictImplCore(p_m, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, 0, iteration_end, true, len, &out_dim, out_result); // printf("XGBoosterInplacePredict len = %u, dim = %u\n", **len, out_dim); diff --git a/src/data/proxy_dmatrix.cc b/src/data/proxy_dmatrix.cc index fc36f75f28ac..294048e8ced2 100644 --- a/src/data/proxy_dmatrix.cc +++ b/src/data/proxy_dmatrix.cc @@ -15,6 +15,14 @@ void DMatrixProxy::SetArrayData(char const *c_interface) { this->ctx_.gpu_id = Context::kCpuId; } +void DMatrixProxy::SetDenseData(const float *data) { + std::shared_ptr adapter{new xgboost::data::DenseAdapter(data)}; + this->batch_ = adapter; + this->Info().num_col_ = adapter->NumColumns(); + this->Info().num_row_ = adapter->NumRows(); + this->ctx_.gpu_id = Context::kCpuId; +} + void DMatrixProxy::SetCSRData(char const *c_indptr, char const *c_indices, char const *c_values, bst_feature_t n_features, bool on_host) { CHECK(on_host) << "Not implemented on device."; diff 
--git a/src/data/proxy_dmatrix.h b/src/data/proxy_dmatrix.h index 8375c7c8d6c1..bbe5169f97af 100644 --- a/src/data/proxy_dmatrix.h +++ b/src/data/proxy_dmatrix.h @@ -70,6 +70,7 @@ class DMatrixProxy : public DMatrix { } void SetArrayData(char const* c_interface); + void SetDenseData(const float *data); void SetCSRData(char const *c_indptr, char const *c_indices, char const *c_values, bst_feature_t n_features, bool on_host); From d9dd5c4ec575f087c900481fcd4fe7f69462d9d6 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Mon, 27 Mar 2023 14:22:57 -0600 Subject: [PATCH 095/126] uncomment stuff, more logging --- src/c_api/c_api.cc | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 03e54425ac9c..e35302dc712f 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1050,8 +1050,10 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, } } fprintf (stdout, reinterpret_cast(p_m.get())); - // DMatrixProxy* stuff = dynamic_cast(p_m.get()); - auto proxy = new std::shared_ptr(new xgboost::data::DMatrixProxy); + DMatrixProxy* stuff = dynamic_cast(p_m.get()); + auto proxy = new std::shared_ptr(new xgboost::data::DMatrixProxy); + printf ("stuff is %s", typeid(stuff).name()); + printf ("proxy is %s", typeid(proxy).name()); if (!proxy) { fprintf (stderr, "proxy is null"); exit(1); From 9a3214bdbb2ea244475cb9786c49c041f9eb08ae Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Mon, 27 Mar 2023 14:25:00 -0600 Subject: [PATCH 096/126] change some variables and things --- src/c_api/c_api.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index e35302dc712f..dd610ca2fc7d 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1050,8 +1050,8 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, } } fprintf (stdout, reinterpret_cast(p_m.get())); - DMatrixProxy* stuff = dynamic_cast(p_m.get()); - auto proxy = new std::shared_ptr(new 
xgboost::data::DMatrixProxy); + auto stuff = dynamic_cast(p_m.get()); + auto proxy = new std::shared_ptr(new xgboost::data::DMatrixProxy); printf ("stuff is %s", typeid(stuff).name()); printf ("proxy is %s", typeid(proxy).name()); if (!proxy) { From 823c92fe80d630abd0a935779a8dd8565b3ec5e6 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Mon, 27 Mar 2023 14:29:38 -0600 Subject: [PATCH 097/126] Dense adapter changes --- src/data/proxy_dmatrix.cc | 5 +++-- src/data/proxy_dmatrix.h | 3 ++- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/data/proxy_dmatrix.cc b/src/data/proxy_dmatrix.cc index 294048e8ced2..ba75dec19d83 100644 --- a/src/data/proxy_dmatrix.cc +++ b/src/data/proxy_dmatrix.cc @@ -15,8 +15,9 @@ void DMatrixProxy::SetArrayData(char const *c_interface) { this->ctx_.gpu_id = Context::kCpuId; } -void DMatrixProxy::SetDenseData(const float *data) { - std::shared_ptr adapter{new xgboost::data::DenseAdapter(data)}; +void DMatrixProxy::SetDenseData(const float *data, size_t num_rows, + size_t num_features) { + std::shared_ptr adapter{new xgboost::data::DenseAdapter(data, num_rows, num_features)}; this->batch_ = adapter; this->Info().num_col_ = adapter->NumColumns(); this->Info().num_row_ = adapter->NumRows(); diff --git a/src/data/proxy_dmatrix.h b/src/data/proxy_dmatrix.h index bbe5169f97af..4e8601b84a8b 100644 --- a/src/data/proxy_dmatrix.h +++ b/src/data/proxy_dmatrix.h @@ -70,7 +70,8 @@ class DMatrixProxy : public DMatrix { } void SetArrayData(char const* c_interface); - void SetDenseData(const float *data); + void SetDenseData(const float *data,size_t num_rows, + size_t num_features); void SetCSRData(char const *c_indptr, char const *c_indices, char const *c_values, bst_feature_t n_features, bool on_host); From 5a08ff84f1f3040b705546a2aab91735b14c1d17 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Mon, 27 Mar 2023 14:33:25 -0600 Subject: [PATCH 098/126] proxy to stuff --- src/c_api/c_api.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 
deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index dd610ca2fc7d..d0835ddc0d3f 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1061,10 +1061,10 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, if (!stuff) { fprintf (stderr, "stuff is null"); exit(1); - } + } auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); - proxy->SetDenseData(data) + stuff->SetDenseData(data, num_rows, num_features); InplacePredictImplCore(p_m, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, 0, iteration_end, true, len, &out_dim, out_result); // printf("XGBoosterInplacePredict len = %u, dim = %u\n", **len, out_dim); From a1737632a3047efa4d43e225d8b1bad583269573 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Mon, 27 Mar 2023 14:35:58 -0600 Subject: [PATCH 099/126] change how proxy is cast in cpu predictor --- src/predictor/cpu_predictor.cc | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index 7c18d2981e43..5ac3ef41b059 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -396,12 +396,8 @@ class CPUPredictor : public Predictor { bool InplacePredict(std::shared_ptr p_m, const gbm::GBTreeModel &model, float missing, PredictionCacheEntry *out_preds, uint32_t tree_begin, unsigned tree_end) const override { - //auto proxy = dynamic_cast(p_m.get()); - auto proxy = new std::shared_ptr(new xgboost::data::DMatrixProxy); - if (!proxy) { - fprintf (stderr, "InplacePredict: proxy is null cpu variant"); - exit(1); - } + auto proxy = dynamic_cast(p_m.get()); + //auto proxy = new std::shared_ptr(new xgboost::data::DMatrixProxy); auto x = proxy->Adapter(); if (x.type() == typeid(std::shared_ptr)) { this->DispatchedInplacePredict( From 63728055a6ab45c3fcc70e1086afaa5245fb68f1 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Mon, 27 Mar 2023 14:39:18 -0600 Subject: 
[PATCH 100/126] More logging --- src/c_api/c_api.cc | 4 ++-- src/predictor/cpu_predictor.cc | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index d0835ddc0d3f..9a44801bc946 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1055,11 +1055,11 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, printf ("stuff is %s", typeid(stuff).name()); printf ("proxy is %s", typeid(proxy).name()); if (!proxy) { - fprintf (stderr, "proxy is null"); + fprintf (stderr, "proxy is null line 1058"); exit(1); } if (!stuff) { - fprintf (stderr, "stuff is null"); + fprintf (stderr, "stuff is null line 1062"); exit(1); } auto *learner = static_cast(handle); diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index 5ac3ef41b059..ed7f60837ce4 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -396,6 +396,7 @@ class CPUPredictor : public Predictor { bool InplacePredict(std::shared_ptr p_m, const gbm::GBTreeModel &model, float missing, PredictionCacheEntry *out_preds, uint32_t tree_begin, unsigned tree_end) const override { + fprintf (stdout, "We are in cpu_predictor"); auto proxy = dynamic_cast(p_m.get()); //auto proxy = new std::shared_ptr(new xgboost::data::DMatrixProxy); auto x = proxy->Adapter(); From 148e29ec4c29a516a3b45101c1e23858229adf71 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Mon, 27 Mar 2023 14:41:13 -0600 Subject: [PATCH 101/126] undo static cast --- src/c_api/c_api.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 9a44801bc946..46e069ec9cdf 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1050,6 +1050,7 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, } } fprintf (stdout, reinterpret_cast(p_m.get())); + p_m.reset(new data::DMatrixProxy); auto stuff = dynamic_cast(p_m.get()); auto proxy = new std::shared_ptr(new xgboost::data::DMatrixProxy); printf ("stuff is %s", 
typeid(stuff).name()); From f6a25087f7d07d9ca01c39e86764f2ea96b0f455 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Mon, 27 Mar 2023 14:52:20 -0600 Subject: [PATCH 102/126] comment out some logging --- src/c_api/c_api.cc | 12 ++++++------ src/predictor/cpu_predictor.cc | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 46e069ec9cdf..389c12481b7b 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1030,20 +1030,20 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, std::shared_ptr x{new xgboost::data::DenseAdapter(data, num_rows, num_features)}; //std::shared_ptr p_m(dMatrixHandle); std::shared_ptr p_m{nullptr}; - if (!dMatrixHandle) { + /*if (!dMatrixHandle) { fprintf (stderr, "dMatrixHandle is null"); exit(1); - } + }*/ if (!dMatrixHandle) { p_m.reset(new data::DMatrixProxy); - fprintf (stdout, "dmatrix handle is null"); + //fprintf (stdout, "dmatrix handle is null"); if (!p_m) { fprintf (stderr, "p_m 1 is null"); exit(1); } } else { p_m = *static_cast *>(dMatrixHandle); - fprintf (stdout, "dmatrix handle is not null"); + //fprintf (stdout, "dmatrix handle is not null"); if (!p_m) { fprintf (stderr, "p_m 2 is null"); exit(1); @@ -1053,8 +1053,8 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, p_m.reset(new data::DMatrixProxy); auto stuff = dynamic_cast(p_m.get()); auto proxy = new std::shared_ptr(new xgboost::data::DMatrixProxy); - printf ("stuff is %s", typeid(stuff).name()); - printf ("proxy is %s", typeid(proxy).name()); + //printf ("stuff is %s", typeid(stuff).name()); + //printf ("proxy is %s", typeid(proxy).name()); if (!proxy) { fprintf (stderr, "proxy is null line 1058"); exit(1); diff --git a/src/predictor/cpu_predictor.cc b/src/predictor/cpu_predictor.cc index ed7f60837ce4..a2f3cb863393 100644 --- a/src/predictor/cpu_predictor.cc +++ b/src/predictor/cpu_predictor.cc @@ -396,7 +396,7 @@ class CPUPredictor : public Predictor { bool 
InplacePredict(std::shared_ptr p_m, const gbm::GBTreeModel &model, float missing, PredictionCacheEntry *out_preds, uint32_t tree_begin, unsigned tree_end) const override { - fprintf (stdout, "We are in cpu_predictor"); + //fprintf (stdout, "We are in cpu_predictor"); auto proxy = dynamic_cast(p_m.get()); //auto proxy = new std::shared_ptr(new xgboost::data::DMatrixProxy); auto x = proxy->Adapter(); From e31c53d6ddeff0923a233a6a6f4c6a6878231f40 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Mon, 27 Mar 2023 14:54:53 -0600 Subject: [PATCH 103/126] another missed logging message --- src/c_api/c_api.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 389c12481b7b..1a2d56709571 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1049,7 +1049,7 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, exit(1); } } - fprintf (stdout, reinterpret_cast(p_m.get())); + //fprintf (stdout, reinterpret_cast(p_m.get())); p_m.reset(new data::DMatrixProxy); auto stuff = dynamic_cast(p_m.get()); auto proxy = new std::shared_ptr(new xgboost::data::DMatrixProxy); From 365da0b8f4233a6fd9d78a6581d74cc8c092943f Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 29 Mar 2023 12:02:51 +0800 Subject: [PATCH 104/126] [backport] [doc] Add missing document for pyspark ranker. (#8692) (#8990) --- doc/python/python_api.rst | 10 ++++++++++ doc/tutorials/spark_estimator.rst | 16 ++++++++-------- python-package/xgboost/spark/__init__.py | 5 +++-- 3 files changed, 21 insertions(+), 10 deletions(-) diff --git a/doc/python/python_api.rst b/doc/python/python_api.rst index 03b431c77d41..b27542a8becb 100644 --- a/doc/python/python_api.rst +++ b/doc/python/python_api.rst @@ -173,3 +173,13 @@ PySpark API :members: :inherited-members: :show-inheritance: + +.. autoclass:: xgboost.spark.SparkXGBRanker + :members: + :inherited-members: + :show-inheritance: + +.. 
autoclass:: xgboost.spark.SparkXGBRankerModel + :members: + :inherited-members: + :show-inheritance: diff --git a/doc/tutorials/spark_estimator.rst b/doc/tutorials/spark_estimator.rst index 44e7a957513b..e0acc745433d 100644 --- a/doc/tutorials/spark_estimator.rst +++ b/doc/tutorials/spark_estimator.rst @@ -43,10 +43,10 @@ in spark estimator, and some parameters are replaced with pyspark specific param such as `weight_col`, `validation_indicator_col`, `use_gpu`, for details please see `SparkXGBRegressor` doc. -The following code snippet shows how to train a spark xgboost regressor model, -first we need to prepare a training dataset as a spark dataframe contains -"label" column and "features" column(s), the "features" column(s) must be `pyspark.ml.linalg.Vector` -type or spark array type or a list of feature column names. +The following code snippet shows how to train a spark xgboost regressor model, first we +need to prepare a training dataset as a spark dataframe contains "label" column and +"features" column(s), the "features" column(s) must be ``pyspark.ml.linalg.Vector`` type +or spark array type or a list of feature column names. .. code-block:: python @@ -54,10 +54,10 @@ type or spark array type or a list of feature column names. xgb_regressor_model = xgb_regressor.fit(train_spark_dataframe) -The following code snippet shows how to predict test data using a spark xgboost regressor model, -first we need to prepare a test dataset as a spark dataframe contains -"features" and "label" column, the "features" column must be `pyspark.ml.linalg.Vector` -type or spark array type. +The following code snippet shows how to predict test data using a spark xgboost regressor +model, first we need to prepare a test dataset as a spark dataframe contains "features" +and "label" column, the "features" column must be ``pyspark.ml.linalg.Vector`` type or +spark array type. .. 
code-block:: python diff --git a/python-package/xgboost/spark/__init__.py b/python-package/xgboost/spark/__init__.py index c0b347eefb30..2720532c0411 100644 --- a/python-package/xgboost/spark/__init__.py +++ b/python-package/xgboost/spark/__init__.py @@ -1,6 +1,5 @@ # type: ignore -"""PySpark XGBoost integration interface -""" +"""PySpark XGBoost integration interface""" try: import pyspark @@ -11,6 +10,7 @@ SparkXGBClassifier, SparkXGBClassifierModel, SparkXGBRanker, + SparkXGBRankerModel, SparkXGBRegressor, SparkXGBRegressorModel, ) @@ -21,4 +21,5 @@ "SparkXGBRegressor", "SparkXGBRegressorModel", "SparkXGBRanker", + "SparkXGBRankerModel", ] From 1baebe231bace3da9d4d867f2f97c883ba837998 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 29 Mar 2023 14:26:20 +0800 Subject: [PATCH 105/126] [backport] [CI] Fix Windows wheel to be compatible with Poetry (#8991) (#8992) * [CI] Fix Windows wheel to be compatible with Poetry Co-authored-by: Philip Hyunsu Cho --- tests/ci_build/insert_vcomp140.py | 111 ++++++++++++++++++++++++++---- 1 file changed, 97 insertions(+), 14 deletions(-) diff --git a/tests/ci_build/insert_vcomp140.py b/tests/ci_build/insert_vcomp140.py index e45c7d204b73..cfa8d792dee2 100644 --- a/tests/ci_build/insert_vcomp140.py +++ b/tests/ci_build/insert_vcomp140.py @@ -1,19 +1,102 @@ -import sys -import re -import zipfile +import argparse +import base64 import glob +import hashlib +import os +import pathlib +import re +import shutil +import tempfile + +VCOMP140_PATH = "C:\\Windows\\System32\\vcomp140.dll" + + +def get_sha256sum(path): + return ( + base64.urlsafe_b64encode(hashlib.sha256(open(path, "rb").read()).digest()) + .decode("latin1") + .rstrip("=") + ) + + +def update_record(*, wheel_content_dir, xgboost_version): + vcomp140_size = os.path.getsize(VCOMP140_PATH) + vcomp140_hash = get_sha256sum(VCOMP140_PATH) + + record_path = wheel_content_dir / pathlib.Path( + f"xgboost-{xgboost_version}.dist-info/RECORD" + ) + with open(record_path, "r") as f: 
+ record_content = f.read() + record_content += f"xgboost-{xgboost_version}.data/data/xgboost/vcomp140.dll," + record_content += f"sha256={vcomp140_hash},{vcomp140_size}\n" + with open(record_path, "w") as f: + f.write(record_content) + + +def main(args): + candidates = list(sorted(glob.glob(args.wheel_path))) + for wheel_path in candidates: + print(f"Processing wheel {wheel_path}") + m = re.search(r"xgboost-(.*)\+.*-py3", wheel_path) + if not m: + raise ValueError(f"Wheel {wheel_path} has unexpected name") + version = m.group(1) + print(f" Detected version for {wheel_path}: {version}") + print(f" Inserting vcomp140.dll into {wheel_path}...") + with tempfile.TemporaryDirectory() as tempdir: + wheel_content_dir = pathlib.Path(tempdir) / "wheel_content" + print(f" Extract {wheel_path} into {wheel_content_dir}") + shutil.unpack_archive( + wheel_path, extract_dir=wheel_content_dir, format="zip" + ) + data_dir = wheel_content_dir / pathlib.Path( + f"xgboost-{version}.data/data/xgboost" + ) + data_dir.mkdir(parents=True, exist_ok=True) + + print(f" Copy {VCOMP140_PATH} -> {data_dir}") + shutil.copy(VCOMP140_PATH, data_dir) + + print(f" Update RECORD") + update_record(wheel_content_dir=wheel_content_dir, xgboost_version=version) + + print(f" Content of {wheel_content_dir}:") + for e in sorted(wheel_content_dir.rglob("*")): + if e.is_file(): + r = e.relative_to(wheel_content_dir) + print(f" {r}") + + print(f" Create new wheel...") + new_wheel_tmp_path = pathlib.Path(tempdir) / "new_wheel" + shutil.make_archive( + str(new_wheel_tmp_path.resolve()), + format="zip", + root_dir=wheel_content_dir, + ) + new_wheel_tmp_path = new_wheel_tmp_path.resolve().with_suffix(".zip") + new_wheel_tmp_path = new_wheel_tmp_path.rename( + new_wheel_tmp_path.with_suffix(".whl") + ) + print(f" Created new wheel {new_wheel_tmp_path}") + + # Rename the old wheel with suffix .bak + # The new wheel takes the name of the old wheel + wheel_path_obj = pathlib.Path(wheel_path).resolve() + backup_path = 
wheel_path_obj.with_suffix(".whl.bak") + print(f" Rename {wheel_path_obj} -> {backup_path}") + wheel_path_obj.replace(backup_path) + print(f" Rename {new_wheel_tmp_path} -> {wheel_path_obj}") + new_wheel_tmp_path.replace(wheel_path_obj) -if len(sys.argv) != 2: - print('Usage: {} [wheel]'.format(sys.argv[0])) - sys.exit(1) + shutil.rmtree(wheel_content_dir) -vcomp140_path = 'C:\\Windows\\System32\\vcomp140.dll' -for wheel_path in sorted(glob.glob(sys.argv[1])): - m = re.search(r'xgboost-(.*)-py3', wheel_path) - assert m, f'wheel_path = {wheel_path}' - version = m.group(1) +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument( + "wheel_path", type=str, help="Path to wheel (wildcard permitted)" + ) + args = parser.parse_args() - print(f"Inserting vcomp140.dll into {wheel_path}...") - with zipfile.ZipFile(wheel_path, 'a') as f: - f.write(vcomp140_path, 'xgboost-{}.data/data/xgboost/vcomp140.dll'.format(version)) + main(args) From b8c6b86792183b934c46d69ab288ee7548281f19 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Wed, 29 Mar 2023 21:41:10 +0800 Subject: [PATCH 106/126] Bump version to 1.7.5. 
(#8994) --- CMakeLists.txt | 2 +- R-package/DESCRIPTION | 4 ++-- R-package/configure | 18 +++++++++--------- R-package/configure.ac | 2 +- include/xgboost/version_config.h | 2 +- jvm-packages/pom.xml | 2 +- jvm-packages/xgboost4j-example/pom.xml | 8 ++++---- jvm-packages/xgboost4j-flink/pom.xml | 6 +++--- jvm-packages/xgboost4j-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark/pom.xml | 4 ++-- jvm-packages/xgboost4j/pom.xml | 4 ++-- python-package/xgboost/VERSION | 2 +- 13 files changed, 31 insertions(+), 31 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index af12a49484e5..eb3e89b4d29d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ cmake_minimum_required(VERSION 3.18 FATAL_ERROR) -project(xgboost LANGUAGES CXX C VERSION 1.7.4) +project(xgboost LANGUAGES CXX C VERSION 1.7.5) include(cmake/Utils.cmake) list(APPEND CMAKE_MODULE_PATH "${xgboost_SOURCE_DIR}/cmake/modules") cmake_policy(SET CMP0022 NEW) diff --git a/R-package/DESCRIPTION b/R-package/DESCRIPTION index 8f8b23d763c0..e1cef707d894 100644 --- a/R-package/DESCRIPTION +++ b/R-package/DESCRIPTION @@ -1,8 +1,8 @@ Package: xgboost Type: Package Title: Extreme Gradient Boosting -Version: 1.7.4.1 -Date: 2023-02-15 +Version: 1.7.5.1 +Date: 2023-03-29 Authors@R: c( person("Tianqi", "Chen", role = c("aut"), email = "tianqi.tchen@gmail.com"), diff --git a/R-package/configure b/R-package/configure index 761f2cf19ef5..258f86fd1f8a 100755 --- a/R-package/configure +++ b/R-package/configure @@ -1,6 +1,6 @@ #! /bin/sh # Guess values for system-dependent variables and create Makefiles. -# Generated by GNU Autoconf 2.71 for xgboost 1.7.4. +# Generated by GNU Autoconf 2.71 for xgboost 1.7.5. # # # Copyright (C) 1992-1996, 1998-2017, 2020-2021 Free Software Foundation, @@ -607,8 +607,8 @@ MAKEFLAGS= # Identity of this package. 
PACKAGE_NAME='xgboost' PACKAGE_TARNAME='xgboost' -PACKAGE_VERSION='1.7.4' -PACKAGE_STRING='xgboost 1.7.4' +PACKAGE_VERSION='1.7.5' +PACKAGE_STRING='xgboost 1.7.5' PACKAGE_BUGREPORT='' PACKAGE_URL='' @@ -1225,7 +1225,7 @@ if test "$ac_init_help" = "long"; then # Omit some internal or obsolete options to make the list less imposing. # This message is too long to be a string in the A/UX 3.1 sh. cat <<_ACEOF -\`configure' configures xgboost 1.7.4 to adapt to many kinds of systems. +\`configure' configures xgboost 1.7.5 to adapt to many kinds of systems. Usage: $0 [OPTION]... [VAR=VALUE]... @@ -1287,7 +1287,7 @@ fi if test -n "$ac_init_help"; then case $ac_init_help in - short | recursive ) echo "Configuration of xgboost 1.7.4:";; + short | recursive ) echo "Configuration of xgboost 1.7.5:";; esac cat <<\_ACEOF @@ -1367,7 +1367,7 @@ fi test -n "$ac_init_help" && exit $ac_status if $ac_init_version; then cat <<\_ACEOF -xgboost configure 1.7.4 +xgboost configure 1.7.5 generated by GNU Autoconf 2.71 Copyright (C) 2021 Free Software Foundation, Inc. @@ -1533,7 +1533,7 @@ cat >config.log <<_ACEOF This file contains any messages produced by compilers while running configure, to aid debugging if configure makes a mistake. -It was created by xgboost $as_me 1.7.4, which was +It was created by xgboost $as_me 1.7.5, which was generated by GNU Autoconf 2.71. Invocation command line was $ $0$ac_configure_args_raw @@ -3412,7 +3412,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_write_fail=1 # report actual input values of CONFIG_FILES etc. instead of their # values after options handling. ac_log=" -This file was extended by xgboost $as_me 1.7.4, which was +This file was extended by xgboost $as_me 1.7.5, which was generated by GNU Autoconf 2.71. 
Invocation command line was CONFIG_FILES = $CONFIG_FILES @@ -3467,7 +3467,7 @@ ac_cs_config_escaped=`printf "%s\n" "$ac_cs_config" | sed "s/^ //; s/'/'\\\\\\\\ cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1 ac_cs_config='$ac_cs_config_escaped' ac_cs_version="\\ -xgboost config.status 1.7.4 +xgboost config.status 1.7.5 configured by $0, generated by GNU Autoconf 2.71, with options \\"\$ac_cs_config\\" diff --git a/R-package/configure.ac b/R-package/configure.ac index f56303c7a3a6..e78baad137be 100644 --- a/R-package/configure.ac +++ b/R-package/configure.ac @@ -2,7 +2,7 @@ AC_PREREQ(2.69) -AC_INIT([xgboost],[1.7.4],[],[xgboost],[]) +AC_INIT([xgboost],[1.7.5],[],[xgboost],[]) : ${R_HOME=`R RHOME`} if test -z "${R_HOME}"; then diff --git a/include/xgboost/version_config.h b/include/xgboost/version_config.h index f8e7ef976242..22fb2ad5027d 100644 --- a/include/xgboost/version_config.h +++ b/include/xgboost/version_config.h @@ -6,6 +6,6 @@ #define XGBOOST_VER_MAJOR 1 #define XGBOOST_VER_MINOR 7 -#define XGBOOST_VER_PATCH 4 +#define XGBOOST_VER_PATCH 5 #endif // XGBOOST_VERSION_CONFIG_H_ diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index 9e25b45734b0..1e813349246e 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.4 + 1.7.5 pom XGBoost JVM Package JVM Package for XGBoost diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index 8a9f50dbbbd6..d5b6b03b481f 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.4 + 1.7.5 xgboost4j-example_2.12 - 1.7.4 + 1.7.5 jar @@ -26,7 +26,7 @@ ml.dmlc xgboost4j-spark_${scala.binary.version} - 1.7.4 + 1.7.5 org.apache.spark @@ -37,7 +37,7 @@ ml.dmlc xgboost4j-flink_${scala.binary.version} - 1.7.4 + 1.7.5 org.apache.commons diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index 
83840d00cc0d..8f7253e6beef 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.4 + 1.7.5 xgboost4j-flink_2.12 - 1.7.4 + 1.7.5 @@ -26,7 +26,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.4 + 1.7.5 org.apache.commons diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index c67c9e729bb6..4598f2900f19 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.4 + 1.7.5 xgboost4j-gpu_2.12 - 1.7.4 + 1.7.5 jar diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index 7d0c150ae38f..753da75359c9 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.4 + 1.7.5 xgboost4j-spark-gpu_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j-gpu_${scala.binary.version} - 1.7.4 + 1.7.5 org.apache.spark diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index 55502a2029f2..f183cf960e83 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.4 + 1.7.5 xgboost4j-spark_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.4 + 1.7.5 org.apache.spark diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index 76a9ad808199..758365dc0fe8 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.4 + 1.7.5 xgboost4j_2.12 - 1.7.4 + 1.7.5 jar diff --git a/python-package/xgboost/VERSION b/python-package/xgboost/VERSION index 10c088013f87..6a126f402d53 100644 --- a/python-package/xgboost/VERSION +++ b/python-package/xgboost/VERSION @@ -1 +1 @@ -1.7.4 +1.7.5 From 56091029fa7dca99acfc405587c8365511f378cc Mon Sep 17 00:00:00 2001 From: Jon 
Yoquinto Date: Wed, 29 Mar 2023 08:05:44 -0600 Subject: [PATCH 107/126] Remove need to pass DMatrix --- .../java/ml/dmlc/xgboost4j/java/Booster.java | 20 ++++++++----------- .../dmlc/xgboost4j/java/BoosterImplTest.java | 14 +++++-------- 2 files changed, 13 insertions(+), 21 deletions(-) diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java index cebe288820f7..bb7f0a768bdb 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java @@ -332,9 +332,9 @@ private synchronized float[][] predict(DMatrix data, */ public float[][] inplace_predict(float[] data, int num_rows, - int num_features, - DMatrix d_matrix) throws XGBoostError { - return this.inplace_predict(data, num_rows, num_features, d_matrix, + int num_features) throws XGBoostError { + return this.inplace_predict(data, num_rows, num_features, + Float.NaN, false, 0, false, false); Float.NaN, false, 0, false, false); } @@ -356,9 +356,8 @@ public float[][] inplace_predict(float[] data, public float[][] inplace_predict(float[] data, int num_rows, int num_features, - DMatrix d_matrix, float missing) throws XGBoostError { - return this.inplace_predict(data, num_rows, num_features, d_matrix, + return this.inplace_predict(data, num_rows, num_features, missing, false, 0, false, false); } @@ -383,10 +382,9 @@ public float[][] inplace_predict(float[] data, public float[][] inplace_predict(float[] data, int num_rows, int num_features, - DMatrix d_matrix, float missing, boolean outputMargin) throws XGBoostError { - return this.inplace_predict(data, num_rows, num_features, d_matrix, missing, + return this.inplace_predict(data, num_rows, num_features, missing, outputMargin, 0, false, false); } @@ -411,11 +409,10 @@ public float[][] inplace_predict(float[] data, public float[][] inplace_predict(float[] data, int 
num_rows, int num_features, - DMatrix d_matrix, float missing, boolean outputMargin, int treeLimit) throws XGBoostError { - return this.inplace_predict(data, num_rows, num_features, d_matrix, missing, + return this.inplace_predict(data, num_rows, num_features, missing, outputMargin, treeLimit, false, false); } @@ -437,7 +434,6 @@ public float[][] inplace_predict(float[] data, public float[][] inplace_predict(float[] data, int num_rows, int num_features, - DMatrix d_matrix, float missing, boolean outputMargin, int treeLimit, @@ -453,10 +449,10 @@ public float[][] inplace_predict(float[] data, if (predContribs) { optionMask = 4; } - + DMatrix d_mat = new DMatrix(data, num_rows, num_features, missing); float[][] rawPredicts = new float[1][]; XGBoostJNI.checkCall(XGBoostJNI.XGBoosterInplacePredict(handle, data, num_rows, num_features, - d_matrix.getHandle(), missing, + d_mat.getHandle(), missing, optionMask, treeLimit, rawPredicts)); // pass missing and treelimit here? // System.out.println("Booster.inplace_predict rawPredicts[0].length = " + diff --git a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java index 0a15f93dde2c..690202d7ab4f 100644 --- a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java +++ b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java @@ -95,18 +95,16 @@ class InplacePredictThread extends Thread { float[][] testX; int test_rows; int features; - DMatrix dMatrix; float[][] true_predicts; Booster booster; Random rng = new Random(); int n_preds = 100; - public InplacePredictThread(int n, Booster booster, float[][] testX, int test_rows, int features, DMatrix dMatrix, float[][] true_predicts) { + public InplacePredictThread(int n, Booster booster, float[][] testX, int test_rows, int features, float[][] true_predicts) { this.thread_num = n; this.booster = booster; this.testX = testX; 
this.test_rows = test_rows; - this.dMatrix = dMatrix; this.features = features; this.true_predicts = true_predicts; } @@ -122,7 +120,7 @@ public void run() { int r = this.rng.nextInt(this.test_rows); // In-place predict a single random row - float[][] predictions = booster.inplace_predict(this.testX[r], 1, this.features, this.dMatrix); + float[][] predictions = booster.inplace_predict(this.testX[r], 1, this.features); // Confirm results as expected if (predictions[0][0] != this.true_predicts[r][0]) { @@ -146,19 +144,17 @@ class InplacePredictionTask implements Callable { float[][] testX; int test_rows; int features; - DMatrix dMatrix; float[][] true_predicts; Booster booster; Random rng = new Random(); int n_preds = 100; - public InplacePredictionTask(int n, Booster booster, float[][] testX, int test_rows, int features, DMatrix dMatrix, float[][] true_predicts) { + public InplacePredictionTask(int n, Booster booster, float[][] testX, int test_rows, int features, float[][] true_predicts) { this.task_num = n; this.booster = booster; this.testX = testX; this.test_rows = test_rows; this.features = features; - this.dMatrix = dMatrix; this.true_predicts = true_predicts; } @@ -172,7 +168,7 @@ public Boolean call() throws Exception { int r = this.rng.nextInt(this.test_rows); // In-place predict a single random row - float[][] predictions = booster.inplace_predict(this.testX[r], 1, this.features, this.dMatrix); + float[][] predictions = booster.inplace_predict(this.testX[r], 1, this.features); // Confirm results as expected if (predictions[0][0] != this.true_predicts[r][0]) { @@ -335,7 +331,7 @@ public void testBoosterInplacePredict() throws XGBoostError, IOException { float[][] predicts = booster.predict(testMat); // inplace prediction - float[][] inplace_predicts = booster.inplace_predict(testX, test_rows, features, testMat); + float[][] inplace_predicts = booster.inplace_predict(testX, test_rows, features); // Confirm that the two prediction results are identical 
TestCase.assertTrue(ArrayComparator.compare(predicts, inplace_predicts)); From b343f89e23ad1178083fad542929030745243104 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Wed, 29 Mar 2023 08:13:23 -0600 Subject: [PATCH 108/126] Remove redundant line --- .../xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java | 1 - 1 file changed, 1 deletion(-) diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java index bb7f0a768bdb..fd48df3a5f40 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java @@ -335,7 +335,6 @@ public float[][] inplace_predict(float[] data, int num_features) throws XGBoostError { return this.inplace_predict(data, num_rows, num_features, Float.NaN, false, 0, false, false); - Float.NaN, false, 0, false, false); } /** From c4d6d52c1bcefb5ce63d260e4975a808a41f7538 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Wed, 29 Mar 2023 08:16:57 -0600 Subject: [PATCH 109/126] missed param --- .../src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java index 690202d7ab4f..4041db945a70 100644 --- a/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java +++ b/jvm-packages/xgboost4j/src/test/java/ml/dmlc/xgboost4j/java/BoosterImplTest.java @@ -356,7 +356,7 @@ public void testBoosterInplacePredict() throws XGBoostError, IOException { // Submit all the tasks for (int i=0; i Date: Wed, 29 Mar 2023 08:42:31 -0600 Subject: [PATCH 110/126] increment our version --- jvm-packages/pom.xml | 2 +- jvm-packages/xgboost4j-example/pom.xml | 4 ++-- jvm-packages/xgboost4j-flink/pom.xml | 6 +++--- 
jvm-packages/xgboost4j-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark/pom.xml | 4 ++-- jvm-packages/xgboost4j/pom.xml | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index 5e88472aaa06..636edb9238f1 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1-sovrn + 1.7.1-sovrn-2 pom XGBoost JVM Package JVM Package for XGBoost diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index 3cd694868654..28c9692ffa10 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1-sovrn + 1.7.1-sovrn-2 xgboost4j-example_2.12 - 1.7.1-sovrn + 1.7.1-sovrn-2 jar diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index c9f4e0461780..64c34a2f147c 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1-sovrn + 1.7.1-sovrn-2 xgboost4j-flink_2.12 - 1.7.1-sovrn + 1.7.1-sovrn-2 @@ -26,7 +26,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.1-sovrn + 1.7.1-sovrn-2 org.apache.commons diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index 43e748a4d342..608f9460d83d 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1-sovrn + 1.7.1-sovrn-2 xgboost4j-gpu_2.12 - 1.7.1-sovrn + 1.7.1-sovrn-2 jar diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index 6e2f119e2d76..21af63f450bf 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1-sovrn + 1.7.1-sovrn-2 xgboost4j-spark-gpu_2.12 @@ -24,7 +24,7 @@ ml.dmlc 
xgboost4j-gpu_${scala.binary.version} - 1.7.1-sovrn + 1.7.1-sovrn-2 org.apache.spark diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index 7a50b5be1725..86930aa13ce0 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1-sovrn + 1.7.1-sovrn-2 xgboost4j-spark_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.1-sovrn + 1.7.1-sovrn-2 org.apache.spark diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index ff58f67cc0e5..96199aec489d 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.1-sovrn + 1.7.1-sovrn-2 xgboost4j_2.12 - 1.7.1-sovrn + 1.7.1-sovrn-2 jar From 5cd4015d708e4f0937d0a2b26b723490b9ca7ba9 Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 30 Mar 2023 15:21:42 +0800 Subject: [PATCH 111/126] [backport] Fill column size. (#8997) --- src/data/iterative_dmatrix.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/src/data/iterative_dmatrix.cc b/src/data/iterative_dmatrix.cc index ebb9d1a98e20..317ea5b5d140 100644 --- a/src/data/iterative_dmatrix.cc +++ b/src/data/iterative_dmatrix.cc @@ -110,6 +110,7 @@ void IterativeDMatrix::InitFromCPU(DataIterHandle iter_handle, float missing, size_t n_threads = ctx_.Threads(); size_t n_features = column_sizes.size(); linalg::Tensor column_sizes_tloc({n_threads, n_features}, Context::kCpuId); + column_sizes_tloc.Data()->Fill(0); auto view = column_sizes_tloc.HostView(); common::ParallelFor(value.Size(), n_threads, common::Sched::Static(256), [&](auto i) { auto const& line = value.GetLine(i); From 21d95f3d8f23873a76f8afaad0fee5fa3e00eafe Mon Sep 17 00:00:00 2001 From: Jiaming Yuan Date: Thu, 30 Mar 2023 20:02:31 +0800 Subject: [PATCH 112/126] [backport] [doc][R] Update link. 
(#8998) (#9001) --- R-package/LICENSE | 4 ++-- R-package/R/xgb.plot.tree.R | 2 +- R-package/man/xgb.plot.tree.Rd | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R-package/LICENSE b/R-package/LICENSE index b9f38c38aaf9..bc1c21d59fe5 100644 --- a/R-package/LICENSE +++ b/R-package/LICENSE @@ -1,9 +1,9 @@ -Copyright (c) 2014 by Tianqi Chen and Contributors +Copyright (c) 2014-2023, Tianqi Chen and XBGoost Contributors Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at - + http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software diff --git a/R-package/R/xgb.plot.tree.R b/R-package/R/xgb.plot.tree.R index dc2656170222..a11790c25329 100644 --- a/R-package/R/xgb.plot.tree.R +++ b/R-package/R/xgb.plot.tree.R @@ -34,7 +34,7 @@ #' The branches that also used for missing values are marked as bold #' (as in "carrying extra capacity"). #' -#' This function uses \href{http://www.graphviz.org/}{GraphViz} as a backend of DiagrammeR. +#' This function uses \href{https://www.graphviz.org/}{GraphViz} as a backend of DiagrammeR. #' #' @return #' diff --git a/R-package/man/xgb.plot.tree.Rd b/R-package/man/xgb.plot.tree.Rd index 8fd7196afdba..d419eb76a02f 100644 --- a/R-package/man/xgb.plot.tree.Rd +++ b/R-package/man/xgb.plot.tree.Rd @@ -67,7 +67,7 @@ The "Yes" branches are marked by the "< split_value" label. The branches that also used for missing values are marked as bold (as in "carrying extra capacity"). -This function uses \href{http://www.graphviz.org/}{GraphViz} as a backend of DiagrammeR. +This function uses \href{https://www.graphviz.org/}{GraphViz} as a backend of DiagrammeR. 
} \examples{ data(agaricus.train, package='xgboost') From 1539100b2f1939a9f5a16638586b28cb3bb4fcb0 Mon Sep 17 00:00:00 2001 From: Sean Riggin Date: Wed, 3 May 2023 15:45:53 -0600 Subject: [PATCH 113/126] Cleaning up XGBoosterInplacePredict --- src/c_api/c_api.cc | 42 ++++-------------------------------------- 1 file changed, 4 insertions(+), 38 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 1a2d56709571..27cdc4e4e77a 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1027,48 +1027,14 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, API_BEGIN(); CHECK_HANDLE(); xgboost::bst_ulong out_dim; - std::shared_ptr x{new xgboost::data::DenseAdapter(data, num_rows, num_features)}; - //std::shared_ptr p_m(dMatrixHandle); - std::shared_ptr p_m{nullptr}; - /*if (!dMatrixHandle) { - fprintf (stderr, "dMatrixHandle is null"); - exit(1); - }*/ - if (!dMatrixHandle) { - p_m.reset(new data::DMatrixProxy); - //fprintf (stdout, "dmatrix handle is null"); - if (!p_m) { - fprintf (stderr, "p_m 1 is null"); - exit(1); - } - } else { - p_m = *static_cast *>(dMatrixHandle); - //fprintf (stdout, "dmatrix handle is not null"); - if (!p_m) { - fprintf (stderr, "p_m 2 is null"); - exit(1); - } - } - //fprintf (stdout, reinterpret_cast(p_m.get())); - p_m.reset(new data::DMatrixProxy); - auto stuff = dynamic_cast(p_m.get()); - auto proxy = new std::shared_ptr(new xgboost::data::DMatrixProxy); - //printf ("stuff is %s", typeid(stuff).name()); - //printf ("proxy is %s", typeid(proxy).name()); - if (!proxy) { - fprintf (stderr, "proxy is null line 1058"); - exit(1); - } - if (!stuff) { - fprintf (stderr, "stuff is null line 1062"); - exit(1); - } + + auto *proxy = new data::DMatrixProxy; auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); - stuff->SetDenseData(data, num_rows, num_features); + proxy->SetDenseData(data, num_rows, num_features); + std::shared_ptr p_m{proxy}; InplacePredictImplCore(p_m, 
learner, (xgboost::PredictionType)0, missing, num_rows, num_features, 0, iteration_end, true, len, &out_dim, out_result); -// printf("XGBoosterInplacePredict len = %u, dim = %u\n", **len, out_dim); API_END(); } From aea9cd3a4613bb29b2fa4fcb6cca32a34edba132 Mon Sep 17 00:00:00 2001 From: Sean Riggin Date: Thu, 4 May 2023 09:36:37 -0600 Subject: [PATCH 114/126] Further de-newification --- src/c_api/c_api.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index 27cdc4e4e77a..d0fe82db1dfa 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1028,11 +1028,11 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, CHECK_HANDLE(); xgboost::bst_ulong out_dim; - auto *proxy = new data::DMatrixProxy; + auto proxy = data::DMatrixProxy(); auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); - proxy->SetDenseData(data, num_rows, num_features); - std::shared_ptr p_m{proxy}; + proxy.SetDenseData(data, num_rows, num_features); + std::shared_ptr p_m{&proxy}; InplacePredictImplCore(p_m, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, 0, iteration_end, true, len, &out_dim, out_result); API_END(); From a96c3b9847b5bfadd5b047ad0b88cdbce0d51e0a Mon Sep 17 00:00:00 2001 From: Sean Riggin Date: Thu, 4 May 2023 10:06:53 -0600 Subject: [PATCH 115/126] More clean-up --- src/c_api/c_api.cc | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/c_api/c_api.cc b/src/c_api/c_api.cc index d0fe82db1dfa..d5dbb98bd2d6 100644 --- a/src/c_api/c_api.cc +++ b/src/c_api/c_api.cc @@ -1028,12 +1028,11 @@ XGB_DLL int XGBoosterInplacePredict(BoosterHandle handle, CHECK_HANDLE(); xgboost::bst_ulong out_dim; - auto proxy = data::DMatrixProxy(); auto *learner = static_cast(handle); auto iteration_end = GetIterationFromTreeLimit(ntree_limit, learner); - proxy.SetDenseData(data, num_rows, num_features); - std::shared_ptr p_m{&proxy}; - 
InplacePredictImplCore(p_m, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, + auto proxy = std::make_shared(); + proxy->SetDenseData(data, num_rows, num_features); + InplacePredictImplCore(proxy, learner, (xgboost::PredictionType)0, missing, num_rows, num_features, 0, iteration_end, true, len, &out_dim, out_result); API_END(); } From ab5bc5e5f81434c15d09a4c3621d37792dfa1a61 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Mon, 8 May 2023 11:03:52 -0600 Subject: [PATCH 116/126] Update all the poms, even the unused ones --- jvm-packages/xgboost4j-flink/pom.xml | 6 +++--- jvm-packages/xgboost4j-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark/pom.xml | 4 ++-- jvm-packages/xgboost4j/pom.xml | 4 ++-- 5 files changed, 11 insertions(+), 11 deletions(-) diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index 1402c2b3a1ce..2430265693d8 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn + 1.7.5-sovrn-2 xgboost4j-flink_2.12 - 1.7.5-sovrn + 1.7.5-sovrn-2 @@ -26,7 +26,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.5-sovrn + 1.7.5-sovrn-2 org.apache.commons diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index e35e7953f332..a37f1e2f0a1b 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn + 1.7.5-sovrn-2 xgboost4j-gpu_2.12 - 1.7.5-sovrn + 1.7.5-sovrn-2 jar diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index 92fc40bd224b..92901b9f4ed6 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn + 1.7.5-sovrn-2 xgboost4j-spark-gpu_2.12 @@ -24,7 +24,7 @@ ml.dmlc 
xgboost4j-gpu_${scala.binary.version} - 1.7.5-sovrn + 1.7.5-sovrn-2 org.apache.spark diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index e4fa299e022d..09e73263692d 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn + 1.7.5-sovrn-2 xgboost4j-spark_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.5-sovrn + 1.7.5-sovrn-2 org.apache.spark diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index bb2a43b199c4..10dc8a110a9b 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn + 1.7.5-sovrn-2 xgboost4j_2.12 - 1.7.5-sovrn + 1.7.5-sovrn-2 jar From 61822448873ff617d58d98812560a5610a43ddcb Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Mon, 8 May 2023 11:04:37 -0600 Subject: [PATCH 117/126] One missed pom --- jvm-packages/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index 383685775fd2..f6667410bca5 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn + 1.7.5-sovrn-2 pom XGBoost JVM Package JVM Package for XGBoost From 78f52905cf26aaed7bfc28b936f44951dcb85b38 Mon Sep 17 00:00:00 2001 From: Jon Yoquinto Date: Mon, 8 May 2023 11:10:10 -0600 Subject: [PATCH 118/126] pom change git didn't pick up --- jvm-packages/xgboost4j-example/pom.xml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index 38ae68514690..9d83c074178f 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn + 1.7.5-sovrn-2 xgboost4j-example_2.12 - 1.7.5-sovrn + 1.7.5-sovrn-2 jar From 
19f044670e8dc73d0eb6efa85a0bc6b1548be870 Mon Sep 17 00:00:00 2001 From: Chris Pabst Date: Mon, 23 Oct 2023 11:34:18 -0600 Subject: [PATCH 119/126] EXDS-242: Stop creating unused DMatrix --- jvm-packages/pom.xml | 2 +- jvm-packages/xgboost4j-example/pom.xml | 4 ++-- jvm-packages/xgboost4j-flink/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark/pom.xml | 4 ++-- jvm-packages/xgboost4j/pom.xml | 4 ++-- .../src/main/java/ml/dmlc/xgboost4j/java/Booster.java | 7 ++++--- 6 files changed, 13 insertions(+), 12 deletions(-) diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index f6667410bca5..a4e8064b5a89 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-2 + 1.7.5-sovrn-dmatrix-fix pom XGBoost JVM Package JVM Package for XGBoost diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index 9d83c074178f..b2eccefdd980 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-2 + 1.7.5-sovrn-dmatrix-fix xgboost4j-example_2.12 - 1.7.5-sovrn-2 + 1.7.5-sovrn-dmatrix-fix jar diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index 2430265693d8..bb6909746f54 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-2 + 1.7.5-sovrn-dmatrix-fix xgboost4j-flink_2.12 - 1.7.5-sovrn-2 + 1.7.5-sovrn-dmatrix-fix diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index 09e73263692d..12a142e4f670 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-2 + 1.7.5-sovrn-dmatrix-fix xgboost4j-spark_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.5-sovrn-2 + 1.7.5-sovrn-dmatrix-fix org.apache.spark diff --git 
a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index 10dc8a110a9b..bd2ac6a18f01 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-2 + 1.7.5-sovrn-dmatrix-fix xgboost4j_2.12 - 1.7.5-sovrn-2 + 1.7.5-sovrn-dmatrix-fix jar diff --git a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java index fd48df3a5f40..4173250a8dc8 100644 --- a/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java +++ b/jvm-packages/xgboost4j/src/main/java/ml/dmlc/xgboost4j/java/Booster.java @@ -448,11 +448,12 @@ public float[][] inplace_predict(float[] data, if (predContribs) { optionMask = 4; } - DMatrix d_mat = new DMatrix(data, num_rows, num_features, missing); float[][] rawPredicts = new float[1][]; + // The DMatrix in this JNI call is completely unused, and creating one actually causes + // performance problems. So we will just pass a 0L handle XGBoostJNI.checkCall(XGBoostJNI.XGBoosterInplacePredict(handle, data, num_rows, num_features, - d_mat.getHandle(), missing, - optionMask, treeLimit, rawPredicts)); // pass missing and treelimit here? + 0L, missing, optionMask, treeLimit, rawPredicts)); + // pass missing and treelimit here? 
// System.out.println("Booster.inplace_predict rawPredicts[0].length = " + // rawPredicts[0].length); From fdc4eebd7f5a6758c0618aeb30b6a022180722f5 Mon Sep 17 00:00:00 2001 From: Sandra Janicki Date: Wed, 15 Nov 2023 14:01:27 -0700 Subject: [PATCH 120/126] disable openmp; build with debug info --- CMakeLists.txt | 2 +- jvm-packages/create_jni.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index eb3e89b4d29d..b889b0767f34 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -27,7 +27,7 @@ set_default_configuration_release() #-- Options ## User options option(BUILD_C_DOC "Build documentation for C APIs using Doxygen." OFF) -option(USE_OPENMP "Build with OpenMP support." ON) +option(USE_OPENMP "Build with OpenMP support." OFF) option(BUILD_STATIC_LIB "Build static library" OFF) option(FORCE_SHARED_CRT "Build with dynamic CRT on Windows (/MD)" OFF) option(RABIT_BUILD_MPI "Build MPI" OFF) diff --git a/jvm-packages/create_jni.py b/jvm-packages/create_jni.py index 18908fc1c0d5..835d799612d7 100755 --- a/jvm-packages/create_jni.py +++ b/jvm-packages/create_jni.py @@ -15,7 +15,7 @@ CONFIG = { - "USE_OPENMP": "ON", + "USE_OPENMP": "OFF", "USE_HDFS": "OFF", "USE_AZURE": "OFF", "USE_S3": "OFF", @@ -118,7 +118,7 @@ def normpath(path): lib_dir = os.path.join(os.pardir, 'lib') if os.path.exists(lib_dir): shutil.rmtree(lib_dir) - run("cmake .. " + " ".join(args) + maybe_generator) + run("cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo .. " + " ".join(args) + maybe_generator) run("cmake --build . 
--config Release" + maybe_parallel_build) with cd("demo/CLI/regression"): From 1fdd8c45ff53a70cecee6ef848ac3d41172a1427 Mon Sep 17 00:00:00 2001 From: Sandra Janicki Date: Thu, 16 Nov 2023 12:26:28 -0700 Subject: [PATCH 121/126] fix some of the pom files and see if the java build gets further --- jvm-packages/pom.xml | 2 +- jvm-packages/xgboost4j-example/pom.xml | 4 ++-- jvm-packages/xgboost4j-flink/pom.xml | 6 +++--- jvm-packages/xgboost4j/pom.xml | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index a4e8064b5a89..e8401fc98cb0 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-dmatrix-fix + 1.7.5-sovrn-no-openmp pom XGBoost JVM Package JVM Package for XGBoost diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index b2eccefdd980..03bed4a55ead 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-dmatrix-fix + 1.7.5-sovrn-no-openmp xgboost4j-example_2.12 - 1.7.5-sovrn-dmatrix-fix + 1.7.5-sovrn-no-openmp jar diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index bb6909746f54..bcf98b98ba69 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-dmatrix-fix + 1.7.5-sovrn-no-openmp xgboost4j-flink_2.12 - 1.7.5-sovrn-dmatrix-fix + 1.7.5-sovrn-no-openmp @@ -26,7 +26,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.5-sovrn-2 + 1.7.5-sovrn-no-openmp org.apache.commons diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index bd2ac6a18f01..f6491f95b52d 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-dmatrix-fix + 1.7.5-sovrn-no-openmp xgboost4j_2.12 - 
1.7.5-sovrn-dmatrix-fix + 1.7.5-sovrn-no-openmp jar From 143698a446b29b55c967538621a7b9d5fc896c10 Mon Sep 17 00:00:00 2001 From: Sandra Janicki Date: Thu, 16 Nov 2023 12:34:47 -0700 Subject: [PATCH 122/126] fix the rest of the poms too --- jvm-packages/xgboost4j-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark-gpu/pom.xml | 2 +- jvm-packages/xgboost4j-spark/pom.xml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index a37f1e2f0a1b..b2a144d06ab8 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-2 + 1.7.5-sovrn-no-openmp xgboost4j-gpu_2.12 - 1.7.5-sovrn-2 + 1.7.5-sovrn-no-openmp jar diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index 92901b9f4ed6..236236b1ac6b 100644 --- a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-2 + 1.7.5-sovrn-no-openmp xgboost4j-spark-gpu_2.12 diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index 12a142e4f670..2256d81c248d 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-dmatrix-fix + 1.7.5-sovrn-no-openmp xgboost4j-spark_2.12 From cee52828d8ba6bb059a99817067b013bd9f1f48c Mon Sep 17 00:00:00 2001 From: Sandra Janicki Date: Thu, 16 Nov 2023 12:59:32 -0700 Subject: [PATCH 123/126] another pom version fix --- jvm-packages/xgboost4j-spark/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index 2256d81c248d..18f02e32725d 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -24,7 +24,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 
1.7.5-sovrn-dmatrix-fix + 1.7.5-sovrn-no-openmp org.apache.spark From bb5039cfecac5478dd80852fa45a0bf1f3f9ba4d Mon Sep 17 00:00:00 2001 From: Joe <25804777+ByteSizedJoe@users.noreply.github.com> Date: Wed, 29 Jan 2025 16:51:46 -0700 Subject: [PATCH 124/126] chore: cache cfs cpu count result --- src/common/threading_utils.cc | 67 +++++++++++++++++++++-------------- 1 file changed, 40 insertions(+), 27 deletions(-) diff --git a/src/common/threading_utils.cc b/src/common/threading_utils.cc index bcff45efbd1e..94951a161221 100644 --- a/src/common/threading_utils.cc +++ b/src/common/threading_utils.cc @@ -4,48 +4,61 @@ #include "threading_utils.h" #include -#include +#include #include "xgboost/logging.h" namespace xgboost { namespace common { + /** - * \brief Get thread limit from CFS - * - * Modified from - * github.com/psiha/sweater/blob/master/include/boost/sweater/hardware_concurrency.hpp + * \brief Read an integer value from a system file. * - * MIT License: Copyright (c) 2016 Domagoj Šarić + * \param file_path Path to the system file. + * \return Parsed integer value, or -1 if reading fails. 
*/ -int32_t GetCfsCPUCount() noexcept { -#if defined(__linux__) - // https://bugs.openjdk.java.net/browse/JDK-8146115 - // http://hg.openjdk.java.net/jdk/hs/rev/7f22774a5f42 - // RAM limit /sys/fs/cgroup/memory.limit_in_bytes - // swap limt /sys/fs/cgroup/memory.memsw.limit_in_bytes - - auto read_int = [](char const* const file_path) noexcept { +int ReadIntFromFile(const char* file_path) noexcept { std::ifstream fin(file_path); if (!fin) { - return -1; + return -1; // Return -1 on failure } + std::string value; fin >> value; + try { - return std::stoi(value); - } catch (std::exception const&) { - return -1; + return std::stoi(value); + } catch (const std::exception&) { + return -1; // Return -1 if parsing fails } - }; - // complete fair scheduler from Linux - auto const cfs_quota(read_int("/sys/fs/cgroup/cpu/cpu.cfs_quota_us")); - auto const cfs_period(read_int("/sys/fs/cgroup/cpu/cpu.cfs_period_us")); - if ((cfs_quota > 0) && (cfs_period > 0)) { - return std::max(cfs_quota / cfs_period, 1); - } -#endif // defined(__linux__) - return -1; } + +/** + * \brief Get thread limit from the CFS scheduler. + * + * This function caches the result to avoid repeated file system access. + * + * \return The computed thread limit or -1 if unavailable. 
+ */ +int32_t GetCfsCPUCount() noexcept { + static int32_t cached_cfs_cpu_count = -2; + static std::once_flag cache_flag; + + std::call_once(cache_flag, []() { +#if defined(__linux__) + const int cfs_quota = ReadIntFromFile("/sys/fs/cgroup/cpu/cpu.cfs_quota_us"); + const int cfs_period = ReadIntFromFile("/sys/fs/cgroup/cpu/cpu.cfs_period_us"); + + if (cfs_quota > 0 && cfs_period > 0) { + cached_cfs_cpu_count = std::max(cfs_quota / cfs_period, 1); + return; + } +#endif // defined(__linux__) + cached_cfs_cpu_count = -1; // Default to -1 if no valid quota is found + }); + + return cached_cfs_cpu_count; +} + } // namespace common } // namespace xgboost From 6a69e939ea7a12d9b9cda0aceff73890da5dc2d8 Mon Sep 17 00:00:00 2001 From: Joe <25804777+ByteSizedJoe@users.noreply.github.com> Date: Thu, 30 Jan 2025 08:17:07 -0700 Subject: [PATCH 125/126] chore: fixes, add dockerfile for build --- Dockerfile | 4 ++++ src/common/threading_utils.cc | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 000000000000..f6d1aaa4461b --- /dev/null +++ b/Dockerfile @@ -0,0 +1,4 @@ +FROM azul/zulu-openjdk:8-latest +RUN apt-get -y update && \ + apt-get install -y build-essential cmake python3 git neovim maven python-is-python3 curl +ENTRYPOINT ["/bin/bash"] diff --git a/src/common/threading_utils.cc b/src/common/threading_utils.cc index 94951a161221..f44e9aa99aa5 100644 --- a/src/common/threading_utils.cc +++ b/src/common/threading_utils.cc @@ -4,7 +4,8 @@ #include "threading_utils.h" #include -#include +#include +#include #include "xgboost/logging.h" From de4256ad83e6b0d29cddb586b78ee7949d185469 Mon Sep 17 00:00:00 2001 From: Joe <25804777+ByteSizedJoe@users.noreply.github.com> Date: Thu, 30 Jan 2025 08:27:14 -0700 Subject: [PATCH 126/126] chore: version --- jvm-packages/pom.xml | 2 +- jvm-packages/xgboost4j-example/pom.xml | 4 ++-- jvm-packages/xgboost4j-flink/pom.xml | 6 +++--- 
jvm-packages/xgboost4j-gpu/pom.xml | 4 ++-- jvm-packages/xgboost4j-spark-gpu/pom.xml | 2 +- jvm-packages/xgboost4j-spark/pom.xml | 4 ++-- jvm-packages/xgboost4j/pom.xml | 4 ++-- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/jvm-packages/pom.xml b/jvm-packages/pom.xml index e8401fc98cb0..0086614f0408 100644 --- a/jvm-packages/pom.xml +++ b/jvm-packages/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-no-openmp + 1.7.5-sovrn-no-openmp-cpucache pom XGBoost JVM Package JVM Package for XGBoost diff --git a/jvm-packages/xgboost4j-example/pom.xml b/jvm-packages/xgboost4j-example/pom.xml index 03bed4a55ead..4f6885506e01 100644 --- a/jvm-packages/xgboost4j-example/pom.xml +++ b/jvm-packages/xgboost4j-example/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-no-openmp + 1.7.5-sovrn-no-openmp-cpucache xgboost4j-example_2.12 - 1.7.5-sovrn-no-openmp + 1.7.5-sovrn-no-openmp-cpucache jar diff --git a/jvm-packages/xgboost4j-flink/pom.xml b/jvm-packages/xgboost4j-flink/pom.xml index bcf98b98ba69..0a9a278f08ef 100644 --- a/jvm-packages/xgboost4j-flink/pom.xml +++ b/jvm-packages/xgboost4j-flink/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-no-openmp + 1.7.5-sovrn-no-openmp-cpucache xgboost4j-flink_2.12 - 1.7.5-sovrn-no-openmp + 1.7.5-sovrn-no-openmp-cpucache @@ -26,7 +26,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.5-sovrn-no-openmp + 1.7.5-sovrn-no-openmp-cpucache org.apache.commons diff --git a/jvm-packages/xgboost4j-gpu/pom.xml b/jvm-packages/xgboost4j-gpu/pom.xml index b2a144d06ab8..45fa5f563f70 100644 --- a/jvm-packages/xgboost4j-gpu/pom.xml +++ b/jvm-packages/xgboost4j-gpu/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-no-openmp + 1.7.5-sovrn-no-openmp-cpucache xgboost4j-gpu_2.12 - 1.7.5-sovrn-no-openmp + 1.7.5-sovrn-no-openmp-cpucache jar diff --git a/jvm-packages/xgboost4j-spark-gpu/pom.xml b/jvm-packages/xgboost4j-spark-gpu/pom.xml index 236236b1ac6b..f032d64bad0a 100644 --- 
a/jvm-packages/xgboost4j-spark-gpu/pom.xml +++ b/jvm-packages/xgboost4j-spark-gpu/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-no-openmp + 1.7.5-sovrn-no-openmp-cpucache xgboost4j-spark-gpu_2.12 diff --git a/jvm-packages/xgboost4j-spark/pom.xml b/jvm-packages/xgboost4j-spark/pom.xml index 18f02e32725d..ce2d60846848 100644 --- a/jvm-packages/xgboost4j-spark/pom.xml +++ b/jvm-packages/xgboost4j-spark/pom.xml @@ -6,7 +6,7 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-no-openmp + 1.7.5-sovrn-no-openmp-cpucache xgboost4j-spark_2.12 @@ -24,7 +24,7 @@ ml.dmlc xgboost4j_${scala.binary.version} - 1.7.5-sovrn-no-openmp + 1.7.5-sovrn-no-openmp-cpucache org.apache.spark diff --git a/jvm-packages/xgboost4j/pom.xml b/jvm-packages/xgboost4j/pom.xml index f6491f95b52d..673e0e7f82be 100644 --- a/jvm-packages/xgboost4j/pom.xml +++ b/jvm-packages/xgboost4j/pom.xml @@ -6,10 +6,10 @@ ml.dmlc xgboost-jvm_2.12 - 1.7.5-sovrn-no-openmp + 1.7.5-sovrn-no-openmp-cpucache xgboost4j_2.12 - 1.7.5-sovrn-no-openmp + 1.7.5-sovrn-no-openmp-cpucache jar