diff --git a/CHANGELOG.md b/CHANGELOG.md index 55b692ec08c..1e6452fdf21 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,13 +1,20 @@ # cuGraph 0.17.0 (Date TBD) ## New Features +- PR #1245 Add functions to add pandas and numpy compatibility ## Improvements +- PR #1227 Pin cmake policies to cmake 3.17 version ## Bug Fixes +- PR #1242 Calling gunrock cmake using explicit -D options, re-enabling C++ tests +- PR #1246 Use latest Gunrock, update HITS implementation +- PR #1250 Updated cuco commit hash to latest as of 2020-10-30 and removed unneeded GIT_SHALLOW param +- PR #1251 Changed the MG context testing class to use updated parameters passed in from the individual tests +- PR #1253 MG test fixes: updated additional comms.initialize() calls, fixed dask DataFrame comparisons -# cuGraph 0.16.0 (Date TBD) +# cuGraph 0.16.0 (21 Oct 2020) ## New Features - PR #1098 Add new graph classes to support 2D partitioning @@ -45,7 +52,7 @@ - PR #1222 Added min CUDA version check to MG Louvain - PR #1217 NetworkX Transition doc - PR #1223 Update mnmg docs - +- PR #1230 Improve gpuCI scripts ## Bug Fixes - PR #1131 Show style checker errors with set +e @@ -62,8 +69,10 @@ - PR #1196 Move subcomms init outside of individual algorithm functions - PR #1198 Remove deprecated call to from_gpu_matrix - PR #1174 Fix bugs in MNMG pattern accelerators and pattern accelerator based implementations of MNMG PageRank, BFS, and SSSP - - +- PR #1233 Temporarily disabling C++ tests for 0.16 +- PR #1240 Require `ucx-proc=*=gpu` +- PR #1241 Fix a bug in personalized PageRank with the new graph primitives API. 
+- PR #1249 Fix upload script syntax # cuGraph 0.15.0 (26 Aug 2020) diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh index 49a6362a904..5f74dca4044 100644 --- a/ci/benchmark/build.sh +++ b/ci/benchmark/build.sh @@ -9,16 +9,12 @@ set -o pipefail NUMARGS=$# ARGS=$* -function logger { - echo -e "\n>>>> $@\n" -} - function hasArg { (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ") } function cleanup { - logger "Removing datasets and temp files..." + gpuci_logger "Removing datasets and temp files" rm -rf $WORKSPACE/datasets/test rm -rf $WORKSPACE/datasets/benchmark rm -f testoutput.txt @@ -26,19 +22,27 @@ function cleanup { # Set cleanup trap for Jenkins if [ ! -z "$JENKINS_HOME" ] ; then - logger "Jenkins environment detected, setting cleanup trap..." + gpuci_logger "Jenkins environment detected, setting cleanup trap" trap cleanup EXIT fi # Set path, build parallel level, and CUDA version cd $WORKSPACE -export PATH=/conda/bin:/usr/local/cuda/bin:$PATH -export PARALLEL_LEVEL=4 +export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH +export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} export CUDA_REL=${CUDA_VERSION%.*} + +# Set home export HOME=$WORKSPACE + +# Parse git describe export GIT_DESCRIBE_TAG=`git describe --tags` export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` +# Setup 'gpuci_conda_retry' for build retries (results in 2 total attempts) +export GPUCI_CONDA_RETRY_MAX=1 +export GPUCI_CONDA_RETRY_SLEEP=30 + # Set Benchmark Vars export DATASETS_DIR=${WORKSPACE}/datasets export BENCHMARKS_DIR=${WORKSPACE}/benchmarks @@ -49,56 +53,58 @@ export BENCHMARKS_DIR=${WORKSPACE}/benchmarks # TODO: Delete build section when artifacts are available -logger "Check environment..." +gpuci_logger "Check environment" env -logger "Check GPU usage..." +gpuci_logger "Check GPU usage" nvidia-smi -logger "Activate conda env..." -source activate rapids - +gpuci_logger "Activate conda env" +. 
/opt/conda/etc/profile.d/conda.sh +conda activate rapids # Enter dependencies to be shown in ASV tooltips. CUGRAPH_DEPS=(cudf rmm) LIBCUGRAPH_DEPS=(cudf rmm) -logger "conda install required packages" -conda install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaults \ +gpuci_logger "Install required packages" +gpuci_conda_retry install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaults \ "cudf=${MINOR_VERSION}" \ "rmm=${MINOR_VERSION}" \ "cudatoolkit=$CUDA_REL" \ "dask-cudf=${MINOR_VERSION}" \ "dask-cuda=${MINOR_VERSION}" \ "ucx-py=${MINOR_VERSION}" \ + "ucx-proc=*=gpu" \ "rapids-build-env=${MINOR_VERSION}" \ rapids-pytest-benchmark -# Install the master version of dask and distributed -logger "pip install git+https://github.com/dask/distributed.git --upgrade --no-deps" +gpuci_logger "Install the master version of dask and distributed" pip install "git+https://github.com/dask/distributed.git" --upgrade --no-deps - -logger "pip install git+https://github.com/dask/dask.git --upgrade --no-deps" pip install "git+https://github.com/dask/dask.git" --upgrade --no-deps -logger "Check versions..." +gpuci_logger "Check versions" python --version $CC --version $CXX --version -conda list + +gpuci_logger "Check conda environment" +conda info +conda config --show-sources +conda list --show-channel-urls ########################################## # Build cuGraph # ########################################## -logger "Build libcugraph..." +gpuci_logger "Build libcugraph" $WORKSPACE/build.sh clean libcugraph cugraph ########################################## # Run Benchmarks # ########################################## -logger "Downloading Datasets for Benchmarks..." +gpuci_logger "Downloading Datasets for Benchmarks" cd $DATASETS_DIR bash ./get_test_data.sh --benchmark ERRORCODE=$((ERRORCODE | $?)) @@ -148,7 +154,7 @@ BENCHMARK_META=$(jq -n \ echo "Benchmark meta:" echo "${BENCHMARK_META}" | jq "." -logger "Running Benchmarks..." 
+gpuci_logger "Running Benchmarks" cd $BENCHMARKS_DIR set +e time pytest -v -m "small and managedmem_on and poolallocator_on" \ diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 2cdb77bbbc2..2c6dc899be2 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -5,14 +5,9 @@ ######################################### set -e -# Logger function for build status output -function logger() { - echo -e "\n>>>> $@\n" -} - # Set path and build parallel level -export PATH=/conda/bin:/usr/local/cuda/bin:$PATH -export PARALLEL_LEVEL=4 +export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH +export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} # Set home to the job's workspace export HOME=$WORKSPACE @@ -25,41 +20,59 @@ if [[ "$BUILD_MODE" = "branch" && "$SOURCE_BRANCH" = branch-* ]] ; then export VERSION_SUFFIX=`date +%y%m%d` fi +# Setup 'gpuci_conda_retry' for build retries (results in 2 total attempts) +export GPUCI_CONDA_RETRY_MAX=1 +export GPUCI_CONDA_RETRY_SLEEP=30 + ################################################################################ # SETUP - Check environment ################################################################################ -logger "Get env..." +gpuci_logger "Check environment variables" env -logger "Activate conda env..." -source activate gdf +gpuci_logger "Activate conda env" +. /opt/conda/etc/profile.d/conda.sh +conda activate rapids -logger "Check versions..." 
+gpuci_logger "Check versions" python --version -gcc --version -g++ --version -conda list +$CC --version +$CXX --version + +gpuci_logger "Check conda environment" +conda info +conda config --show-sources +conda list --show-channel-urls # FIX Added to deal with Anancoda SSL verification issues during conda builds conda config --set ssl_verify False ############################################################################### -# BUILD - Conda package builds (conda deps: libcugraph <- cugraph) -################################################################################ +# BUILD - Conda package builds +############################################################################### -logger "Build conda pkg for libcugraph..." -source ci/cpu/libcugraph/build_libcugraph.sh +gpuci_logger "Build conda pkg for libcugraph" +if [ "$BUILD_LIBCUGRAPH" == '1' ]; then + if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then + conda build conda/recipes/libcugraph + else + conda build --dirty --no-remove-work-dir conda/recipes/libcugraph + fi +fi -logger "Build conda pkg for cugraph..." -source ci/cpu/cugraph/build_cugraph.sh +gpuci_logger "Build conda pkg for cugraph" +if [ "$BUILD_CUGRAPH" == "1" ]; then + if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then + conda build conda/recipes/cugraph --python=$PYTHON + else + conda build conda/recipes/cugraph -c ci/artifacts/cugraph/cpu/conda-bld/ --dirty --no-remove-work-dir --python=$PYTHON + fi +fi ################################################################################ # UPLOAD - Conda packages ################################################################################ -logger "Upload libcugraph conda pkg..." -source ci/cpu/libcugraph/upload-anaconda.sh - -logger "Upload cugraph conda pkg..." 
-source ci/cpu/cugraph/upload-anaconda.sh +gpuci_logger "Upload conda packages" +source ci/cpu/upload.sh diff --git a/ci/cpu/cugraph/build_cugraph.sh b/ci/cpu/cugraph/build_cugraph.sh deleted file mode 100755 index 70f5baee230..00000000000 --- a/ci/cpu/cugraph/build_cugraph.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -e - -if [ "$BUILD_CUGRAPH" == "1" ]; then - echo "Building cugraph" - CUDA_REL=${CUDA_VERSION%.*} - if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then - conda build conda/recipes/cugraph --python=$PYTHON - else - conda build conda/recipes/cugraph -c ci/artifacts/cugraph/cpu/conda-bld/ --dirty --no-remove-work-dir --python=$PYTHON - fi -fi diff --git a/ci/cpu/cugraph/upload-anaconda.sh b/ci/cpu/cugraph/upload-anaconda.sh deleted file mode 100755 index 9601905d6c4..00000000000 --- a/ci/cpu/cugraph/upload-anaconda.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -e - -if [[ "$BUILD_CUGRAPH" == "1" && "$UPLOAD_CUGRAPH" == "1" ]]; then - export UPLOADFILE=`conda build conda/recipes/cugraph -c rapidsai -c nvidia -c numba -c conda-forge -c defaults --python=$PYTHON --output` - - - # Have to label all CUDA versions due to the compatibility to work with any CUDA - if [ "$LABEL_MAIN" == "1" ]; then - LABEL_OPTION="--label main" - elif [ "$LABEL_MAIN" == "0" ]; then - LABEL_OPTION="--label dev" - else - echo "Unknown label configuration LABEL_MAIN='$LABEL_MAIN'" - exit 1 - fi - echo "LABEL_OPTION=${LABEL_OPTION}" - - test -e ${UPLOADFILE} - - if [ ${BUILD_MODE} != "branch" ]; then - echo "Skipping upload" - return 0 - fi - - if [ -z "$MY_UPLOAD_KEY" ]; then - echo "No upload key" - return 0 - fi - - echo "Upload" - echo ${UPLOADFILE} - anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${UPLOADFILE} -else - echo "Skipping cugraph upload" - return 0 -fi diff --git a/ci/cpu/libcugraph/build_libcugraph.sh b/ci/cpu/libcugraph/build_libcugraph.sh deleted file mode 100755 index e5ff77d7db9..00000000000 --- a/ci/cpu/libcugraph/build_libcugraph.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -e - -if [ "$BUILD_LIBCUGRAPH" == '1' ]; then - echo "Building libcugraph" - CUDA_REL=${CUDA_VERSION%.*} - if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then - conda build conda/recipes/libcugraph - else - conda build --dirty --no-remove-work-dir conda/recipes/libcugraph - fi -fi diff --git a/ci/cpu/libcugraph/upload-anaconda.sh b/ci/cpu/libcugraph/upload-anaconda.sh deleted file mode 100755 index 8cd71070778..00000000000 --- a/ci/cpu/libcugraph/upload-anaconda.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -set -e - -if [[ "$BUILD_LIBCUGRAPH" == "1" && "$UPLOAD_LIBCUGRAPH" == "1" ]]; then - CUDA_REL=${CUDA_VERSION%.*} - - export UPLOADFILE=`conda build conda/recipes/libcugraph --output` - - - LABEL_OPTION="--label main" - echo "LABEL_OPTION=${LABEL_OPTION}" - - test -e ${UPLOADFILE} - - if [ ${BUILD_MODE} != "branch" ]; then - echo "Skipping upload" - return 0 - fi - - if [ -z "$MY_UPLOAD_KEY" ]; then - echo "No upload key" - return 0 - fi - - echo "Upload" - echo ${UPLOADFILE} - anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${UPLOADFILE} -else - echo "Skipping libcugraph upload" -fi diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh new file mode 100644 index 00000000000..0fca82216c3 --- /dev/null +++ b/ci/cpu/upload.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# +# Adapted from https://github.com/tmcdonell/travis-scripts/blob/dfaac280ac2082cd6bcaba3217428347899f2975/update-accelerate-buildbot.sh + +set -e + +# Setup 'gpuci_retry' for upload retries (results in 4 total attempts) +export GPUCI_RETRY_MAX=3 +export GPUCI_RETRY_SLEEP=30 + +# Set default label options if they are not defined elsewhere +export LABEL_OPTION=${LABEL_OPTION:-"--label main"} + +# Skip uploads unless BUILD_MODE == "branch" +if [ ${BUILD_MODE} != "branch" ]; then + echo "Skipping upload" + return 0 +fi + +# Skip uploads if there is no upload key +if [ -z "$MY_UPLOAD_KEY" ]; then + echo "No upload key" + return 0 +fi + +################################################################################ +# SETUP - Get conda file output locations +################################################################################ + +gpuci_logger "Get conda file output locations" + +export LIBCUGRAPH_FILE=`conda build conda/recipes/libcugraph --output` +export CUGRAPH_FILE=`conda build conda/recipes/cugraph --python=$PYTHON --output` + +################################################################################ +# UPLOAD - Conda packages 
+################################################################################ + +gpuci_logger "Starting conda uploads" + +if [[ "$BUILD_LIBCUGRAPH" == "1" && "$UPLOAD_LIBCUGRAPH" == "1" ]]; then + test -e ${LIBCUGRAPH_FILE} + echo "Upload libcugraph" + echo ${LIBCUGRAPH_FILE} + gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${LIBCUGRAPH_FILE} +fi + +if [[ "$BUILD_CUGRAPH" == "1" && "$UPLOAD_CUGRAPH" == "1" ]]; then + test -e ${CUGRAPH_FILE} + echo "Upload cugraph" + echo ${CUGRAPH_FILE} + gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUGRAPH_FILE} +fi + diff --git a/ci/docs/build.sh b/ci/docs/build.sh index 71ad79419a0..6ce223d8b2b 100644 --- a/ci/docs/build.sh +++ b/ci/docs/build.sh @@ -18,32 +18,38 @@ export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache" export NIGHTLY_VERSION=$(echo $BRANCH_VERSION | awk -F. '{print $2}') export PROJECTS=(cugraph libcugraph) -logger "Check environment..." +gpuci_logger "Check environment" env -logger "Check GPU usage..." +gpuci_logger "Check GPU usage" nvidia-smi -logger "Activate conda env..." -source activate rapids +gpuci_logger "Activate conda env" +. /opt/conda/etc/profile.d/conda.sh +conda activate rapids + # TODO: Move installs to docs-build-env meta package -conda install -c anaconda markdown beautifulsoup4 jq +gpuci_conda_retry install -c anaconda markdown beautifulsoup4 jq pip install sphinx-markdown-tables -logger "Check versions..." +gpuci_logger "Check versions" python --version $CC --version $CXX --version -conda list + +gpuci_logger "Show conda info" +conda info +conda config --show-sources +conda list --show-channel-urls # Build Doxygen docs -logger "Build Doxygen docs..." +gpuci_logger "Build Doxygen docs" cd $PROJECT_WORKSPACE/cpp/build make docs_cugraph # Build Python docs -logger "Build Sphinx docs..." 
+gpuci_logger "Build Sphinx docs" cd $PROJECT_WORKSPACE/docs make html diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 0dab0437be7..50b3bfb3ee9 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -8,16 +8,17 @@ set -o pipefail NUMARGS=$# ARGS=$* -function logger { - echo -e "\n>>>> $@\n" -} - function hasArg { (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ") } +# Set path, build parallel level, and CUDA version +export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH +export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} +export CUDA_REL=${CUDA_VERSION%.*} + function cleanup { - logger "Removing datasets and temp files..." + gpuci_logger "Removing datasets and temp files" rm -rf $WORKSPACE/datasets/test rm -rf $WORKSPACE/datasets/benchmark rm -f testoutput.txt @@ -25,15 +26,10 @@ function cleanup { # Set cleanup trap for Jenkins if [ ! -z "$JENKINS_HOME" ] ; then - logger "Jenkins environment detected, setting cleanup trap..." + gpuci_logger "Jenkins environment detected, setting cleanup trap" trap cleanup EXIT fi -# Set path, build parallel level, and CUDA version -export PATH=/conda/bin:/usr/local/cuda/bin:$PATH -export PARALLEL_LEVEL=4 -export CUDA_REL=${CUDA_VERSION%.*} - # Set home to the job's workspace export HOME=$WORKSPACE @@ -46,17 +42,18 @@ export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` # SETUP - Check environment ################################################################################ -logger "Check environment..." +gpuci_logger "Check environment" env -logger "Check GPU usage..." +gpuci_logger "Check GPU usage" nvidia-smi -logger "Activate conda env..." -source activate rapids +gpuci_logger "Activate conda env" +. 
/opt/conda/etc/profile.d/conda.sh +conda activate rapids -logger "conda install required packages" -conda install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaults \ +gpuci_logger "Install dependencies" +gpuci_conda_retry install -y \ "libcudf=${MINOR_VERSION}" \ "cudf=${MINOR_VERSION}" \ "librmm=${MINOR_VERSION}" \ @@ -65,34 +62,35 @@ conda install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaul "dask-cudf=${MINOR_VERSION}" \ "dask-cuda=${MINOR_VERSION}" \ "ucx-py=${MINOR_VERSION}" \ + "ucx-proc=*=gpu" \ "rapids-build-env=$MINOR_VERSION.*" \ "rapids-notebook-env=$MINOR_VERSION.*" \ rapids-pytest-benchmark # https://docs.rapids.ai/maintainers/depmgmt/ -# conda remove --force rapids-build-env rapids-notebook-env -# conda install "your-pkg=1.0.0" +# gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env +# gpuci_conda_retry install -y "your-pkg=1.0.0" -# Install the master version of dask and distributed -logger "pip install git+https://github.com/dask/distributed.git --upgrade --no-deps" +gpuci_logger "Install the master version of dask and distributed" pip install "git+https://github.com/dask/distributed.git" --upgrade --no-deps - -logger "pip install git+https://github.com/dask/dask.git --upgrade --no-deps" pip install "git+https://github.com/dask/dask.git" --upgrade --no-deps - -logger "Check versions..." +gpuci_logger "Check versions" python --version $CC --version $CXX --version -conda list + +gpuci_logger "Check conda environment" +conda info +conda config --show-sources +conda list --show-channel-urls ################################################################################ # BUILD - Build libcugraph and cuGraph from source ################################################################################ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then - logger "Build libcugraph..." 
+ gpuci_logger "Build from source" $WORKSPACE/build.sh clean libcugraph cugraph fi @@ -105,9 +103,9 @@ EXITCODE=0 trap "EXITCODE=1" ERR if hasArg --skip-tests; then - logger "Skipping Tests..." + gpuci_logger "Skipping Tests" else - logger "Check GPU usage..." + gpuci_logger "Check GPU usage" nvidia-smi # If this is a PR build, skip downloading large datasets and don't run the diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index 2a443b4e1ae..d4d759abad5 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -15,6 +15,7 @@ dependencies: - dask-cudf=0.17* - nccl>=2.7 - ucx-py=0.17* +- ucx-proc=*=gpu - scipy - networkx - python-louvain diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index 32f08f27f55..e6705daa7b8 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -15,6 +15,7 @@ dependencies: - dask-cudf=0.17* - nccl>=2.7 - ucx-py=0.17* +- ucx-proc=*=gpu - scipy - networkx - python-louvain diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index f8c64ebd53b..c8227521a4c 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -15,6 +15,7 @@ dependencies: - dask-cudf=0.17* - nccl>=2.7 - ucx-py=0.17* +- ucx-proc=*=gpu - scipy - networkx - python-louvain diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index c7eba24d2fb..90f5bed942a 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -28,6 +28,7 @@ requirements: - libcugraph={{ version }} - cudf={{ minor_version }} - ucx-py {{ minor_version }} + - ucx-proc=*=gpu run: - python x.x - libcugraph={{ version }} @@ -38,6 +39,7 @@ requirements: - distributed>=2.12.0 - nccl>=2.7 - ucx-py {{ minor_version }} + - ucx-proc=*=gpu #test: # 
commands: diff --git a/conda/recipes/libcugraph/build.sh b/conda/recipes/libcugraph/build.sh index ae73202ca35..6051b6eee41 100644 --- a/conda/recipes/libcugraph/build.sh +++ b/conda/recipes/libcugraph/build.sh @@ -1,12 +1,4 @@ #!/usr/bin/env bash # This assumes the script is executed from the root of the repo directory - -# show environment -printenv -# Cleanup local git -if [ -d .git ]; then - git clean -xdf -fi - ./build.sh libcugraph -v diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index 89ccd2d56e3..211ec920d27 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -31,11 +31,13 @@ requirements: - libcypher-parser - nccl>=2.7 - ucx-py {{ minor_version }} + - ucx-proc=*=gpu run: - libcudf={{ minor_version }} - {{ pin_compatible('cudatoolkit', max_pin='x.x') }} - nccl>=2.7 - ucx-py {{ minor_version }} + - ucx-proc=*=gpu #test: # commands: diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b1fea1eaf88..2d6b9facd8b 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -14,7 +14,7 @@ # limitations under the License. 
#============================================================================= -cmake_minimum_required(VERSION 3.12 FATAL_ERROR) +cmake_minimum_required(VERSION 3.12..3.17 FATAL_ERROR) project(CUGRAPH VERSION 0.17.0 LANGUAGES C CXX CUDA) @@ -50,6 +50,14 @@ endif(CMAKE_COMPILER_IS_GNUCXX) find_package(CUDA) +# Configure GPU arch to build +set(GUNROCK_GENCODE_SM60 "OFF") +set(GUNROCK_GENCODE_SM61 "OFF") +set(GUNROCK_GENCODE_SM70 "OFF") +set(GUNROCK_GENCODE_SM72 "OFF") +set(GUNROCK_GENCODE_SM75 "OFF") +set(GUNROCK_GENCODE_SM80 "OFF") + # Check for aarch64 vs workstation architectures if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") message(STATUS "CMAKE Detected aarch64 CPU architecture, selecting appropriate gencodes") @@ -57,12 +65,13 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") set(GPU_ARCHS "62") # Default minimum CUDA GenCode - not supported by gunrock if(CUDA_VERSION_MAJOR GREATER_EQUAL 9) set(GPU_ARCHS "${GPU_ARCHS};72") - set(GUNROCK_GENCODE "-DGUNROCK_GENCODE_SM72=TRUE") + set(GUNROCK_GENCODE_SM72 "ON") endif() if(CUDA_VERSION_MAJOR GREATER_EQUAL 11) # This is probably for SBSA CUDA, or a next gen Jetson set(GPU_ARCHS "${GPU_ARCHS};75;80") - set(GUNROCK_GENCODE "${GUNROCK_GENCODE} -DGUNROCK_GENCODE_SM75=TRUE -DGUNROCK_GENCODE_SM80=TRUE ") + set(GUNROCK_GENCODE_SM75 "ON") + set(GUNROCK_GENCODE_SM80 "ON") endif() else() @@ -70,20 +79,19 @@ else() # System architecture was not aarch64, # this is datacenter or workstation class hardware set(GPU_ARCHS "60") # Default minimum supported CUDA gencode - set(GUNROCK_GENCODE "-DGUNROCK_GENCODE_SM60=TRUE") + set(GUNROCK_GENCODE_SM60 "ON") if(CUDA_VERSION_MAJOR GREATER_EQUAL 9) set(GPU_ARCHS "${GPU_ARCHS};70") - set(GUNROCK_GENCODE "${GUNROCK_GENCODE} -DGUNROCK_GENCODE_SM70=TRUE") + set(GUNROCK_GENCODE_SM70 "ON") endif() if(CUDA_VERSION_MAJOR GREATER_EQUAL 10) set(GPU_ARCHS "${GPU_ARCHS};75") - set(GUNROCK_GENCODE "${GUNROCK_GENCODE} -DGUNROCK_GENCODE_SM75=TRUE") + set(GUNROCK_GENCODE_SM75 "ON") endif() 
if(CUDA_VERSION_MAJOR GREATER_EQUAL 11) set(GPU_ARCHS "${GPU_ARCHS};80") - set(GUNROCK_GENCODE "${GUNROCK_GENCODE} -DGUNROCK_GENCODE_SM80=TRUE") + set(GUNROCK_GENCODE_SM80 "ON") endif() - endif() message("-- Building for GPU_ARCHS = ${GPU_ARCHS}") @@ -98,6 +106,7 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas --disable-warnings") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable") + # Option to enable line info in CUDA device compilation to allow introspection when profiling / # memchecking option(CMAKE_CUDA_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler" OFF) @@ -216,8 +225,7 @@ message("Fetching cuco") FetchContent_Declare( cuco GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git - GIT_TAG 729d07db2e544e173efefdd168db21f7b8adcfaf - GIT_SHALLOW true + GIT_TAG 5f94cdd3b3df0e5f79c47fb772497d6e42455414 ) FetchContent_GetProperties(cuco) @@ -248,7 +256,6 @@ set(LIBCUDACXX_INCLUDE_DIR "${libcudacxx_SOURCE_DIR}/include") message("set LIBCUDACXX_INCLUDE_DIR to: ${LIBCUDACXX_INCLUDE_DIR}") - ################################################################################################### # - External Projects ----------------------------------------------------------------------------- @@ -270,26 +277,33 @@ ExternalProject_Add(cuhornet ) # - GUNROCK -set(CUGUNROCK_DIR ${CMAKE_CURRENT_BINARY_DIR}/cugunrock CACHE STRING - "Path to cugunrock repo") +set(GUNROCK_DIR ${CMAKE_CURRENT_BINARY_DIR}/gunrock CACHE STRING "Path to gunrock repo") +set(GUNROCK_INCLUDE_DIR ${GUNROCK_DIR}/src/gunrock_ext CACHE STRING "Path to gunrock includes") -ExternalProject_Add(cugunrock - GIT_REPOSITORY https://github.com/rapidsai/cugunrock.git - GIT_TAG 0b92fae6ee9026188a811b4d08915779e7c97178 - PREFIX ${CUGUNROCK_DIR} +ExternalProject_Add(gunrock_ext 
+ GIT_REPOSITORY https://github.com/gunrock/gunrock.git + GIT_TAG dev + PREFIX ${GUNROCK_DIR} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= -DGUNROCK_BUILD_SHARED_LIBS=OFF -DGUNROCK_BUILD_TESTS=OFF - -DCUDA_AUTODETECT_GENCODE=FALSE + -DCUDA_AUTODETECT_GENCODE=OFF + -DGUNROCK_GENCODE_SM60=${GUNROCK_GENCODE_SM60} + -DGUNROCK_GENCODE_SM61=${GUNROCK_GENCODE_SM61} + -DGUNROCK_GENCODE_SM70=${GUNROCK_GENCODE_SM70} + -DGUNROCK_GENCODE_SM72=${GUNROCK_GENCODE_SM72} + -DGUNROCK_GENCODE_SM75=${GUNROCK_GENCODE_SM75} + -DGUNROCK_GENCODE_SM80=${GUNROCK_GENCODE_SM80} ${GUNROCK_GENCODE} - BUILD_BYPRODUCTS ${CUGUNROCK_DIR}/lib/libgunrock.a + BUILD_BYPRODUCTS ${GUNROCK_DIR}/src/gunrock_ext-build/lib/libgunrock.a + INSTALL_COMMAND "" ) add_library(gunrock STATIC IMPORTED) -add_dependencies(gunrock cugunrock) +add_dependencies(gunrock gunrock_ext) -set_property(TARGET gunrock PROPERTY IMPORTED_LOCATION ${CUGUNROCK_DIR}/lib/libgunrock.a) +set_property(TARGET gunrock PROPERTY IMPORTED_LOCATION ${GUNROCK_DIR}/src/gunrock_ext-build/lib/libgunrock.a) # - NCCL if(NOT NCCL_PATH) @@ -385,7 +399,7 @@ add_library(cugraph SHARED # NOTE: This dependency will force the building of cugraph to # wait until after cugunrock is constructed. 
# -add_dependencies(cugraph cugunrock) +add_dependencies(cugraph gunrock_ext) add_dependencies(cugraph raft) ################################################################################################### @@ -405,7 +419,7 @@ target_include_directories(cugraph "${CUHORNET_INCLUDE_DIR}/xlib/include" "${CUHORNET_INCLUDE_DIR}/primitives" "${CMAKE_CURRENT_SOURCE_DIR}/src" - "${CUGUNROCK_DIR}/include" + "${GUNROCK_INCLUDE_DIR}" "${NCCL_INCLUDE_DIRS}" "${RAFT_DIR}/cpp/include" PUBLIC diff --git a/cpp/src/experimental/katz_centrality.cu b/cpp/src/experimental/katz_centrality.cu index 331bda228c3..51d6e0ceb4c 100644 --- a/cpp/src/experimental/katz_centrality.cu +++ b/cpp/src/experimental/katz_centrality.cu @@ -136,7 +136,7 @@ void katz_centrality(raft::handle_t &handle, iter++; - if (diff_sum < static_cast(num_vertices) * epsilon) { + if (diff_sum < epsilon) { break; } else if (iter >= max_iterations) { CUGRAPH_FAIL("Katz Centrality failed to converge."); diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index 4084695deb1..1aa7f37fa6b 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -68,12 +68,20 @@ void pagerank(raft::handle_t const& handle, auto const num_vertices = pull_graph_view.get_number_of_vertices(); if (num_vertices == 0) { return; } + auto aggregate_personalization_vector_size = + GraphViewType::is_multi_gpu + ? host_scalar_allreduce(handle.get_comms(), personalization_vector_size, handle.get_stream()) + : personalization_vector_size; + // 1. 
check input arguments CUGRAPH_EXPECTS( - (personalization_vertices == nullptr) || (personalization_values != nullptr), - "Invalid input argument: if personalization verties are provided, personalization " - "values should be provided as well."); + ((personalization_vector_size > 0) && (personalization_vertices != nullptr) && + (personalization_values != nullptr)) || + ((personalization_vector_size == 0) && (personalization_vertices == nullptr) && + (personalization_values == nullptr)), + "Invalid input argument: if personalization_vector_size is non-zero, personalization verties " + "and personalization values should be provided. Otherwise, they should not be provided."); CUGRAPH_EXPECTS((alpha >= 0.0) && (alpha <= 1.0), "Invalid input argument: alpha should be in [0.0, 1.0]."); CUGRAPH_EXPECTS(epsilon >= 0.0, "Invalid input argument: epsilon should be non-negative."); @@ -109,7 +117,7 @@ void pagerank(raft::handle_t const& handle, "Invalid input argument: initial guess values should be non-negative."); } - if (personalization_vertices != nullptr) { + if (aggregate_personalization_vector_size > 0) { vertex_partition_device_t vertex_partition(pull_graph_view); auto num_invalid_vertices = count_if_v(handle, @@ -177,7 +185,7 @@ void pagerank(raft::handle_t const& handle, // 4. sum the personalization values result_t personalization_sum{0.0}; - if (personalization_vertices != nullptr) { + if (aggregate_personalization_vector_size > 0) { personalization_sum = reduce_v(handle, pull_graph_view, personalization_values, @@ -229,7 +237,7 @@ void pagerank(raft::handle_t const& handle, copy_to_adj_matrix_row(handle, pull_graph_view, pageranks, adj_matrix_row_pageranks.begin()); - auto unvarying_part = personalization_vertices == nullptr + auto unvarying_part = aggregate_personalization_vector_size == 0 ? 
(dangling_sum * alpha + static_cast(1.0 - alpha)) / static_cast(num_vertices) : result_t{0.0}; @@ -245,11 +253,12 @@ void pagerank(raft::handle_t const& handle, unvarying_part, pageranks); - if (personalization_vertices != nullptr) { + if (aggregate_personalization_vector_size > 0) { vertex_partition_device_t vertex_partition(pull_graph_view); auto val_first = thrust::make_zip_iterator( thrust::make_tuple(personalization_vertices, personalization_values)); thrust::for_each( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), val_first, val_first + personalization_vector_size, [vertex_partition, pageranks, dangling_sum, personalization_sum, alpha] __device__( @@ -271,7 +280,7 @@ void pagerank(raft::handle_t const& handle, iter++; - if (diff_sum < static_cast(num_vertices) * epsilon) { + if (diff_sum < epsilon) { break; } else if (iter >= max_iterations) { CUGRAPH_FAIL("PageRank failed to converge."); diff --git a/cpp/src/link_analysis/gunrock_hits.cpp b/cpp/src/link_analysis/gunrock_hits.cpp index 84c6036ad70..8662c3bea79 100644 --- a/cpp/src/link_analysis/gunrock_hits.cpp +++ b/cpp/src/link_analysis/gunrock_hits.cpp @@ -30,6 +30,9 @@ namespace cugraph { namespace gunrock { +const int HOST{1}; // gunrock should expose the device constant at the API level. +const int DEVICE{2}; // gunrock should expose the device constant at the API level. + template void hits(cugraph::GraphCSRView const &graph, int max_iter, @@ -44,49 +47,18 @@ void hits(cugraph::GraphCSRView const &graph, "Invalid API parameter: authorities array should be of size V"); // - // NOTE: gunrock doesn't support tolerance parameter - // gunrock doesn't support passing a starting value - // gunrock doesn't support the normalized parameter - // - // FIXME: gunrock uses a 2-norm, while networkx uses a 1-norm. - // They will add a parameter to allow us to specify - // which norm to use. 
- // - std::vector local_offsets(graph.number_of_vertices + 1); - std::vector local_indices(graph.number_of_edges); - std::vector local_hubs(graph.number_of_vertices); - std::vector local_authorities(graph.number_of_vertices); - - // Ideally: - // - //::hits(graph.number_of_vertices, graph.number_of_edges, graph.offsets, graph.indices, - // max_iter, hubs, authorities, DEVICE); + // NOTE: gunrock doesn't support passing a starting value // - // For now, the following: - - CUDA_TRY(cudaMemcpy(local_offsets.data(), - graph.offsets, - (graph.number_of_vertices + 1) * sizeof(edge_t), - cudaMemcpyDeviceToHost)); - CUDA_TRY(cudaMemcpy(local_indices.data(), - graph.indices, - graph.number_of_edges * sizeof(vertex_t), - cudaMemcpyDeviceToHost)); - ::hits(graph.number_of_vertices, graph.number_of_edges, - local_offsets.data(), - local_indices.data(), + graph.offsets, + graph.indices, max_iter, - local_hubs.data(), - local_authorities.data()); - - CUDA_TRY(cudaMemcpy( - hubs, local_hubs.data(), graph.number_of_vertices * sizeof(weight_t), cudaMemcpyHostToDevice)); - CUDA_TRY(cudaMemcpy(authorities, - local_authorities.data(), - graph.number_of_vertices * sizeof(weight_t), - cudaMemcpyHostToDevice)); + tolerance, + HITS_NORMALIZATION_METHOD_1, + hubs, + authorities, + DEVICE); } template void hits(cugraph::GraphCSRView const &, @@ -98,5 +70,4 @@ template void hits(cugraph::GraphCSRView const &, float *); } // namespace gunrock - } // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 40ae7933b65..a8c789210e0 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -16,10 +16,6 @@ # #============================================================================= -cmake_minimum_required(VERSION 3.12 FATAL_ERROR) - -project(CUGRAPH_TESTS LANGUAGES C CXX CUDA) - ################################################################################################### # - compiler function 
----------------------------------------------------------------------------- diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index 0352637dcf0..c2ac4340319 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -69,9 +69,9 @@ void katz_centrality_reference(edge_t* offsets, result_t diff_sum{0.0}; for (vertex_t i = 0; i < num_vertices; ++i) { - diff_sum += fabs(katz_centralities[i] - old_katz_centralities[i]); + diff_sum += std::abs(katz_centralities[i] - old_katz_centralities[i]); } - if (diff_sum < static_cast(num_vertices) * epsilon) { break; } + if (diff_sum < epsilon) { break; } iter++; ASSERT_TRUE(iter < max_iterations); } @@ -164,7 +164,7 @@ class Tests_KatzCentrality : public ::testing::TestWithParam::max(), false, - false); + true); rmm::device_uvector d_katz_centralities(graph_view.get_number_of_vertices(), handle.get_stream()); @@ -180,7 +180,7 @@ class Tests_KatzCentrality : public ::testing::TestWithParam::max(), false, - false, + true, false); CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -193,7 +193,13 @@ class Tests_KatzCentrality : public ::testing::TestWithParam(graph_view.get_number_of_vertices())) * threshold_ratio; + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + auto diff = std::abs(lhs - rhs); + return (diff < std::max(lhs, rhs) * threshold_ratio) || (diff < threshold_magnitude); + }; ASSERT_TRUE(std::equal(h_reference_katz_centralities.begin(), h_reference_katz_centralities.end(), diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 3fe74e279ff..8770c3d0676 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include template @@ -94,20 +95,21 @@ void pagerank_reference(edge_t* offsets, pageranks[i] 
+= alpha * old_pageranks[nbr] * (w / out_weight_sums[nbr]); } if (personalization_vertices == nullptr) { - pageranks[i] += (dangling_sum + (1.0 - alpha)) / static_cast(num_vertices); + pageranks[i] += + (dangling_sum * alpha + (1.0 - alpha)) / static_cast(num_vertices); } } if (personalization_vertices != nullptr) { for (vertex_t i = 0; i < personalization_vector_size; ++i) { auto v = personalization_vertices[i]; - pageranks[v] += (dangling_sum + (1.0 - alpha)) * personalization_values[i]; + pageranks[v] += (dangling_sum * alpha + (1.0 - alpha)) * personalization_values[i]; } } result_t diff_sum{0.0}; for (vertex_t i = 0; i < num_vertices; ++i) { - diff_sum += fabs(pageranks[i] - old_pageranks[i]); + diff_sum += std::abs(pageranks[i] - old_pageranks[i]); } - if (diff_sum < static_cast(num_vertices) * epsilon) { break; } + if (diff_sum < epsilon) { break; } iter++; ASSERT_TRUE(iter < max_iterations); } @@ -117,10 +119,13 @@ void pagerank_reference(edge_t* offsets, typedef struct PageRank_Usecase_t { std::string graph_file_full_path{}; + double personalization_ratio{0.0}; bool test_weighted{false}; - PageRank_Usecase_t(std::string const& graph_file_path, bool test_weighted) - : test_weighted(test_weighted) + PageRank_Usecase_t(std::string const& graph_file_path, + double personalization_ratio, + bool test_weighted) + : personalization_ratio(personalization_ratio), test_weighted(test_weighted) { if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; @@ -169,6 +174,49 @@ class Tests_PageRank : public ::testing::TestWithParam { } CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + std::vector h_personalization_vertices{}; + std::vector h_personalization_values{}; + if (configuration.personalization_ratio > 0.0) { + std::random_device r{}; + std::default_random_engine generator{r()}; + std::uniform_real_distribution distribution{0.0, 1.0}; + 
h_personalization_vertices.resize(graph_view.get_number_of_local_vertices()); + std::iota(h_personalization_vertices.begin(), + h_personalization_vertices.end(), + graph_view.get_local_vertex_first()); + h_personalization_vertices.erase( + std::remove_if(h_personalization_vertices.begin(), + h_personalization_vertices.end(), + [&generator, &distribution, configuration](auto v) { + return distribution(generator) >= configuration.personalization_ratio; + }), + h_personalization_vertices.end()); + h_personalization_values.resize(h_personalization_vertices.size()); + std::for_each(h_personalization_values.begin(), + h_personalization_values.end(), + [&distribution, &generator](auto& val) { val = distribution(generator); }); + auto sum = std::accumulate( + h_personalization_values.begin(), h_personalization_values.end(), result_t{0.0}); + std::for_each(h_personalization_values.begin(), + h_personalization_values.end(), + [sum](auto& val) { val /= sum; }); + } + + rmm::device_uvector d_personalization_vertices(h_personalization_vertices.size(), + handle.get_stream()); + rmm::device_uvector d_personalization_values(d_personalization_vertices.size(), + handle.get_stream()); + if (d_personalization_vertices.size() > 0) { + raft::update_device(d_personalization_vertices.data(), + h_personalization_vertices.data(), + h_personalization_vertices.size(), + handle.get_stream()); + raft::update_device(d_personalization_values.data(), + h_personalization_values.data(), + h_personalization_values.size(), + handle.get_stream()); + } + std::vector h_reference_pageranks(graph_view.get_number_of_vertices()); result_t constexpr alpha{0.85}; @@ -177,11 +225,11 @@ class Tests_PageRank : public ::testing::TestWithParam { pagerank_reference(h_offsets.data(), h_indices.data(), h_weights.size() > 0 ? 
h_weights.data() : static_cast(nullptr), - static_cast(nullptr), - static_cast(nullptr), + h_personalization_vertices.data(), + h_personalization_values.data(), h_reference_pageranks.data(), graph_view.get_number_of_vertices(), - vertex_t{0}, + static_cast(h_personalization_vertices.size()), alpha, epsilon, std::numeric_limits::max(), @@ -195,9 +243,9 @@ class Tests_PageRank : public ::testing::TestWithParam { cugraph::experimental::pagerank(handle, graph_view, static_cast(nullptr), - static_cast(nullptr), - static_cast(nullptr), - vertex_t{0}, + d_personalization_vertices.data(), + d_personalization_values.data(), + static_cast(d_personalization_vertices.size()), d_pageranks.begin(), alpha, epsilon, @@ -213,7 +261,13 @@ class Tests_PageRank : public ::testing::TestWithParam { h_cugraph_pageranks.data(), d_pageranks.data(), d_pageranks.size(), handle.get_stream()); CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - auto nearly_equal = [epsilon](auto lhs, auto rhs) { return std::fabs(lhs - rhs) < epsilon; }; + auto threshold_ratio = 1e-3; + auto threshold_magnitude = + (epsilon / static_cast(graph_view.get_number_of_vertices())) * threshold_ratio; + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + auto diff = std::abs(lhs - rhs); + return (diff < std::max(lhs, rhs) * threshold_ratio) || (diff < threshold_magnitude); + }; ASSERT_TRUE(std::equal(h_reference_pageranks.begin(), h_reference_pageranks.end(), @@ -229,16 +283,24 @@ TEST_P(Tests_PageRank, CheckInt32Int32FloatFloat) run_current_test(GetParam()); } -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_PageRank, - ::testing::Values(PageRank_Usecase("test/datasets/karate.mtx", false), - PageRank_Usecase("test/datasets/karate.mtx", true), - PageRank_Usecase("test/datasets/web-Google.mtx", false), - PageRank_Usecase("test/datasets/web-Google.mtx", true), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", - false), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", true), - 
PageRank_Usecase("test/datasets/webbase-1M.mtx", false), - PageRank_Usecase("test/datasets/webbase-1M.mtx", true))); +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_PageRank, + ::testing::Values(PageRank_Usecase("test/datasets/karate.mtx", 0.0, false), + PageRank_Usecase("test/datasets/karate.mtx", 0.5, false), + PageRank_Usecase("test/datasets/karate.mtx", 0.0, true), + PageRank_Usecase("test/datasets/karate.mtx", 0.5, true), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, false), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, false), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, true), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, true), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, false), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, false), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/traversal/sssp_test.cu b/cpp/tests/traversal/sssp_test.cu index 5021bd620f8..ea56d1d79cb 100644 --- a/cpp/tests/traversal/sssp_test.cu +++ b/cpp/tests/traversal/sssp_test.cu @@ -425,10 +425,7 @@ TEST_P(Tests_SSSP, CheckFP64_RANDOM_DIST_PREDS) // --gtest_filter=*simple_test* -// FIXME: Enable this for 0.17. Temporarily disabled due to sporadic error hard -// to reproduce: "transform: failed to synchronize: cudaErrorIllegalAddress: an -// illegal memory access was encountered" thrown in the test body. 
-INSTANTIATE_TEST_CASE_P(DISABLED_simple_test, +INSTANTIATE_TEST_CASE_P(simple_test, Tests_SSSP, ::testing::Values(SSSP_Usecase(MTX, "test/datasets/dblp.mtx", 100), SSSP_Usecase(MTX, "test/datasets/wiki2003.mtx", 100000), diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index f8984f25978..feab1cc3eb9 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -30,6 +30,14 @@ Graph, DiGraph, from_cudf_edgelist, + from_pandas_edgelist, + to_pandas_edgelist, + from_pandas_adjacency, + to_pandas_adjacency, + from_numpy_array, + to_numpy_array, + from_numpy_matrix, + to_numpy_matrix, hypergraph, symmetrize, symmetrize_df, diff --git a/python/cugraph/structure/__init__.py b/python/cugraph/structure/__init__.py index 511e6773469..b8b6fbe0435 100644 --- a/python/cugraph/structure/__init__.py +++ b/python/cugraph/structure/__init__.py @@ -14,6 +14,14 @@ from cugraph.structure.graph import Graph, DiGraph from cugraph.structure.number_map import NumberMap from cugraph.structure.symmetrize import symmetrize, symmetrize_df , symmetrize_ddf -from cugraph.structure.convert_matrix import from_cudf_edgelist +from cugraph.structure.convert_matrix import (from_cudf_edgelist, + from_pandas_edgelist, + to_pandas_edgelist, + from_pandas_adjacency, + to_pandas_adjacency, + from_numpy_array, + to_numpy_array, + from_numpy_matrix, + to_numpy_matrix) from cugraph.structure.hypergraph import hypergraph from cugraph.structure.shuffle import shuffle diff --git a/python/cugraph/structure/convert_matrix.py b/python/cugraph/structure/convert_matrix.py index 56bb9086380..8acdc7e1799 100644 --- a/python/cugraph/structure/convert_matrix.py +++ b/python/cugraph/structure/convert_matrix.py @@ -65,3 +65,161 @@ def from_cudf_edgelist(df, source='source', destination='destination', edge_attr=edge_attr, renumber=renumber) return G + + +def from_pandas_edgelist(df, + source="source", + destination="destination", + edge_attr=None, + create_using=Graph, + renumber=True): + 
""" + Initialize a graph from the edge list. It is an error to call this + method on an initialized Graph object. Source argument is source + column name and destination argument is destination column name. + + By default, renumbering is enabled to map the source and destination + vertices into an index in the range [0, V) where V is the number + of vertices. If the input vertices are a single column of integers + in the range [0, V), renumbering can be disabled and the original + external vertex ids will be used. + + If weights are present, edge_attr argument is the weights column name. + + Parameters + ---------- + input_df : pandas.DataFrame + A DataFrame that contains edge information + source : str or array-like + source column name or array of column names + destination : str or array-like + destination column name or array of column names + edge_attr : str or None + the weights column name. Default is None + renumber : bool + Indicate whether or not to renumber the source and destination + vertex IDs. Default is True. + create_using: cugraph.DiGraph or cugraph.Graph + Indicate whether to create a directed or undirected graph + + Returns + ------- + G : cugraph.DiGraph or cugraph.Graph + graph containing edges from the pandas edgelist + + Examples + -------- + >>> df = pandas.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_pandas_edgelist(df, source='0', destination='1', + edge_attr='2', renumber=False) + """ + if create_using is Graph: + G = Graph() + elif create_using is DiGraph: + G = DiGraph() + else: + raise Exception("create_using supports Graph and DiGraph") + + G.from_pandas_edgelist(df, source=source, destination=destination, + edge_attr=edge_attr, renumber=renumber) + return G + + +def to_pandas_edgelist(G, source='source', destination='destination'): + """ + Returns the graph edge list as a Pandas DataFrame. 
+ + Parameters + ---------- + G : cugraph.Graph or cugraph.DiGraph + Graph containing the edgelist. + source : str or array-like + source column name or array of column names + destination : str or array-like + destination column name or array of column names + + Returns + ------- + df : pandas.DataFrame + pandas dataframe containing the edgelist as source and + destination columns. + """ + pdf = G.to_pandas_edgelist(source=source, destination=destination) + return pdf + + +def from_pandas_adjacency(df, create_using=Graph): + """ + Initializes the graph from pandas adjacency matrix. + Set create_using to cugraph.DiGraph for directed graph and + cugraph.Graph for undirected Graph. 
+ """ + if create_using is Graph: + G = Graph() + elif create_using is DiGraph: + G = DiGraph() + else: + raise Exception("create_using supports Graph and DiGraph") + G.from_numpy_matrix(A) + return G + + +def to_numpy_matrix(G): + """ + Returns the graph adjacency matrix as a NumPy matrix. + """ + A = G.to_numpy_matrix() + return A diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index ffbf4b8ec75..748e240ad4c 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -19,7 +19,8 @@ import cudf import dask_cudf import cugraph.comms.comms as Comms - +import pandas as pd +import numpy as np from cugraph.dask.structure import replication @@ -431,6 +432,140 @@ def from_cudf_edgelist( self.renumber_map = renumber_map + def from_pandas_edgelist( + self, + pdf, + source="source", + destination="destination", + edge_attr=None, + renumber=True, + ): + """ + Initialize a graph from the edge list. It is an error to call this + method on an initialized Graph object. Source argument is source + column name and destination argument is destination column name. + + By default, renumbering is enabled to map the source and destination + vertices into an index in the range [0, V) where V is the number + of vertices. If the input vertices are a single column of integers + in the range [0, V), renumbering can be disabled and the original + external vertex ids will be used. + + If weights are present, edge_attr argument is the weights column name. + + Parameters + ---------- + input_df : pandas.DataFrame + A DataFrame that contains edge information + source : str or array-like + source column name or array of column names + destination : str or array-like + destination column name or array of column names + edge_attr : str or None + the weights column name. Default is None + renumber : bool + Indicate whether or not to renumber the source and destination + vertex IDs. Default is True. 
+ + Examples + -------- + >>> df = pandas.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_pandas_edgelist(df, source='0', destination='1', + edge_attr='2', renumber=False) + + """ + gdf = cudf.DataFrame.from_pandas(pdf) + self.from_cudf_edgelist(gdf, source=source, destination=destination, + edge_attr=edge_attr, renumber=renumber) + + def to_pandas_edgelist(self, source='source', destination='destination'): + """ + Returns the graph edge list as a Pandas DataFrame. + + Parameters + ---------- + source : str or array-like + source column name or array of column names + destination : str or array-like + destination column name or array of column names + + Returns + ------- + df : pandas.DataFrame + """ + + gdf = self.view_edge_list() + return gdf.to_pandas() + + def from_pandas_adjacency(self, pdf): + """ + Initializes the graph from pandas adjacency matrix + """ + np_array = pdf.to_numpy() + columns = pdf.columns + self.from_numpy_array(np_array, columns) + + def to_pandas_adjacency(self): + """ + Returns the graph adjacency matrix as a Pandas DataFrame. + """ + + np_array_data = self.to_numpy_array() + pdf = pd.DataFrame(np_array_data) + if self.renumbered: + nodes = self.renumber_map.implementation.df['0'].\ + values_host.tolist() + pdf.columns = nodes + pdf.index = nodes + return pdf + + def to_numpy_array(self): + """ + Returns the graph adjacency matrix as a NumPy array. + """ + + nlen = self.number_of_nodes() + elen = self.number_of_edges() + df = self.edgelist.edgelist_df + np_array = np.full((nlen, nlen), 0.0) + for i in range(0, elen): + np_array[df['src'].iloc[i], df['dst'].iloc[i]] = df['weights'].\ + iloc[i] + return np_array + + def to_numpy_matrix(self): + """ + Returns the graph adjacency matrix as a NumPy matrix. 
+ """ + + np_array = self.to_numpy_array() + return np.asmatrix(np_array) + + def from_numpy_array(self, np_array, nodes=None): + """ + Initializes the graph from numpy array containing adjacency matrix. + """ + src, dst = np_array.nonzero() + weight = np_array[src, dst] + df = cudf.DataFrame() + if nodes is not None: + df['src'] = nodes[src] + df['dst'] = nodes[dst] + else: + df['src'] = src + df['dst'] = dst + df['weight'] = weight + self.from_cudf_edgelist(df, 'src', 'dst', edge_attr='weight') + + def from_numpy_matrix(self, np_matrix): + """ + Initializes the graph from numpy matrix containing adjacency matrix. + """ + np_array = np.asarray(np_matrix) + self.from_numpy_array(np_array) + def from_dask_cudf_edgelist( self, input_ddf, diff --git a/python/cugraph/tests/dask/mg_context.py b/python/cugraph/tests/dask/mg_context.py index 9a7ea2ace67..a72cf1c4b04 100644 --- a/python/cugraph/tests/dask/mg_context.py +++ b/python/cugraph/tests/dask/mg_context.py @@ -51,14 +51,27 @@ class MGContext: ----------- number_of_devices : int - Number of devices to use, verification must be done prior to call - to ensure that there are enough devices available. + Number of devices to use, verification must be done prior to call to + ensure that there are enough devices available. If not specified, the + cluster will be initialized to use all visible devices. + rmm_managed_memory : bool + True to enable managed memory (UVM) in RMM as part of the + cluster. Default is False. + p2p : bool + Initialize UCX endpoints if True. Default is False. 
""" - def __init__(self, number_of_devices=None, rmm_managed_memory=False): + def __init__(self, + number_of_devices=None, + rmm_managed_memory=False, + p2p=False): self._number_of_devices = number_of_devices self._rmm_managed_memory = rmm_managed_memory - self._cluster = None self._client = None + self._p2p = p2p + self._cluster = CUDACluster( + n_workers=self._number_of_devices, + rmm_managed_memory=self._rmm_managed_memory + ) @property def client(self): @@ -73,22 +86,15 @@ def __enter__(self): return self def _prepare_mg(self): - self._prepare_cluster() self._prepare_client() self._prepare_comms() - def _prepare_cluster(self): - self._cluster = CUDACluster( - n_workers=self._number_of_devices, - rmm_managed_memory=self._rmm_managed_memory - ) - def _prepare_client(self): self._client = Client(self._cluster) self._client.wait_for_workers(self._number_of_devices) def _prepare_comms(self): - Comms.initialize() + Comms.initialize(p2p=self._p2p) def _close(self): Comms.destroy() diff --git a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py index 53942a277c2..4d04bf6df85 100644 --- a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py +++ b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py @@ -68,7 +68,8 @@ def test_mg_betweenness_centrality( ): prepare_test() skip_if_not_enough_devices(mg_device_count) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): sorted_df = calc_betweenness_centrality( graph_file, directed=directed, diff --git a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py index 7778f7bf421..1e4a1950c53 100644 --- a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py +++ b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py @@ -13,8 +13,8 @@ import pytest import 
numpy as np -from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.dask.common.mg_utils import is_single_gpu from cugraph.tests.dask.mg_context import MGContext, skip_if_not_enough_devices @@ -64,7 +64,8 @@ def test_mg_edge_betweenness_centrality( ): prepare_test() skip_if_not_enough_devices(mg_device_count) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): sorted_df = calc_edge_betweenness_centrality( graph_file, directed=directed, diff --git a/python/cugraph/tests/dask/test_mg_comms.py b/python/cugraph/tests/dask/test_mg_comms.py index cd94f945f93..29789461018 100644 --- a/python/cugraph/tests/dask/test_mg_comms.py +++ b/python/cugraph/tests/dask/test_mg_comms.py @@ -27,7 +27,7 @@ def client_connection(): cluster = LocalCUDACluster() client = Client(cluster) - Comms.initialize() + Comms.initialize(p2p=True) yield client @@ -62,7 +62,7 @@ def test_dask_pagerank(client_connection): dg1 = cugraph.DiGraph() dg1.from_dask_cudf_edgelist(ddf1, "src", "dst") - result_pr1 = dcg.pagerank(dg1) + result_pr1 = dcg.pagerank(dg1).compute() ddf2 = dask_cudf.read_csv( input_data_path2, @@ -75,7 +75,7 @@ def test_dask_pagerank(client_connection): dg2 = cugraph.DiGraph() dg2.from_dask_cudf_edgelist(ddf2, "src", "dst") - result_pr2 = dcg.pagerank(dg2) + result_pr2 = dcg.pagerank(dg2).compute() # Calculate single GPU pagerank for verification of results df1 = cudf.read_csv( diff --git a/python/cugraph/tests/dask/test_mg_degree.py b/python/cugraph/tests/dask/test_mg_degree.py index a903f69d05a..a6600104bc8 100644 --- a/python/cugraph/tests/dask/test_mg_degree.py +++ b/python/cugraph/tests/dask/test_mg_degree.py @@ -28,7 +28,7 @@ def client_connection(): cluster = LocalCUDACluster() client = Client(cluster) - Comms.initialize() + Comms.initialize(p2p=True) yield client diff --git a/python/cugraph/tests/dask/test_mg_renumber.py b/python/cugraph/tests/dask/test_mg_renumber.py index b981a49a0de..8456241ff26 100644 --- 
a/python/cugraph/tests/dask/test_mg_renumber.py +++ b/python/cugraph/tests/dask/test_mg_renumber.py @@ -36,7 +36,7 @@ def client_connection(): cluster = LocalCUDACluster() client = Client(cluster) - Comms.initialize() + Comms.initialize(p2p=True) yield client @@ -199,7 +199,7 @@ def test_dask_pagerank(client_connection): # dg.compute_local_data(by='dst') expected_pr = cugraph.pagerank(g) - result_pr = dcg.pagerank(dg) + result_pr = dcg.pagerank(dg).compute() err = 0 tol = 1.0e-05 diff --git a/python/cugraph/tests/dask/test_mg_replication.py b/python/cugraph/tests/dask/test_mg_replication.py index d8a2676b32b..2b8510cd9ff 100644 --- a/python/cugraph/tests/dask/test_mg_replication.py +++ b/python/cugraph/tests/dask/test_mg_replication.py @@ -11,14 +11,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pytest +import gc + +import cudf + import cugraph from cugraph.tests.dask.mg_context import MGContext, skip_if_not_enough_devices -import cudf import cugraph.dask.structure.replication as replication from cugraph.dask.common.mg_utils import is_single_gpu import cugraph.tests.utils as utils -import pytest -import gc DATASETS_OPTIONS = utils.DATASETS_SMALL DIRECTED_GRAPH_OPTIONS = [False, True] @@ -42,7 +44,8 @@ def test_replicate_cudf_dataframe_with_weights( names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], ) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): worker_to_futures = replication.replicate_cudf_dataframe(df) for worker in worker_to_futures: replicated_df = worker_to_futures[worker].result() @@ -65,7 +68,8 @@ def test_replicate_cudf_dataframe_no_weights(input_data_path, mg_device_count): names=["src", "dst"], dtype=["int32", "int32"], ) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): worker_to_futures = replication.replicate_cudf_dataframe(df) for worker in worker_to_futures: 
replicated_df = worker_to_futures[worker].result() @@ -88,7 +92,8 @@ def test_replicate_cudf_series(input_data_path, mg_device_count): names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], ) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): for column in df.columns.values: series = df[column] worker_to_futures = replication.replicate_cudf_series(series) @@ -146,7 +151,8 @@ def test_enable_batch_context_then_views( gc.collect() skip_if_not_enough_devices(mg_device_count) G = utils.generate_cugraph_graph_from_file(graph_file, directed) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): assert G.batch_enabled is False, "Internal property should be False" G.enable_batch() assert G.batch_enabled is True, "Internal property should be True" @@ -181,7 +187,8 @@ def test_enable_batch_view_then_context(graph_file, directed, mg_device_count): G.view_transposed_adj_list() assert G.batch_transposed_adjlists is None - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): assert G.batch_enabled is False, "Internal property should be False" G.enable_batch() assert G.batch_enabled is True, "Internal property should be True" @@ -204,7 +211,8 @@ def test_enable_batch_context_no_context_views( gc.collect() skip_if_not_enough_devices(mg_device_count) G = utils.generate_cugraph_graph_from_file(graph_file, directed) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): assert G.batch_enabled is False, "Internal property should be False" G.enable_batch() assert G.batch_enabled is True, "Internal property should be True" @@ -228,7 +236,8 @@ def test_enable_batch_edgelist_replication( gc.collect() skip_if_not_enough_devices(mg_device_count) G = utils.generate_cugraph_graph_from_file(graph_file, directed) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + 
p2p=True): G.enable_batch() df = G.edgelist.edgelist_df for worker in G.batch_edgelists: @@ -257,7 +266,8 @@ def test_enable_batch_adjlist_replication_weights( G.from_cudf_edgelist( df, source="src", destination="dst", edge_attr="value" ) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): G.enable_batch() G.view_adj_list() adjlist = G.adjlist @@ -296,7 +306,8 @@ def test_enable_batch_adjlist_replication_no_weights( ) G = cugraph.DiGraph() if directed else cugraph.Graph() G.from_cudf_edgelist(df, source="src", destination="dst") - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): G.enable_batch() G.view_adj_list() adjlist = G.adjlist diff --git a/python/cugraph/tests/dask/test_mg_utility.py b/python/cugraph/tests/dask/test_mg_utility.py index f1becb051ad..e802a65c37f 100644 --- a/python/cugraph/tests/dask/test_mg_utility.py +++ b/python/cugraph/tests/dask/test_mg_utility.py @@ -32,7 +32,7 @@ def client_connection(): cluster = LocalCUDACluster() client = Client(cluster) - Comms.initialize() + Comms.initialize(p2p=True) yield client diff --git a/python/cugraph/tests/test_convert_matrix.py b/python/cugraph/tests/test_convert_matrix.py new file mode 100644 index 00000000000..29e64f700f4 --- /dev/null +++ b/python/cugraph/tests/test_convert_matrix.py @@ -0,0 +1,147 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import gc +import pytest +import cugraph +from cugraph.tests import utils +import numpy as np + +# Temporarily suppress warnings till networkX fixes deprecation warnings +# (Using or importing the ABCs from 'collections' instead of from +# 'collections.abc' is deprecated, and in 3.8 it will stop working) for +# python 3.7. Also, this import networkx needs to be relocated in the +# third-party group once this gets fixed. +import warnings + +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + import networkx as nx + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_to_from_pandas(graph_file): + gc.collect() + + # Read in the graph + M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + + # create a NetworkX DiGraph and convert to pandas adjacency + nxG = nx.from_pandas_edgelist( + M, source="0", target="1", edge_attr="weight", + create_using=nx.DiGraph + ) + nx_pdf = nx.to_pandas_adjacency(nxG) + nx_pdf = nx_pdf[sorted(nx_pdf.columns)] + nx_pdf.sort_index(inplace=True) + + # create a cugraph DiGraph and convert to pandas adjacency + cuG = cugraph.from_pandas_edgelist( + M, source="0", destination="1", edge_attr="weight", + create_using=cugraph.DiGraph + ) + + cu_pdf = cugraph.to_pandas_adjacency(cuG) + cu_pdf = cu_pdf[sorted(cu_pdf.columns)] + cu_pdf.sort_index(inplace=True) + + # Compare pandas adjacency list + assert nx_pdf.equals(cu_pdf) + + # Convert pandas adjacency list to graph + new_nxG = nx.from_pandas_adjacency(nx_pdf, create_using=nx.DiGraph) + new_cuG = cugraph.from_pandas_adjacency(cu_pdf, + create_using=cugraph.DiGraph) + + # Compare pandas edgelist + exp_pdf = nx.to_pandas_edgelist(new_nxG) + res_pdf = cugraph.to_pandas_edgelist(new_cuG) + + exp_pdf = exp_pdf.rename(columns={"source": "src", "target": "dst", + "weight": "weights"}) + + exp_pdf = exp_pdf.sort_values(by=["src", "dst"]).reset_index(drop=True) + res_pdf = res_pdf.sort_values(by=["src", 
"dst"]).reset_index(drop=True) + res_pdf = res_pdf[['src', 'dst', 'weights']] + + assert exp_pdf.equals(res_pdf) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_from_to_numpy(graph_file): + gc.collect() + + # Read in the graph + M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + + # create NetworkX and cugraph DiGraph + nxG = nx.from_pandas_edgelist( + M, source="0", target="1", edge_attr="weight", + create_using=nx.DiGraph + ) + + cuG = cugraph.from_pandas_edgelist( + M, source="0", destination="1", edge_attr="weight", + create_using=cugraph.DiGraph + ) + + # convert graphs to numpy array + nparray_nx = nx.to_numpy_array(nxG, + nodelist=cuG.nodes().values_host) + nparray_cu = cugraph.to_numpy_array(cuG) + npmatrix_nx = nx.to_numpy_matrix(nxG, + nodelist=cuG.nodes().values_host) + npmatrix_cu = cugraph.to_numpy_matrix(cuG) + + # Compare arrays and matrices + assert np.array_equal(nparray_nx, nparray_cu) + assert np.array_equal(np.asarray(npmatrix_nx), + np.asarray(npmatrix_cu)) + + # Create graphs from numpy array + new_nxG = nx.from_numpy_array(nparray_nx, + create_using=nx.DiGraph) + new_cuG = cugraph.from_numpy_array(nparray_cu, + create_using=cugraph.DiGraph) + + # Assert graphs are same + exp_pdf = nx.to_pandas_edgelist(new_nxG) + res_pdf = cugraph.to_pandas_edgelist(new_cuG) + + exp_pdf = exp_pdf.rename(columns={"source": "src", "target": "dst", + "weight": "weights"}) + + exp_pdf = exp_pdf.sort_values(by=["src", "dst"]).reset_index(drop=True) + res_pdf = res_pdf.sort_values(by=["src", "dst"]).reset_index(drop=True) + res_pdf = res_pdf[['src', 'dst', 'weights']] + + assert exp_pdf.equals(res_pdf) + + # Create graphs from numpy matrix + new_nxG = nx.from_numpy_matrix(npmatrix_nx, + create_using=nx.DiGraph) + new_cuG = cugraph.from_numpy_matrix(npmatrix_cu, + create_using=cugraph.DiGraph) + + # Assert graphs are same + exp_pdf = nx.to_pandas_edgelist(new_nxG) + res_pdf = cugraph.to_pandas_edgelist(new_cuG) + + exp_pdf = 
exp_pdf.rename(columns={"source": "src", "target": "dst", + "weight": "weights"}) + + exp_pdf = exp_pdf.sort_values(by=["src", "dst"]).reset_index(drop=True) + res_pdf = res_pdf.sort_values(by=["src", "dst"]).reset_index(drop=True) + res_pdf = res_pdf[['src', 'dst', 'weights']] + + assert exp_pdf.equals(res_pdf) diff --git a/python/cugraph/tests/test_hits.py b/python/cugraph/tests/test_hits.py index 30b6f20f478..6b6f54937a6 100644 --- a/python/cugraph/tests/test_hits.py +++ b/python/cugraph/tests/test_hits.py @@ -13,7 +13,6 @@ import gc import time -import numpy as np import pandas as pd import pytest @@ -37,16 +36,6 @@ print("Networkx version : {} ".format(nx.__version__)) -def cudify(d): - if d is None: - return None - - k = np.fromiter(d.keys(), dtype="int32") - v = np.fromiter(d.values(), dtype="float32") - cuD = cudf.DataFrame({"vertex": k, "values": v}) - return cuD - - def cugraph_call(cu_M, max_iter, tol): # cugraph hits Call @@ -78,21 +67,18 @@ def networkx_call(M, max_iter, tol): ) # same parameters as in NVGRAPH - pr = nx.hits(Gnx, max_iter, tol, normalized=True) + nx_hits = nx.hits(Gnx, max_iter, tol, normalized=True) t2 = time.time() - t1 print("Networkx Time : " + str(t2)) - return pr + return nx_hits MAX_ITERATIONS = [50] TOLERANCE = [1.0e-06] -# Test all combinations of default/managed and pooled/non-pooled allocation - - @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) @@ -105,52 +91,20 @@ def test_hits(graph_file, max_iter, tol): cu_M = utils.read_csv_file(graph_file) cugraph_hits = cugraph_call(cu_M, max_iter, tol) - # Calculating mismatch - # hubs = sorted(hubs.items(), key=lambda x: x[0]) - # print("hubs = ", hubs) - - # - # Scores don't match. Networkx uses the 1-norm, - # gunrock uses a 2-norm. Eventually we'll add that - # as a parameter. For now, let's check the order - # which should match. 
We'll allow 6 digits to right - # of decimal point accuracy - # pdf = pd.DataFrame.from_dict(hubs, orient="index").sort_index() - pdf = pdf.multiply(1000000).floordiv(1) cugraph_hits["nx_hubs"] = cudf.Series.from_pandas(pdf[0]) pdf = pd.DataFrame.from_dict(authorities, orient="index").sort_index() - pdf = pdf.multiply(1000000).floordiv(1) cugraph_hits["nx_authorities"] = cudf.Series.from_pandas(pdf[0]) - # - # Sort by hubs (cugraph) in descending order. Then we'll - # check to make sure all scores are in descending order. - # - cugraph_hits = cugraph_hits.sort_values("hubs", ascending=False) - - assert cugraph_hits["hubs"].is_monotonic_decreasing - assert cugraph_hits["nx_hubs"].is_monotonic_decreasing - - cugraph_hits = cugraph_hits.sort_values("authorities", ascending=False) - - assert cugraph_hits["authorities"].is_monotonic_decreasing - assert cugraph_hits["nx_authorities"].is_monotonic_decreasing - - -@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) -@pytest.mark.parametrize("max_iter", MAX_ITERATIONS) -@pytest.mark.parametrize("tol", TOLERANCE) -def test_hits_nx(graph_file, max_iter, tol): - gc.collect() - - M = utils.read_csv_for_nx(graph_file) - Gnx = nx.from_pandas_edgelist( - M, source="0", target="1", create_using=nx.DiGraph() - ) - nx_hubs, nx_authorities = nx.hits(Gnx, max_iter, tol, normalized=True) - cg_hubs, cg_authorities = cugraph.hits(Gnx, max_iter, tol, normalized=True) - - # assert nx_hubs == cg_hubs - # assert nx_authorities == cg_authorities + hubs_diffs1 = cugraph_hits.query('hubs - nx_hubs > 0.00001') + hubs_diffs2 = cugraph_hits.query('hubs - nx_hubs < -0.00001') + authorities_diffs1 = cugraph_hits.query( + 'authorities - nx_authorities > 0.0001') + authorities_diffs2 = cugraph_hits.query( + 'authorities - nx_authorities < -0.0001') + + assert len(hubs_diffs1) == 0 + assert len(hubs_diffs2) == 0 + assert len(authorities_diffs1) == 0 + assert len(authorities_diffs2) == 0