From 74d3b27870ccdb1e05efd435aecbc274c3edfaf7 Mon Sep 17 00:00:00 2001 From: Keith Kraus Date: Thu, 15 Oct 2020 18:15:56 -0400 Subject: [PATCH 01/41] Pin cmake policies to 3.17 --- CHANGELOG.md | 1 + cpp/CMakeLists.txt | 2 +- cpp/tests/CMakeLists.txt | 4 ---- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6a796d13a68..a26d6079fc0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## New Features ## Improvements +- PR #XXXX Pin cmake policies to cmake 3.17 version ## Bug Fixes diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f8f34170c8d..1462514ac20 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -14,7 +14,7 @@ # limitations under the License. #============================================================================= -cmake_minimum_required(VERSION 3.12 FATAL_ERROR) +cmake_minimum_required(VERSION 3.12..3.17 FATAL_ERROR) project(CUGRAPH VERSION 0.17.0 LANGUAGES C CXX CUDA) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index ac3a27c7b77..fa3a6847b82 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -16,10 +16,6 @@ # #============================================================================= -cmake_minimum_required(VERSION 3.12 FATAL_ERROR) - -project(CUGRAPH_TESTS LANGUAGES C CXX CUDA) - ################################################################################################### # - compiler function ----------------------------------------------------------------------------- From d670da7b44f3dd2ea58b5eec9f97e28637581f10 Mon Sep 17 00:00:00 2001 From: Keith Kraus Date: Thu, 15 Oct 2020 18:17:47 -0400 Subject: [PATCH 02/41] changelog # --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a26d6079fc0..6ae499adc4a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,7 +3,7 @@ ## New Features ## Improvements -- PR #XXXX Pin cmake policies to cmake 3.17 version +- PR #1227 Pin cmake policies to cmake 3.17 version ## Bug Fixes From 32795ec53c4a5060cc2240432b7e2a9b4e9065fa Mon Sep 17 00:00:00 2001 From: dillon-cullinan Date: Mon, 19 Oct 2020 12:46:42 -0700 Subject: [PATCH 03/41] ENH Improve gpuCI scripts --- ci/benchmark/build.sh | 53 ++++++++++++---------- ci/cpu/build.sh | 63 ++++++++++++++++----------- ci/cpu/cugraph/build_cugraph.sh | 25 ----------- ci/cpu/cugraph/upload-anaconda.sh | 50 --------------------- ci/cpu/libcugraph/build_libcugraph.sh | 25 ----------- ci/cpu/libcugraph/upload-anaconda.sh | 43 ------------------ ci/cpu/upload.sh | 54 +++++++++++++++++++++++ ci/docs/build.sh | 24 ++++++---- ci/gpu/build.sh | 49 ++++++++++----------- conda/recipes/libcugraph/build.sh | 8 ---- 10 files changed, 159 insertions(+), 235 deletions(-) delete mode 100755 ci/cpu/cugraph/build_cugraph.sh delete mode 100755 ci/cpu/cugraph/upload-anaconda.sh delete mode 100755 ci/cpu/libcugraph/build_libcugraph.sh delete mode 100755 ci/cpu/libcugraph/upload-anaconda.sh create mode 100644 ci/cpu/upload.sh diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh index 49a6362a904..66cbc47fde9 100644 --- a/ci/benchmark/build.sh +++ b/ci/benchmark/build.sh @@ -9,16 +9,12 @@ set -o pipefail NUMARGS=$# ARGS=$* -function logger { - echo -e "\n>>>> $@\n" -} - function hasArg { (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ") } function cleanup { - logger "Removing datasets and temp files..." + gpuci_logger "Removing datasets and temp files" rm -rf $WORKSPACE/datasets/test rm -rf $WORKSPACE/datasets/benchmark rm -f testoutput.txt @@ -26,19 +22,27 @@ function cleanup { # Set cleanup trap for Jenkins if [ ! -z "$JENKINS_HOME" ] ; then - logger "Jenkins environment detected, setting cleanup trap..." + gpuci_logger "Jenkins environment detected, setting cleanup trap" trap cleanup EXIT fi # Set path, build parallel level, and CUDA version cd $WORKSPACE -export PATH=/conda/bin:/usr/local/cuda/bin:$PATH -export PARALLEL_LEVEL=4 +export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH +export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} export CUDA_REL=${CUDA_VERSION%.*} + +# Set home export HOME=$WORKSPACE + +# Parse git describe export GIT_DESCRIBE_TAG=`git describe --tags` export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` +# Setup 'gpuci_conda_retry' for build retries (results in 2 total attempts) +export GPUCI_CONDA_RETRY_MAX=1 +export GPUCI_CONDA_RETRY_SLEEP=30 + # Set Benchmark Vars export DATASETS_DIR=${WORKSPACE}/datasets export BENCHMARKS_DIR=${WORKSPACE}/benchmarks @@ -49,22 +53,22 @@ export BENCHMARKS_DIR=${WORKSPACE}/benchmarks # TODO: Delete build section when artifacts are available -logger "Check environment..." +gpuci_logger "Check environment" env -logger "Check GPU usage..." +gpuci_logger "Check GPU usage" nvidia-smi -logger "Activate conda env..." -source activate rapids - +gpuci_logger "Activate conda env" +. /opt/conda/etc/profile.d/conda.sh +conda activate rapids # Enter dependencies to be shown in ASV tooltips. CUGRAPH_DEPS=(cudf rmm) LIBCUGRAPH_DEPS=(cudf rmm) -logger "conda install required packages" -conda install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaults \ +gpuci_logger "Install required packages" +gpuci_conda_retry install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaults \ "cudf=${MINOR_VERSION}" \ "rmm=${MINOR_VERSION}" \ "cudatoolkit=$CUDA_REL" \ @@ -74,31 +78,32 @@ conda install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaul "rapids-build-env=${MINOR_VERSION}" \ rapids-pytest-benchmark -# Install the master version of dask and distributed -logger "pip install git+https://github.com/dask/distributed.git --upgrade --no-deps" +gpuci_logger "Install the master version of dask and distributed" pip install "git+https://github.com/dask/distributed.git" --upgrade --no-deps - -logger "pip install git+https://github.com/dask/dask.git --upgrade --no-deps" pip install "git+https://github.com/dask/dask.git" --upgrade --no-deps -logger "Check versions..." +gpuci_logger "Check versions" python --version $CC --version $CXX --version -conda list + +gpuci_logger "Check conda environment" +conda info +conda config --show-sources +conda list --show-channel-urls ########################################## # Build cuGraph # ########################################## -logger "Build libcugraph..." +gpuci_logger "Build libcugraph" $WORKSPACE/build.sh clean libcugraph cugraph ########################################## # Run Benchmarks # ########################################## -logger "Downloading Datasets for Benchmarks..." +gpuci_logger "Downloading Datasets for Benchmarks" cd $DATASETS_DIR bash ./get_test_data.sh --benchmark ERRORCODE=$((ERRORCODE | $?)) @@ -148,7 +153,7 @@ BENCHMARK_META=$(jq -n \ echo "Benchmark meta:" echo "${BENCHMARK_META}" | jq "." -logger "Running Benchmarks..." +gpuci_logger "Running Benchmarks" cd $BENCHMARKS_DIR set +e time pytest -v -m "small and managedmem_on and poolallocator_on" \ diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 2cdb77bbbc2..2c6dc899be2 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -5,14 +5,9 @@ ######################################### set -e -# Logger function for build status output -function logger() { - echo -e "\n>>>> $@\n" -} - # Set path and build parallel level -export PATH=/conda/bin:/usr/local/cuda/bin:$PATH -export PARALLEL_LEVEL=4 +export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH +export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} # Set home to the job's workspace export HOME=$WORKSPACE @@ -25,41 +20,59 @@ if [[ "$BUILD_MODE" = "branch" && "$SOURCE_BRANCH" = branch-* ]] ; then export VERSION_SUFFIX=`date +%y%m%d` fi +# Setup 'gpuci_conda_retry' for build retries (results in 2 total attempts) +export GPUCI_CONDA_RETRY_MAX=1 +export GPUCI_CONDA_RETRY_SLEEP=30 + ################################################################################ # SETUP - Check environment ################################################################################ -logger "Get env..." +gpuci_logger "Check environment variables" env -logger "Activate conda env..." -source activate gdf +gpuci_logger "Activate conda env" +. /opt/conda/etc/profile.d/conda.sh +conda activate rapids -logger "Check versions..." +gpuci_logger "Check versions" python --version -gcc --version -g++ --version -conda list +$CC --version +$CXX --version + +gpuci_logger "Check conda environment" +conda info +conda config --show-sources +conda list --show-channel-urls # FIX Added to deal with Anancoda SSL verification issues during conda builds conda config --set ssl_verify False ############################################################################### -# BUILD - Conda package builds (conda deps: libcugraph <- cugraph) -################################################################################ +# BUILD - Conda package builds +############################################################################### -logger "Build conda pkg for libcugraph..." -source ci/cpu/libcugraph/build_libcugraph.sh +gpuci_logger "Build conda pkg for libcugraph" +if [ "$BUILD_LIBCUGRAPH" == '1' ]; then + if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then + conda build conda/recipes/libcugraph + else + conda build --dirty --no-remove-work-dir conda/recipes/libcugraph + fi +fi -logger "Build conda pkg for cugraph..." -source ci/cpu/cugraph/build_cugraph.sh +gpuci_logger "Build conda pkg for cugraph" +if [ "$BUILD_CUGRAPH" == "1" ]; then + if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then + conda build conda/recipes/cugraph --python=$PYTHON + else + conda build conda/recipes/cugraph -c ci/artifacts/cugraph/cpu/conda-bld/ --dirty --no-remove-work-dir --python=$PYTHON + fi +fi ################################################################################ # UPLOAD - Conda packages ################################################################################ -logger "Upload libcugraph conda pkg..." -source ci/cpu/libcugraph/upload-anaconda.sh - -logger "Upload cugraph conda pkg..." -source ci/cpu/cugraph/upload-anaconda.sh +gpuci_logger "Upload conda packages" +source ci/cpu/upload.sh diff --git a/ci/cpu/cugraph/build_cugraph.sh b/ci/cpu/cugraph/build_cugraph.sh deleted file mode 100755 index 70f5baee230..00000000000 --- a/ci/cpu/cugraph/build_cugraph.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -e - -if [ "$BUILD_CUGRAPH" == "1" ]; then - echo "Building cugraph" - CUDA_REL=${CUDA_VERSION%.*} - if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then - conda build conda/recipes/cugraph --python=$PYTHON - else - conda build conda/recipes/cugraph -c ci/artifacts/cugraph/cpu/conda-bld/ --dirty --no-remove-work-dir --python=$PYTHON - fi -fi diff --git a/ci/cpu/cugraph/upload-anaconda.sh b/ci/cpu/cugraph/upload-anaconda.sh deleted file mode 100755 index 9601905d6c4..00000000000 --- a/ci/cpu/cugraph/upload-anaconda.sh +++ /dev/null @@ -1,50 +0,0 @@ -#!/bin/bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -e - -if [[ "$BUILD_CUGRAPH" == "1" && "$UPLOAD_CUGRAPH" == "1" ]]; then - export UPLOADFILE=`conda build conda/recipes/cugraph -c rapidsai -c nvidia -c numba -c conda-forge -c defaults --python=$PYTHON --output` - - - # Have to label all CUDA versions due to the compatibility to work with any CUDA - if [ "$LABEL_MAIN" == "1" ]; then - LABEL_OPTION="--label main" - elif [ "$LABEL_MAIN" == "0" ]; then - LABEL_OPTION="--label dev" - else - echo "Unknown label configuration LABEL_MAIN='$LABEL_MAIN'" - exit 1 - fi - echo "LABEL_OPTION=${LABEL_OPTION}" - - test -e ${UPLOADFILE} - - if [ ${BUILD_MODE} != "branch" ]; then - echo "Skipping upload" - return 0 - fi - - if [ -z "$MY_UPLOAD_KEY" ]; then - echo "No upload key" - return 0 - fi - - echo "Upload" - echo ${UPLOADFILE} - anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${UPLOADFILE} -else - echo "Skipping cugraph upload" - return 0 -fi diff --git a/ci/cpu/libcugraph/build_libcugraph.sh b/ci/cpu/libcugraph/build_libcugraph.sh deleted file mode 100755 index e5ff77d7db9..00000000000 --- a/ci/cpu/libcugraph/build_libcugraph.sh +++ /dev/null @@ -1,25 +0,0 @@ -#!/usr/bin/env bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -e - -if [ "$BUILD_LIBCUGRAPH" == '1' ]; then - echo "Building libcugraph" - CUDA_REL=${CUDA_VERSION%.*} - if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then - conda build conda/recipes/libcugraph - else - conda build --dirty --no-remove-work-dir conda/recipes/libcugraph - fi -fi diff --git a/ci/cpu/libcugraph/upload-anaconda.sh b/ci/cpu/libcugraph/upload-anaconda.sh deleted file mode 100755 index 8cd71070778..00000000000 --- a/ci/cpu/libcugraph/upload-anaconda.sh +++ /dev/null @@ -1,43 +0,0 @@ -#!/bin/bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -set -e - -if [[ "$BUILD_LIBCUGRAPH" == "1" && "$UPLOAD_LIBCUGRAPH" == "1" ]]; then - CUDA_REL=${CUDA_VERSION%.*} - - export UPLOADFILE=`conda build conda/recipes/libcugraph --output` - - - LABEL_OPTION="--label main" - echo "LABEL_OPTION=${LABEL_OPTION}" - - test -e ${UPLOADFILE} - - if [ ${BUILD_MODE} != "branch" ]; then - echo "Skipping upload" - return 0 - fi - - if [ -z "$MY_UPLOAD_KEY" ]; then - echo "No upload key" - return 0 - fi - - echo "Upload" - echo ${UPLOADFILE} - anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${UPLOADFILE} -else - echo "Skipping libcugraph upload" -fi diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh new file mode 100644 index 00000000000..ff10072ee3c --- /dev/null +++ b/ci/cpu/upload.sh @@ -0,0 +1,54 @@ +#!/bin/bash +# +# Adopted from https://github.com/tmcdonell/travis-scripts/blob/dfaac280ac2082cd6bcaba3217428347899f2975/update-accelerate-buildbot.sh + +set -e + +# Setup 'gpuci_retry' for upload retries (results in 4 total attempts) +export GPUCI_RETRY_MAX=3 +export GPUCI_RETRY_SLEEP=30 + +# Set default label options if they are not defined elsewhere +export LABEL_OPTION=${LABEL_OPTION:-"--label main"} + +# Skip uploads unless BUILD_MODE == "branch" +if [ ${BUILD_MODE} != "branch" ]; then + echo "Skipping upload" + return 0 +fi + +# Skip uploads if there is no upload key +if [ -z "$MY_UPLOAD_KEY" ]; then + echo "No upload key" + return 0 +fi + +################################################################################ +# SETUP - Get conda file output locations +################################################################################ + +gpuci_logger "Get conda file output locations" + +export LIBCUGRAPH_FILE=`conda build conda/recipes/libcugraph --output` +export CUGRAPH_FILE=`conda build conda/recipes/cugraph --python=$PYTHON --output` + +################################################################################ +# UPLOAD - Conda packages +################################################################################ + +gpuci_logger "Starting conda uploads" + +if [ "$BUILD_LIBCUGRAPH" == "1" && "$UPLOAD_LIBCUGRAPH" == "1" ]; then + test -e ${LIBCUGRAPH_FILE} + echo "Upload libcugraph" + echo ${LIBCUGRAPH_FILE} + gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${LIBCUGRAPH_FILE} +fi + +if [ "$BUILD_CUGRAPH" == "1" && "$UPLOAD_CUGRAPH" == "1" ]; then + test -e ${CUGRAPH_FILE} + echo "Upload cugraph" + echo ${CUGRAPH_FILE} + gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${CUGRAPH_FILE} +fi + diff --git a/ci/docs/build.sh b/ci/docs/build.sh index 71ad79419a0..6ce223d8b2b 100644 --- a/ci/docs/build.sh +++ b/ci/docs/build.sh @@ -18,32 +18,38 @@ export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache" export NIGHTLY_VERSION=$(echo $BRANCH_VERSION | awk -F. '{print $2}') export PROJECTS=(cugraph libcugraph) -logger "Check environment..." +gpuci_logger "Check environment" env -logger "Check GPU usage..." +gpuci_logger "Check GPU usage" nvidia-smi -logger "Activate conda env..." -source activate rapids +gpuci_logger "Activate conda env" +. /opt/conda/etc/profile.d/conda.sh +conda activate rapids + # TODO: Move installs to docs-build-env meta package -conda install -c anaconda markdown beautifulsoup4 jq +gpuci_conda_retry install -c anaconda markdown beautifulsoup4 jq pip install sphinx-markdown-tables -logger "Check versions..." +gpuci_logger "Check versions" python --version $CC --version $CXX --version -conda list + +gpuci_logger "Show conda info" +conda info +conda config --show-sources +conda list --show-channel-urls # Build Doxygen docs -logger "Build Doxygen docs..." +gpuci_logger "Build Doxygen docs" cd $PROJECT_WORKSPACE/cpp/build make docs_cugraph # Build Python docs -logger "Build Sphinx docs..." +gpuci_logger "Build Sphinx docs" cd $PROJECT_WORKSPACE/docs make html diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 0dab0437be7..c983acbfe43 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -8,16 +8,12 @@ set -o pipefail NUMARGS=$# ARGS=$* -function logger { - echo -e "\n>>>> $@\n" -} - function hasArg { (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ") } function cleanup { - logger "Removing datasets and temp files..." + gpuci_logger "Removing datasets and temp files" rm -rf $WORKSPACE/datasets/test rm -rf $WORKSPACE/datasets/benchmark rm -f testoutput.txt @@ -25,13 +21,13 @@ function cleanup { # Set cleanup trap for Jenkins if [ ! -z "$JENKINS_HOME" ] ; then - logger "Jenkins environment detected, setting cleanup trap..." + gpuci_logger "Jenkins environment detected, setting cleanup trap" trap cleanup EXIT fi # Set path, build parallel level, and CUDA version -export PATH=/conda/bin:/usr/local/cuda/bin:$PATH -export PARALLEL_LEVEL=4 +export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH +export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} export CUDA_REL=${CUDA_VERSION%.*} # Set home to the job's workspace @@ -46,17 +42,18 @@ export MINOR_VERSION=`echo $GIT_DESCRIBE_TAG | grep -o -E '([0-9]+\.[0-9]+)'` # SETUP - Check environment ################################################################################ -logger "Check environment..." +gpuci_logger "Check environment" env -logger "Check GPU usage..." +gpuci_logger "Check GPU usage" nvidia-smi -logger "Activate conda env..." -source activate rapids +gpuci_logger "Activate conda env" +. /opt/conda/etc/profile.d/conda.sh +conda activate rapids -logger "conda install required packages" -conda install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaults \ +gpuci_logger "Install dependencies" +gpuci_conda_retry install -y \ "libcudf=${MINOR_VERSION}" \ "cudf=${MINOR_VERSION}" \ "librmm=${MINOR_VERSION}" \ @@ -70,29 +67,29 @@ conda install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaul rapids-pytest-benchmark # https://docs.rapids.ai/maintainers/depmgmt/ -# conda remove --force rapids-build-env rapids-notebook-env -# conda install "your-pkg=1.0.0" +# gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env +# gpuci_conda_retry install -y "your-pkg=1.0.0" -# Install the master version of dask and distributed -logger "pip install git+https://github.com/dask/distributed.git --upgrade --no-deps" +gpuci_logger "Install the master version of dask and distributed" pip install "git+https://github.com/dask/distributed.git" --upgrade --no-deps - -logger "pip install git+https://github.com/dask/dask.git --upgrade --no-deps" pip install "git+https://github.com/dask/dask.git" --upgrade --no-deps - -logger "Check versions..." +gpuci_logger "Check versions" python --version $CC --version $CXX --version -conda list + +gpuci_logger "Check conda environment" +conda info +conda config --show-sources +conda list --show-channel-urls ################################################################################ # BUILD - Build libcugraph and cuGraph from source ################################################################################ if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then - logger "Build libcugraph..." + gpuci_logger "Build from source" $WORKSPACE/build.sh clean libcugraph cugraph fi @@ -105,9 +102,9 @@ EXITCODE=0 trap "EXITCODE=1" ERR if hasArg --skip-tests; then - logger "Skipping Tests..." + gpuci_logger "Skipping Tests" else - logger "Check GPU usage..." + gpuci_logger "Check GPU usage" nvidia-smi # If this is a PR build, skip downloading large datasets and don't run the diff --git a/conda/recipes/libcugraph/build.sh b/conda/recipes/libcugraph/build.sh index ae73202ca35..6051b6eee41 100644 --- a/conda/recipes/libcugraph/build.sh +++ b/conda/recipes/libcugraph/build.sh @@ -1,12 +1,4 @@ #!/usr/bin/env bash # This assumes the script is executed from the root of the repo directory - -# show environment -printenv -# Cleanup local git -if [ -d .git ]; then - git clean -xdf -fi - ./build.sh libcugraph -v From 38a8088bf82802cd4426529349ec0ee57fde48da Mon Sep 17 00:00:00 2001 From: dillon-cullinan Date: Mon, 19 Oct 2020 13:25:37 -0700 Subject: [PATCH 04/41] DOC Changelog update --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f305e30fe1..aae1085c260 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,7 +36,7 @@ - PR #1222 Added min CUDA version check to MG Louvain - PR #1217 NetworkX Transition doc - PR #1223 Update mnmg docs - +- PR #1230 Improve gpuCI scripts ## Bug Fixes - PR #1131 Show style checker errors with set +e From f6382fbd38c90023874221c59a54ff57e20e2cc2 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Tue, 20 Oct 2020 10:06:53 -0500 Subject: [PATCH 05/41] [REVIEW] BUG Temporarily disabling C++ tests for 0.16 (#1233) * Temporarily disabling all C++ tests for 0.16 due to intermittent failures from what appears to be an issue with Thrust (which does not appear to affect the Python API or notebooks). These will be re-enabled once this issue is resolved in 0.17. * Added PR 1233 to CHANGELOG.md Co-authored-by: Rick Ratzel --- CHANGELOG.md | 3 +-- ci/test.sh | 16 ++++++++++------ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f305e30fe1..5d8cb958e52 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -37,7 +37,6 @@ - PR #1217 NetworkX Transition doc - PR #1223 Update mnmg docs - ## Bug Fixes - PR #1131 Show style checker errors with set +e - PR #1150 Update RAFT git tag @@ -53,7 +52,7 @@ - PR #1196 Move subcomms init outside of individual algorithm functions - PR #1198 Remove deprecated call to from_gpu_matrix - PR #1174 Fix bugs in MNMG pattern accelerators and pattern accelerator based implementations of MNMG PageRank, BFS, and SSSP - +- PR #1233 Temporarily disabling C++ tests for 0.16 # cuGraph 0.15.0 (26 Aug 2020) diff --git a/ci/test.sh b/ci/test.sh index fde9bbb3d8d..3655759d171 100755 --- a/ci/test.sh +++ b/ci/test.sh @@ -64,12 +64,16 @@ else cd $WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build fi -for gt in gtests/*; do - test_name=$(basename $gt) - echo "Running GoogleTest $test_name" - ${gt} ${GTEST_FILTER} ${GTEST_ARGS} - ERRORCODE=$((ERRORCODE | $?)) -done +# FIXME: temporarily disabling all C++ tests for 0.16 due to intermittent +# failures from what appears to be an issue with Thrust (which does not appear +# to affect the Python API or notebooks). Re-enable once this issue is resolved +# in 0.17. +# for gt in gtests/*; do +# test_name=$(basename $gt) +# echo "Running GoogleTest $test_name" +# ${gt} ${GTEST_FILTER} ${GTEST_ARGS} +# ERRORCODE=$((ERRORCODE | $?)) +# done if [[ "$PROJECT_FLASH" == "1" ]]; then echo "Installing libcugraph..." From b2c09a626eebd79ff04cd13339c8b9f996bb52b5 Mon Sep 17 00:00:00 2001 From: dillon-cullinan Date: Tue, 20 Oct 2020 10:25:05 -0700 Subject: [PATCH 06/41] FIX Move path export above trap definition --- ci/gpu/build.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index c983acbfe43..60279dd5541 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -12,6 +12,11 @@ function hasArg { (( ${NUMARGS} != 0 )) && (echo " ${ARGS} " | grep -q " $1 ") } +# Set path, build parallel level, and CUDA version +export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH +export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} +export CUDA_REL=${CUDA_VERSION%.*} + function cleanup { gpuci_logger "Removing datasets and temp files" rm -rf $WORKSPACE/datasets/test @@ -25,11 +30,6 @@ if [ ! -z "$JENKINS_HOME" ] ; then trap cleanup EXIT fi -# Set path, build parallel level, and CUDA version -export PATH=/opt/conda/bin:/usr/local/cuda/bin:$PATH -export PARALLEL_LEVEL=${PARALLEL_LEVEL:-4} -export CUDA_REL=${CUDA_VERSION%.*} - # Set home to the job's workspace export HOME=$WORKSPACE From a98b99bcbff9b9c376cfe606429b20f0d7488b4d Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 21 Oct 2020 13:27:20 -0400 Subject: [PATCH 07/41] bug fix in personalized PageRank --- cpp/src/experimental/pagerank.cu | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index 4084695deb1..67a8bbd2460 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -250,6 +250,7 @@ void pagerank(raft::handle_t const& handle, auto val_first = thrust::make_zip_iterator( thrust::make_tuple(personalization_vertices, personalization_values)); thrust::for_each( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), val_first, val_first + personalization_vector_size, [vertex_partition, pageranks, dangling_sum, personalization_sum, alpha] __device__( From f17b20a53edcf3571f3851e69268de790700ad44 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 21 Oct 2020 14:08:32 -0400 Subject: [PATCH 08/41] bug fix in the reference (host) pagerank implementation --- cpp/tests/experimental/pagerank_test.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 3fe74e279ff..c5f133a5f76 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -94,13 +94,13 @@ void pagerank_reference(edge_t* offsets, pageranks[i] += alpha * old_pageranks[nbr] * (w / out_weight_sums[nbr]); } if (personalization_vertices == nullptr) { - pageranks[i] += (dangling_sum + (1.0 - alpha)) / static_cast(num_vertices); + pageranks[i] += (dangling_sum * alpha + (1.0 - alpha)) / static_cast(num_vertices); } } if (personalization_vertices != nullptr) { for (vertex_t i = 0; i < personalization_vector_size; ++i) { auto v = personalization_vertices[i]; - pageranks[v] += (dangling_sum + (1.0 - alpha)) * personalization_values[i]; + pageranks[v] += (dangling_sum * alpha + (1.0 - alpha)) * personalization_values[i]; } } result_t diff_sum{0.0}; From b9e1cfbb89ef8d694d67b621acdc7c0a0b3333a2 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 21 Oct 2020 14:43:31 -0400 Subject: [PATCH 09/41] bug fix in MG personalized pagerank (when the aggregate personalization vector size > 0 but local personalization vector size is 0 in some GPUs) --- cpp/src/experimental/pagerank.cu | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index 67a8bbd2460..4950925535e 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -68,12 +68,20 @@ void pagerank(raft::handle_t const& handle, auto const num_vertices = pull_graph_view.get_number_of_vertices(); if (num_vertices == 0) { return; } + auto aggregate_personalization_vector_size = + GraphViewType::is_multi_gpu + ? host_scalar_allreduce(handle.get_comms(), personalization_vector_size, handle.get_stream()) + : vertex_t{0}; + // 1. check input arguments CUGRAPH_EXPECTS( - (personalization_vertices == nullptr) || (personalization_values != nullptr), - "Invalid input argument: if personalization verties are provided, personalization " - "values should be provided as well."); + ((personalization_vector_size > 0) && (personalization_vertices != nullptr) && + (personalization_values != nullptr)) || + ((personalization_vector_size == 0) && (personalization_vertices == nullptr) && + (personalization_values == nullptr)), + "Invalid input argument: if personalization_vector_size is non-zero, personalization verties " + "and personalization values should be provided. Otherwise, they should not be provided."); CUGRAPH_EXPECTS((alpha >= 0.0) && (alpha <= 1.0), "Invalid input argument: alpha should be in [0.0, 1.0]."); CUGRAPH_EXPECTS(epsilon >= 0.0, "Invalid input argument: epsilon should be non-negative."); @@ -109,7 +117,7 @@ void pagerank(raft::handle_t const& handle, "Invalid input argument: initial guess values should be non-negative."); } - if (personalization_vertices != nullptr) { + if (aggregate_personalization_vector_size > 0) { vertex_partition_device_t vertex_partition(pull_graph_view); auto num_invalid_vertices = count_if_v(handle, @@ -177,7 +185,7 @@ void pagerank(raft::handle_t const& handle, // 4. sum the personalization values result_t personalization_sum{0.0}; - if (personalization_vertices != nullptr) { + if (aggregate_personalization_vector_size > 0) { personalization_sum = reduce_v(handle, pull_graph_view, personalization_values, @@ -229,7 +237,7 @@ void pagerank(raft::handle_t const& handle, copy_to_adj_matrix_row(handle, pull_graph_view, pageranks, adj_matrix_row_pageranks.begin()); - auto unvarying_part = personalization_vertices == nullptr + auto unvarying_part = aggregate_personalization_vector_size == 0 ? (dangling_sum * alpha + static_cast(1.0 - alpha)) / static_cast(num_vertices) : result_t{0.0}; @@ -245,7 +253,7 @@ void pagerank(raft::handle_t const& handle, unvarying_part, pageranks); - if (personalization_vertices != nullptr) { + if (aggregate_personalization_vector_size > 0) { vertex_partition_device_t vertex_partition(pull_graph_view); auto val_first = thrust::make_zip_iterator( thrust::make_tuple(personalization_vertices, personalization_values)); From 3e9181a64e9378e60e0b6be9c8d88a6df4b752a8 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 21 Oct 2020 16:31:21 -0400 Subject: [PATCH 10/41] additional bug fix in personalized PageRank --- cpp/src/experimental/pagerank.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index 4950925535e..6c212803336 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -71,7 +71,7 @@ void pagerank(raft::handle_t const& handle, auto aggregate_personalization_vector_size = GraphViewType::is_multi_gpu ? host_scalar_allreduce(handle.get_comms(), personalization_vector_size, handle.get_stream()) - : vertex_t{0}; + : personalization_vector_size; // 1. check input arguments From e7f09d4577b057a6b5359ac014cf756ec86e5391 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 21 Oct 2020 16:31:42 -0400 Subject: [PATCH 11/41] add personlized PageRank tests --- cpp/tests/experimental/pagerank_test.cpp | 104 ++++++++++++++++++----- 1 file changed, 82 insertions(+), 22 deletions(-) diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index c5f133a5f76..7a92da30bf1 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -32,6 +32,7 @@ #include #include #include +#include #include template @@ -94,7 +95,8 @@ void pagerank_reference(edge_t* offsets, pageranks[i] += alpha * old_pageranks[nbr] * (w / out_weight_sums[nbr]); } if (personalization_vertices == nullptr) { - pageranks[i] += (dangling_sum * alpha + (1.0 - alpha)) / static_cast(num_vertices); + pageranks[i] += + (dangling_sum * alpha + (1.0 - alpha)) / static_cast(num_vertices); } } if (personalization_vertices != nullptr) { @@ -117,10 +119,13 @@ void pagerank_reference(edge_t* offsets, typedef struct PageRank_Usecase_t { std::string graph_file_full_path{}; + double personalization_ratio{0.0}; bool test_weighted{false}; - PageRank_Usecase_t(std::string const& graph_file_path, bool test_weighted) - : test_weighted(test_weighted) + PageRank_Usecase_t(std::string const& graph_file_path, + double personalization_ratio, + bool test_weighted) + : personalization_ratio(personalization_ratio), test_weighted(test_weighted) { if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; @@ -169,19 +174,64 @@ class Tests_PageRank : public ::testing::TestWithParam { } CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + std::vector h_personalization_vertices{}; + std::vector h_personalization_values{}; + if (configuration.personalization_ratio > 0.0) { + std::random_device r{}; + std::default_random_engine generator{r()}; + std::uniform_real_distribution distribution{0.0, 1.0}; + h_personalization_vertices.resize(graph_view.get_number_of_local_vertices()); + std::iota(h_personalization_vertices.begin(), + h_personalization_vertices.end(), + graph_view.get_local_vertex_first()); + h_personalization_vertices.erase( + std::remove_if(h_personalization_vertices.begin(), + h_personalization_vertices.end(), + [&generator, &distribution, configuration](auto v) { + return distribution(generator) >= configuration.personalization_ratio; + }), + h_personalization_vertices.end()); + h_personalization_values.resize(h_personalization_vertices.size()); + std::for_each(h_personalization_values.begin(), + h_personalization_values.end(), + [&distribution, &generator](auto& val) { val = distribution(generator); }); + auto sum = std::accumulate( + h_personalization_values.begin(), h_personalization_values.end(), result_t{0.0}); + std::for_each(h_personalization_values.begin(), + h_personalization_values.end(), + [sum](auto& val) { val /= sum; }); + } + + rmm::device_uvector d_personalization_vertices(h_personalization_vertices.size(), + handle.get_stream()); + rmm::device_uvector d_personalization_values(d_personalization_vertices.size(), + handle.get_stream()); + if (d_personalization_vertices.size() > 0) { + raft::update_device(d_personalization_vertices.data(), + h_personalization_vertices.data(), + h_personalization_vertices.size(), + handle.get_stream()); + raft::update_device(d_personalization_values.data(), + h_personalization_values.data(), + h_personalization_values.size(), + handle.get_stream()); + } + std::vector h_reference_pageranks(graph_view.get_number_of_vertices()); result_t constexpr alpha{0.85}; - result_t constexpr epsilon{1e-6}; + auto epsilon = graph_view.get_number_of_vertices() > 0 + ? result_t{1e-3} / static_cast(graph_view.get_number_of_vertices()) + : result_t{1e-3}; pagerank_reference(h_offsets.data(), h_indices.data(), h_weights.size() > 0 ? h_weights.data() : static_cast(nullptr), - static_cast(nullptr), - static_cast(nullptr), + h_personalization_vertices.data(), + h_personalization_values.data(), h_reference_pageranks.data(), graph_view.get_number_of_vertices(), - vertex_t{0}, + static_cast(h_personalization_vertices.size()), alpha, epsilon, std::numeric_limits::max(), @@ -195,9 +245,9 @@ class Tests_PageRank : public ::testing::TestWithParam { cugraph::experimental::pagerank(handle, graph_view, static_cast(nullptr), - static_cast(nullptr), - static_cast(nullptr), - vertex_t{0}, + d_personalization_vertices.data(), + d_personalization_values.data(), + static_cast(d_personalization_vertices.size()), d_pageranks.begin(), alpha, epsilon, @@ -213,7 +263,9 @@ class Tests_PageRank : public ::testing::TestWithParam { h_cugraph_pageranks.data(), d_pageranks.data(), d_pageranks.size(), handle.get_stream()); CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - auto nearly_equal = [epsilon](auto lhs, auto rhs) { return std::fabs(lhs - rhs) < epsilon; }; + auto nearly_equal = [epsilon](auto lhs, auto rhs) { + return fabs(lhs - rhs) < std::max(std::max(lhs, rhs), epsilon) * 1e-3; + }; ASSERT_TRUE(std::equal(h_reference_pageranks.begin(), h_reference_pageranks.end(), @@ -229,16 +281,24 @@ TEST_P(Tests_PageRank, CheckInt32Int32FloatFloat) run_current_test(GetParam()); } -INSTANTIATE_TEST_CASE_P(simple_test, - Tests_PageRank, - ::testing::Values(PageRank_Usecase("test/datasets/karate.mtx", false), - PageRank_Usecase("test/datasets/karate.mtx", true), - PageRank_Usecase("test/datasets/web-Google.mtx", false), - PageRank_Usecase("test/datasets/web-Google.mtx", true), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", - false), - PageRank_Usecase("test/datasets/ljournal-2008.mtx", true), - PageRank_Usecase("test/datasets/webbase-1M.mtx", false), - PageRank_Usecase("test/datasets/webbase-1M.mtx", true))); +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_PageRank, + ::testing::Values(PageRank_Usecase("test/datasets/karate.mtx", 0.0, false), + PageRank_Usecase("test/datasets/karate.mtx", 0.5, false), + PageRank_Usecase("test/datasets/karate.mtx", 0.0, true), + PageRank_Usecase("test/datasets/karate.mtx", 0.5, true), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, false), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, false), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, true), + PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, true), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, false), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, false), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true), + PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true))); CUGRAPH_TEST_PROGRAM_MAIN() From c6646043d6141c93f83a70ff041d8d32b057ae7a Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Wed, 21 Oct 2020 16:44:37 -0400 Subject: [PATCH 12/41] update epsilon in katz centrality test --- cpp/tests/experimental/katz_centrality_test.cpp | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index 0352637dcf0..948a5a2b809 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -150,7 +150,9 @@ class Tests_KatzCentrality : public ::testing::TestWithParam(*max_it + 1); result_t constexpr beta{1.0}; - result_t constexpr epsilon{1e-6}; + auto epsilon = graph_view.get_number_of_vertices() > 0 + ? result_t{1e-3} / static_cast(graph_view.get_number_of_vertices()) + : result_t{1e-3}; katz_centrality_reference( h_offsets.data(), @@ -164,7 +166,7 @@ class Tests_KatzCentrality : public ::testing::TestWithParam::max(), false, - false); + true); rmm::device_uvector d_katz_centralities(graph_view.get_number_of_vertices(), handle.get_stream()); @@ -180,7 +182,7 @@ class Tests_KatzCentrality : public ::testing::TestWithParam::max(), false, - false, + true, false); CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -193,7 +195,9 @@ class Tests_KatzCentrality : public ::testing::TestWithParam Date: Wed, 21 Oct 2020 16:50:38 -0400 Subject: [PATCH 13/41] update change log --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 289895d3664..24ec3e89f51 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -62,6 +62,7 @@ - PR #1198 Remove deprecated call to from_gpu_matrix - PR #1174 Fix bugs in MNMG pattern accelerators and pattern accelerator based implementations of MNMG PageRank, BFS, and SSSP - PR #1233 Temporarily disabling C++ tests for 0.16 +- PR #1241 Fix a bug in personalized PageRank with the new graph primitives API. # cuGraph 0.15.0 (26 Aug 2020) From 9efda119f1ab87b7c1f145b53d9368e8ab0911e3 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Thu, 22 Oct 2020 00:21:32 -0400 Subject: [PATCH 14/41] cosmetic updates (fabs=>std::abs) --- cpp/tests/experimental/katz_centrality_test.cpp | 4 ++-- cpp/tests/experimental/pagerank_test.cpp | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index 948a5a2b809..19dfa31952b 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -69,7 +69,7 @@ void katz_centrality_reference(edge_t* offsets, result_t diff_sum{0.0}; for (vertex_t i = 0; i < num_vertices; ++i) { - diff_sum += fabs(katz_centralities[i] - old_katz_centralities[i]); + diff_sum += std::abs(katz_centralities[i] - old_katz_centralities[i]); } if (diff_sum < static_cast(num_vertices) * epsilon) { break; } iter++; @@ -196,7 +196,7 @@ class Tests_KatzCentrality : public ::testing::TestWithParam(num_vertices) * epsilon) { break; } iter++; @@ -264,7 +264,7 @@ class Tests_PageRank : public ::testing::TestWithParam { CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); auto nearly_equal = [epsilon](auto lhs, auto rhs) { - return fabs(lhs - rhs) < std::max(std::max(lhs, rhs), epsilon) * 1e-3; + return std::abs(lhs - rhs) < std::max(std::max(lhs, rhs), epsilon) * 1e-3; }; ASSERT_TRUE(std::equal(h_reference_pageranks.begin(), From 05b8d1823e5bfabc14ec9d5918d33f54850721f0 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Fri, 23 Oct 2020 13:11:22 -0500 Subject: [PATCH 15/41] [REVIEW][skip ci changelog] Resolving 0.16 to 0.17 auto-merger failures (#1243) * Update CHANGELOG.md * Require `ucx-proc=*=gpu` Co-authored-by: Ray Douglass <3107146+raydouglass@users.noreply.github.com> Co-authored-by: John Kirkham Co-authored-by: Rick Ratzel --- CHANGELOG.md | 3 ++- ci/benchmark/build.sh | 1 + ci/gpu/build.sh | 1 + conda/environments/cugraph_dev_cuda10.1.yml | 1 + conda/environments/cugraph_dev_cuda10.2.yml | 1 + conda/environments/cugraph_dev_cuda11.0.yml | 1 + conda/recipes/cugraph/meta.yaml | 2 ++ conda/recipes/libcugraph/meta.yaml | 2 ++ 8 files changed, 11 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index fb3631955de..c1b9e8d882d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ ## Bug Fixes -# cuGraph 0.16.0 (Date TBD) +# cuGraph 0.16.0 (21 Oct 2020) ## New Features - PR #1098 Add new graph classes to support 2D partitioning @@ -63,6 +63,7 @@ - PR #1198 Remove deprecated call to from_gpu_matrix - PR #1174 Fix bugs in MNMG pattern accelerators and pattern accelerator based implementations of MNMG PageRank, BFS, and SSSP - PR #1233 Temporarily disabling C++ tests for 0.16 +- PR #1240 Require `ucx-proc=*=gpu` # cuGraph 0.15.0 (26 Aug 2020) diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh index 66cbc47fde9..5f74dca4044 100644 --- a/ci/benchmark/build.sh +++ b/ci/benchmark/build.sh @@ -75,6 +75,7 @@ gpuci_conda_retry install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-for "dask-cudf=${MINOR_VERSION}" \ "dask-cuda=${MINOR_VERSION}" \ "ucx-py=${MINOR_VERSION}" \ + "ucx-proc=*=gpu" \ "rapids-build-env=${MINOR_VERSION}" \ rapids-pytest-benchmark diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 60279dd5541..50b3bfb3ee9 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -62,6 +62,7 @@ gpuci_conda_retry install -y \ "dask-cudf=${MINOR_VERSION}" \ "dask-cuda=${MINOR_VERSION}" \ "ucx-py=${MINOR_VERSION}" \ + "ucx-proc=*=gpu" \ "rapids-build-env=$MINOR_VERSION.*" \ "rapids-notebook-env=$MINOR_VERSION.*" \ rapids-pytest-benchmark diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index 2a443b4e1ae..d4d759abad5 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -15,6 +15,7 @@ dependencies: - dask-cudf=0.17* - nccl>=2.7 - ucx-py=0.17* +- ucx-proc=*=gpu - scipy - networkx - python-louvain diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index 32f08f27f55..e6705daa7b8 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -15,6 +15,7 @@ dependencies: - dask-cudf=0.17* - nccl>=2.7 - ucx-py=0.17* +- ucx-proc=*=gpu - scipy - networkx - python-louvain diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index f8c64ebd53b..c8227521a4c 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -15,6 +15,7 @@ dependencies: - dask-cudf=0.17* - nccl>=2.7 - ucx-py=0.17* +- ucx-proc=*=gpu - scipy - networkx - python-louvain diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index c7eba24d2fb..90f5bed942a 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -28,6 +28,7 @@ requirements: - libcugraph={{ version }} - cudf={{ minor_version }} - ucx-py {{ minor_version }} + - ucx-proc=*=gpu run: - python x.x - libcugraph={{ version }} @@ -38,6 +39,7 @@ requirements: - distributed>=2.12.0 - nccl>=2.7 - ucx-py {{ minor_version }} + - ucx-proc=*=gpu #test: # commands: diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index 89ccd2d56e3..211ec920d27 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -31,11 +31,13 @@ requirements: - libcypher-parser - nccl>=2.7 - ucx-py {{ minor_version }} + - ucx-proc=*=gpu run: - libcudf={{ minor_version }} - {{ pin_compatible('cudatoolkit', max_pin='x.x') }} - nccl>=2.7 - ucx-py {{ minor_version }} + - ucx-proc=*=gpu #test: # commands: From 645072921113b55bde92b02845260f3a0d6e76b2 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Fri, 23 Oct 2020 14:17:44 -0500 Subject: [PATCH 16/41] [REVIEW] BUG Calling gunrock cmake using explicit -D options, re-enabling C++ tests (#1242) * Changed CMakeLists.txt to call gunrock cmake using explicit -D options for each arch, similar to a prior version of the CMakeLists.txt, since the update to expand a variable into the arch options was being expanded as a single quoted value with spaces that wasn't recognized, resulting in gunrock being built for only compute 60 support, which caused C++ test crashes. Also re-enabled C++ tests. * Added PR 1242 and 0.16 release date to CHANGELOG. Co-authored-by: Rick Ratzel --- CHANGELOG.md | 1 + ci/test.sh | 16 ++++++---------- cpp/CMakeLists.txt | 32 +++++++++++++++++++++++--------- cpp/tests/traversal/sssp_test.cu | 5 +---- 4 files changed, 31 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c1b9e8d882d..938a2e9f805 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ## Improvements ## Bug Fixes +- PR #1242 Calling gunrock cmake using explicit -D options, re-enabling C++ tests # cuGraph 0.16.0 (21 Oct 2020) diff --git a/ci/test.sh b/ci/test.sh index 3655759d171..fde9bbb3d8d 100755 --- a/ci/test.sh +++ b/ci/test.sh @@ -64,16 +64,12 @@ else cd $WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build fi -# FIXME: temporarily disabling all C++ tests for 0.16 due to intermittent -# failures from what appears to be an issue with Thrust (which does not appear -# to affect the Python API or notebooks). Re-enable once this issue is resolved -# in 0.17. -# for gt in gtests/*; do -# test_name=$(basename $gt) -# echo "Running GoogleTest $test_name" -# ${gt} ${GTEST_FILTER} ${GTEST_ARGS} -# ERRORCODE=$((ERRORCODE | $?)) -# done +for gt in gtests/*; do + test_name=$(basename $gt) + echo "Running GoogleTest $test_name" + ${gt} ${GTEST_FILTER} ${GTEST_ARGS} + ERRORCODE=$((ERRORCODE | $?)) +done if [[ "$PROJECT_FLASH" == "1" ]]; then echo "Installing libcugraph..." diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b1fea1eaf88..9abfcc16312 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -50,6 +50,14 @@ endif(CMAKE_COMPILER_IS_GNUCXX) find_package(CUDA) +# Configure GPU arch to build +set(GUNROCK_GENCODE_SM60 "OFF") +set(GUNROCK_GENCODE_SM61 "OFF") +set(GUNROCK_GENCODE_SM70 "OFF") +set(GUNROCK_GENCODE_SM72 "OFF") +set(GUNROCK_GENCODE_SM75 "OFF") +set(GUNROCK_GENCODE_SM80 "OFF") + # Check for aarch64 vs workstation architectures if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") message(STATUS "CMAKE Detected aarch64 CPU architecture, selecting appropriate gencodes") @@ -57,12 +65,13 @@ if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") set(GPU_ARCHS "62") # Default minimum CUDA GenCode - not supported by gunrock if(CUDA_VERSION_MAJOR GREATER_EQUAL 9) set(GPU_ARCHS "${GPU_ARCHS};72") - set(GUNROCK_GENCODE "-DGUNROCK_GENCODE_SM72=TRUE") + set(GUNROCK_GENCODE_SM72 "ON") endif() if(CUDA_VERSION_MAJOR GREATER_EQUAL 11) # This is probably for SBSA CUDA, or a next gen Jetson set(GPU_ARCHS "${GPU_ARCHS};75;80") - set(GUNROCK_GENCODE "${GUNROCK_GENCODE} -DGUNROCK_GENCODE_SM75=TRUE -DGUNROCK_GENCODE_SM80=TRUE ") + set(GUNROCK_GENCODE_SM75 "ON") + set(GUNROCK_GENCODE_SM80 "ON") endif() else() @@ -70,20 +79,19 @@ else() # System architecture was not aarch64, # this is datacenter or workstation class hardware set(GPU_ARCHS "60") # Default minimum supported CUDA gencode - set(GUNROCK_GENCODE "-DGUNROCK_GENCODE_SM60=TRUE") + set(GUNROCK_GENCODE_SM60 "ON") if(CUDA_VERSION_MAJOR GREATER_EQUAL 9) set(GPU_ARCHS "${GPU_ARCHS};70") - set(GUNROCK_GENCODE "${GUNROCK_GENCODE} -DGUNROCK_GENCODE_SM70=TRUE") + set(GUNROCK_GENCODE_SM70 "ON") endif() if(CUDA_VERSION_MAJOR GREATER_EQUAL 10) set(GPU_ARCHS "${GPU_ARCHS};75") - set(GUNROCK_GENCODE "${GUNROCK_GENCODE} -DGUNROCK_GENCODE_SM75=TRUE") + set(GUNROCK_GENCODE_SM75 "ON") endif() if(CUDA_VERSION_MAJOR GREATER_EQUAL 11) set(GPU_ARCHS "${GPU_ARCHS};80") - set(GUNROCK_GENCODE "${GUNROCK_GENCODE} -DGUNROCK_GENCODE_SM80=TRUE") + set(GUNROCK_GENCODE_SM80 "ON") endif() - endif() message("-- Building for GPU_ARCHS = ${GPU_ARCHS}") @@ -98,6 +106,7 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas --disable-warnings") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable") + # Option to enable line info in CUDA device compilation to allow introspection when profiling / # memchecking option(CMAKE_CUDA_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler" OFF) @@ -248,7 +257,6 @@ set(LIBCUDACXX_INCLUDE_DIR "${libcudacxx_SOURCE_DIR}/include") message("set LIBCUDACXX_INCLUDE_DIR to: ${LIBCUDACXX_INCLUDE_DIR}") - ################################################################################################### # - External Projects ----------------------------------------------------------------------------- @@ -280,7 +288,13 @@ ExternalProject_Add(cugunrock CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= -DGUNROCK_BUILD_SHARED_LIBS=OFF -DGUNROCK_BUILD_TESTS=OFF - -DCUDA_AUTODETECT_GENCODE=FALSE + -DCUDA_AUTODETECT_GENCODE=OFF + -DGUNROCK_GENCODE_SM60=${GUNROCK_GENCODE_SM60} + -DGUNROCK_GENCODE_SM61=${GUNROCK_GENCODE_SM61} + -DGUNROCK_GENCODE_SM70=${GUNROCK_GENCODE_SM70} + -DGUNROCK_GENCODE_SM72=${GUNROCK_GENCODE_SM72} + -DGUNROCK_GENCODE_SM75=${GUNROCK_GENCODE_SM75} + -DGUNROCK_GENCODE_SM80=${GUNROCK_GENCODE_SM80} ${GUNROCK_GENCODE} BUILD_BYPRODUCTS ${CUGUNROCK_DIR}/lib/libgunrock.a ) diff --git a/cpp/tests/traversal/sssp_test.cu b/cpp/tests/traversal/sssp_test.cu index 5021bd620f8..ea56d1d79cb 100644 --- a/cpp/tests/traversal/sssp_test.cu +++ b/cpp/tests/traversal/sssp_test.cu @@ -425,10 +425,7 @@ TEST_P(Tests_SSSP, CheckFP64_RANDOM_DIST_PREDS) // --gtest_filter=*simple_test* -// FIXME: Enable this for 0.17. Temporarily disabled due to sporadic error hard -// to reproduce: "transform: failed to synchronize: cudaErrorIllegalAddress: an -// illegal memory access was encountered" thrown in the test body. -INSTANTIATE_TEST_CASE_P(DISABLED_simple_test, +INSTANTIATE_TEST_CASE_P(simple_test, Tests_SSSP, ::testing::Values(SSSP_Usecase(MTX, "test/datasets/dblp.mtx", 100), SSSP_Usecase(MTX, "test/datasets/wiki2003.mtx", 100000), From 0cfbd95e063fa09f0cbd228d76bc373b7e4b1d8e Mon Sep 17 00:00:00 2001 From: Seunghwa Kang Date: Mon, 26 Oct 2020 17:39:10 -0400 Subject: [PATCH 17/41] update PageRank and Katz Centrality's interpretation on input parameter tolerance --- cpp/src/experimental/katz_centrality.cu | 2 +- cpp/src/experimental/pagerank.cu | 2 +- cpp/tests/experimental/katz_centrality_test.cpp | 14 ++++++++------ cpp/tests/experimental/pagerank_test.cpp | 14 ++++++++------ 4 files changed, 18 insertions(+), 14 deletions(-) diff --git a/cpp/src/experimental/katz_centrality.cu b/cpp/src/experimental/katz_centrality.cu index 331bda228c3..51d6e0ceb4c 100644 --- a/cpp/src/experimental/katz_centrality.cu +++ b/cpp/src/experimental/katz_centrality.cu @@ -136,7 +136,7 @@ void katz_centrality(raft::handle_t &handle, iter++; - if (diff_sum < static_cast(num_vertices) * epsilon) { + if (diff_sum < epsilon) { break; } else if (iter >= max_iterations) { CUGRAPH_FAIL("Katz Centrality failed to converge."); diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index 6c212803336..1aa7f37fa6b 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -280,7 +280,7 @@ void pagerank(raft::handle_t const& handle, iter++; - if (diff_sum < static_cast(num_vertices) * epsilon) { + if (diff_sum < epsilon) { break; } else if (iter >= max_iterations) { CUGRAPH_FAIL("PageRank failed to converge."); diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index 19dfa31952b..c2ac4340319 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -71,7 +71,7 @@ void katz_centrality_reference(edge_t* offsets, for (vertex_t i = 0; i < num_vertices; ++i) { diff_sum += std::abs(katz_centralities[i] - old_katz_centralities[i]); } - if (diff_sum < static_cast(num_vertices) * epsilon) { break; } + if (diff_sum < epsilon) { break; } iter++; ASSERT_TRUE(iter < max_iterations); } @@ -150,9 +150,7 @@ class Tests_KatzCentrality : public ::testing::TestWithParam(*max_it + 1); result_t constexpr beta{1.0}; - auto epsilon = graph_view.get_number_of_vertices() > 0 - ? result_t{1e-3} / static_cast(graph_view.get_number_of_vertices()) - : result_t{1e-3}; + result_t constexpr epsilon{1e-6}; katz_centrality_reference( h_offsets.data(), @@ -195,8 +193,12 @@ class Tests_KatzCentrality : public ::testing::TestWithParam(graph_view.get_number_of_vertices())) * threshold_ratio; + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + auto diff = std::abs(lhs - rhs); + return (diff < std::max(lhs, rhs) * threshold_ratio) || (diff < threshold_magnitude); }; ASSERT_TRUE(std::equal(h_reference_katz_centralities.begin(), diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 4a41812fc57..8770c3d0676 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -109,7 +109,7 @@ void pagerank_reference(edge_t* offsets, for (vertex_t i = 0; i < num_vertices; ++i) { diff_sum += std::abs(pageranks[i] - old_pageranks[i]); } - if (diff_sum < static_cast(num_vertices) * epsilon) { break; } + if (diff_sum < epsilon) { break; } iter++; ASSERT_TRUE(iter < max_iterations); } @@ -220,9 +220,7 @@ class Tests_PageRank : public ::testing::TestWithParam { std::vector h_reference_pageranks(graph_view.get_number_of_vertices()); result_t constexpr alpha{0.85}; - auto epsilon = graph_view.get_number_of_vertices() > 0 - ? result_t{1e-3} / static_cast(graph_view.get_number_of_vertices()) - : result_t{1e-3}; + result_t constexpr epsilon{1e-6}; pagerank_reference(h_offsets.data(), h_indices.data(), @@ -263,8 +261,12 @@ class Tests_PageRank : public ::testing::TestWithParam { h_cugraph_pageranks.data(), d_pageranks.data(), d_pageranks.size(), handle.get_stream()); CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - auto nearly_equal = [epsilon](auto lhs, auto rhs) { - return std::abs(lhs - rhs) < std::max(std::max(lhs, rhs), epsilon) * 1e-3; + auto threshold_ratio = 1e-3; + auto threshold_magnitude = + (epsilon / static_cast(graph_view.get_number_of_vertices())) * threshold_ratio; + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + auto diff = std::abs(lhs - rhs); + return (diff < std::max(lhs, rhs) * threshold_ratio) || (diff < threshold_magnitude); }; ASSERT_TRUE(std::equal(h_reference_pageranks.begin(), From a3332933c06d0d0c598a83bac310fb3c9fb6d659 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Mon, 26 Oct 2020 17:40:57 -0500 Subject: [PATCH 18/41] add pandas api --- python/cugraph/structure/graph.py | 54 +++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index ffbf4b8ec75..f1ef2afb9d2 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -431,6 +431,60 @@ def from_cudf_edgelist( self.renumber_map = renumber_map + def from_pandas_edgelist( + self, + input_df, + source="source", + destination="destination", + edge_attr=None, + renumber=True, + ): + """ + Initialize a graph from the edge list. It is an error to call this + method on an initialized Graph object. The passed input_df argument + wraps gdf_column objects that represent a graph using the edge list + format. source argument is source column name and destination argument + is destination column name. + + By default, renumbering is enabled to map the source and destination + vertices into an index in the range [0, V) where V is the number + of vertices. If the input vertices are a single column of integers + in the range [0, V), renumbering can be disabled and the original + external vertex ids will be used. + + If weights are present, edge_attr argument is the weights column name. + + Parameters + ---------- + input_df : pandas.DataFrame + A DataFrame that contains edge information + source : str or array-like + source column name or array of column names + destination : str or array-like + destination column name or array of column names + edge_attr : str or None + the weights column name. Default is None + renumber : bool + Indicate whether or not to renumber the source and destination + vertex IDs. Default is True. + + Examples + -------- + >>> df = pandas.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_pandas_edgelist(df, source='0', destination='1', + edge_attr='2', renumber=False) + + """ + gdf = cudf.DataFrame.from_pandas(df) + self.from_cudf_edgelist(gdf, source=source, destination=destination, + edge_attr=edge_attr, renumber=renumber) + + def to_pandas_edgelist(self, source='source', destination='destination'): + gdf = self.view_edge_list() + return gdf.to_pandas() + def from_dask_cudf_edgelist( self, input_ddf, From 1c6c5ad2fc3c31201f9b84f200c66196bce67061 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Tue, 27 Oct 2020 14:13:33 -0500 Subject: [PATCH 19/41] add numpy support --- python/cugraph/structure/graph.py | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index f1ef2afb9d2..2a21c8df818 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -485,6 +485,34 @@ def to_pandas_edgelist(self, source='source', destination='destination'): gdf = self.view_edge_list() return gdf.to_pandas() + def to_numpy_array(self): + import numpy as np + nlen = self.number_of_nodes() + elen = self.number_of_edges() + df = self.edgelist.edgelist_df + np_array = np.full((nlen, nlen), 0.0) + for i in range(0, elen): + np_array[df['src'].iloc[i],df['dst'].iloc[i]] = 1.0 + return np_array + + def to_numpy_matrix(self): + import numpy as np + np_array = self.to_numpy_array() + return np.asmatrix(np_array) + + def from_numpy_array(self, np_array): + src, dst = np_array.nonzero() + weight = np_array[src, dst] + df = cudf.DataFrame() + df['src'] = src + df['dst'] = dst + df['weight'] = weight + self.from_cudf_edgelist(df, 'src', 'dst', edge_attr='weight', renumber=False) + + def from_numpy_matrix(self, np_matrix): + np_array = np.asarray(np_matrix) + self.from_numpy_array(np_array) + def from_dask_cudf_edgelist( self, input_ddf, From 6d8c418ff86011b74f1592c6128999087b4b8a99 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Tue, 27 Oct 2020 14:54:31 -0500 Subject: [PATCH 20/41] add pandas adjacency input --- python/cugraph/structure/graph.py | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index 2a21c8df818..3c8b423f1c5 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -441,10 +441,8 @@ def from_pandas_edgelist( ): """ Initialize a graph from the edge list. It is an error to call this - method on an initialized Graph object. The passed input_df argument - wraps gdf_column objects that represent a graph using the edge list - format. source argument is source column name and destination argument - is destination column name. + method on an initialized Graph object. Source argument is source + column name and destination argument is destination column name. By default, renumbering is enabled to map the source and destination vertices into an index in the range [0, V) where V is the number @@ -485,6 +483,15 @@ def to_pandas_edgelist(self, source='source', destination='destination'): gdf = self.view_edge_list() return gdf.to_pandas() + def from_pandas_adjacency(self, pdf): + np_array = pdf.to_numpy() + self.from_numpy_array(np_array) + + def to_pandas_adjacency(self): + np_array_data = self.to_numpy_array() + pdf = pd.DataFrame(np_array_data) + return pdf + def to_numpy_array(self): import numpy as np nlen = self.number_of_nodes() From 9e54f8b604f90e2a893726f4219be03252e814a9 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Tue, 27 Oct 2020 15:19:04 -0500 Subject: [PATCH 21/41] expose functions in convert_matrix --- python/cugraph/structure/__init__.py | 10 +++- python/cugraph/structure/convert_matrix.py | 65 ++++++++++++++++++++++ 2 files changed, 74 insertions(+), 1 deletion(-) diff --git a/python/cugraph/structure/__init__.py b/python/cugraph/structure/__init__.py index 511e6773469..b8b6fbe0435 100644 --- a/python/cugraph/structure/__init__.py +++ b/python/cugraph/structure/__init__.py @@ -14,6 +14,14 @@ from cugraph.structure.graph import Graph, DiGraph from cugraph.structure.number_map import NumberMap from cugraph.structure.symmetrize import symmetrize, symmetrize_df , symmetrize_ddf -from cugraph.structure.convert_matrix import from_cudf_edgelist +from cugraph.structure.convert_matrix import (from_cudf_edgelist, + from_pandas_edgelist, + to_pandas_edgelist, + from_pandas_adjacency, + to_pandas_adjacency, + from_numpy_array, + to_numpy_array, + from_numpy_matrix, + to_numpy_matrix) from cugraph.structure.hypergraph import hypergraph from cugraph.structure.shuffle import shuffle diff --git a/python/cugraph/structure/convert_matrix.py b/python/cugraph/structure/convert_matrix.py index 56bb9086380..e749d46e48e 100644 --- a/python/cugraph/structure/convert_matrix.py +++ b/python/cugraph/structure/convert_matrix.py @@ -65,3 +65,68 @@ def from_cudf_edgelist(df, source='source', destination='destination', edge_attr=edge_attr, renumber=renumber) return G + +def from_pandas_edgelist(df, + source="source", + destination="destination", + edge_attr=None, + create_using=Graph, + renumber=True): + if create_using is Graph: + G = Graph() + elif create_using is DiGraph: + G = DiGraph() + else: + raise Exception("create_using supports Graph and DiGraph") + + G.from_pandas_edgelist(df, source=source, destination=destination, + edge_attr=edge_attr, renumber=renumber) + return G + +def to_pandas_edgelist(G, source='source', destination='destination'): + pdf = G.to_pandas_edgelist(source=source, destination=destination) + return pdf + +def from_pandas_adjacency(df, create_using=Graph): + if create_using is Graph: + G = Graph() + elif create_using is DiGraph: + G = DiGraph() + else: + raise Exception("create_using supports Graph and DiGraph") + + G.from_pandas_adjacency(df) + return G + +def to_pandas_adjacency(G): + pdf = G.to_pandas_adjacency() + return pdf + +def from_numpy_array(A, create_using=Graph): + if create_using is Graph: + G = Graph() + elif create_using is DiGraph: + G = DiGraph() + else: + raise Exception("create_using supports Graph and DiGraph") + + G.from_numpy_array(A) + return G + +def to_numpy_array(G): + A = G.to_numpy_array() + return A + +def from_numpy_matrix(A, create_using=Graph): + if create_using is Graph: + G = Graph() + elif create_using is DiGraph: + G = DiGraph() + else: + raise Exception("create_using supports Graph and DiGraph") + G.from_numpy_matrix(A) + return G + +def to_numpy_matrix(G): + A = G.to_numpy_matrix() + return A From f744a66c758197c2b467281b26a8990ea8effd9e Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Wed, 28 Oct 2020 15:39:56 -0400 Subject: [PATCH 22/41] Use latest Gunrock 1) Update to use gunrock repo directly 2) Update to use new HITS API 3) Update unit test to compare results to networkx --- cpp/CMakeLists.txt | 23 ++++----- cpp/src/link_analysis/gunrock_hits.cpp | 51 +++++--------------- python/cugraph/tests/test_hits.py | 67 ++++---------------------- 3 files changed, 33 insertions(+), 108 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 7fb82258cc5..5d2ca8901f8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -278,13 +278,13 @@ ExternalProject_Add(cuhornet ) # - GUNROCK -set(CUGUNROCK_DIR ${CMAKE_CURRENT_BINARY_DIR}/cugunrock CACHE STRING - "Path to cugunrock repo") +set(GUNROCK_DIR ${CMAKE_CURRENT_BINARY_DIR}/gunrock CACHE STRING "Path to gunrock repo") +set(GUNROCK_INCLUDE_DIR ${GUNROCK_DIR}/src/gunrock_ext CACHE STRING "Path to gunrock includes") -ExternalProject_Add(cugunrock - GIT_REPOSITORY https://github.com/rapidsai/cugunrock.git - GIT_TAG 0b92fae6ee9026188a811b4d08915779e7c97178 - PREFIX ${CUGUNROCK_DIR} +ExternalProject_Add(gunrock_ext + GIT_REPOSITORY https://github.com/gunrock/gunrock.git + GIT_TAG dev + PREFIX ${GUNROCK_DIR} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= -DGUNROCK_BUILD_SHARED_LIBS=OFF -DGUNROCK_BUILD_TESTS=OFF @@ -296,14 +296,15 @@ ExternalProject_Add(cugunrock -DGUNROCK_GENCODE_SM75=${GUNROCK_GENCODE_SM75} -DGUNROCK_GENCODE_SM80=${GUNROCK_GENCODE_SM80} ${GUNROCK_GENCODE} - BUILD_BYPRODUCTS ${CUGUNROCK_DIR}/lib/libgunrock.a + BUILD_BYPRODUCTS ${GUNROCK_DIR}/src/gunrock_ext-build/lib/libgunrock.a + INSTALL_COMMAND "" ) add_library(gunrock STATIC IMPORTED) -add_dependencies(gunrock cugunrock) +add_dependencies(gunrock gunrock_ext) -set_property(TARGET gunrock PROPERTY IMPORTED_LOCATION ${CUGUNROCK_DIR}/lib/libgunrock.a) +set_property(TARGET gunrock PROPERTY IMPORTED_LOCATION ${GUNROCK_DIR}/src/gunrock_ext-build/lib/libgunrock.a) # - NCCL if(NOT NCCL_PATH) @@ -399,7 +400,7 @@ add_library(cugraph SHARED # NOTE: This dependency will force the building of cugraph to # wait until after cugunrock is constructed. # -add_dependencies(cugraph cugunrock) +add_dependencies(cugraph gunrock_ext) add_dependencies(cugraph raft) ################################################################################################### @@ -419,7 +420,7 @@ target_include_directories(cugraph "${CUHORNET_INCLUDE_DIR}/xlib/include" "${CUHORNET_INCLUDE_DIR}/primitives" "${CMAKE_CURRENT_SOURCE_DIR}/src" - "${CUGUNROCK_DIR}/include" + "${GUNROCK_INCLUDE_DIR}" "${NCCL_INCLUDE_DIRS}" "${RAFT_DIR}/cpp/include" PUBLIC diff --git a/cpp/src/link_analysis/gunrock_hits.cpp b/cpp/src/link_analysis/gunrock_hits.cpp index 84c6036ad70..5eef66e6a4d 100644 --- a/cpp/src/link_analysis/gunrock_hits.cpp +++ b/cpp/src/link_analysis/gunrock_hits.cpp @@ -30,6 +30,9 @@ namespace cugraph { namespace gunrock { +const int HOST{1}; // gunrock should expose the device constant at the API level. +const int DEVICE{2}; // gunrock should expose the device constant at the API level. + template void hits(cugraph::GraphCSRView const &graph, int max_iter, @@ -44,49 +47,18 @@ void hits(cugraph::GraphCSRView const &graph, "Invalid API parameter: authorities array should be of size V"); // - // NOTE: gunrock doesn't support tolerance parameter - // gunrock doesn't support passing a starting value - // gunrock doesn't support the normalized parameter - // - // FIXME: gunrock uses a 2-norm, while networkx uses a 1-norm. - // They will add a parameter to allow us to specify - // which norm to use. - // - std::vector local_offsets(graph.number_of_vertices + 1); - std::vector local_indices(graph.number_of_edges); - std::vector local_hubs(graph.number_of_vertices); - std::vector local_authorities(graph.number_of_vertices); - - // Ideally: - // - //::hits(graph.number_of_vertices, graph.number_of_edges, graph.offsets, graph.indices, - // max_iter, hubs, authorities, DEVICE); + // NOTE: gunrock doesn't support passing a starting value // - // For now, the following: - - CUDA_TRY(cudaMemcpy(local_offsets.data(), - graph.offsets, - (graph.number_of_vertices + 1) * sizeof(edge_t), - cudaMemcpyDeviceToHost)); - CUDA_TRY(cudaMemcpy(local_indices.data(), - graph.indices, - graph.number_of_edges * sizeof(vertex_t), - cudaMemcpyDeviceToHost)); - ::hits(graph.number_of_vertices, graph.number_of_edges, - local_offsets.data(), - local_indices.data(), + graph.offsets, + graph.indices, max_iter, - local_hubs.data(), - local_authorities.data()); - - CUDA_TRY(cudaMemcpy( - hubs, local_hubs.data(), graph.number_of_vertices * sizeof(weight_t), cudaMemcpyHostToDevice)); - CUDA_TRY(cudaMemcpy(authorities, - local_authorities.data(), - graph.number_of_vertices * sizeof(weight_t), - cudaMemcpyHostToDevice)); + tolerance, + HITS_NORMALIZATION_METHOD_1, + hubs, + authorities, + DEVICE); } template void hits(cugraph::GraphCSRView const &, @@ -98,5 +70,4 @@ template void hits(cugraph::GraphCSRView const &, float *); } // namespace gunrock - } // namespace cugraph diff --git a/python/cugraph/tests/test_hits.py b/python/cugraph/tests/test_hits.py index 30b6f20f478..0a261abc02c 100644 --- a/python/cugraph/tests/test_hits.py +++ b/python/cugraph/tests/test_hits.py @@ -37,16 +37,6 @@ print("Networkx version : {} ".format(nx.__version__)) -def cudify(d): - if d is None: - return None - - k = np.fromiter(d.keys(), dtype="int32") - v = np.fromiter(d.values(), dtype="float32") - cuD = cudf.DataFrame({"vertex": k, "values": v}) - return cuD - - def cugraph_call(cu_M, max_iter, tol): # cugraph hits Call @@ -78,21 +68,17 @@ def networkx_call(M, max_iter, tol): ) # same parameters as in NVGRAPH - pr = nx.hits(Gnx, max_iter, tol, normalized=True) + nx_hits = nx.hits(Gnx, max_iter, tol, normalized=True) t2 = time.time() - t1 print("Networkx Time : " + str(t2)) - return pr + return nx_hits MAX_ITERATIONS = [50] TOLERANCE = [1.0e-06] - -# Test all combinations of default/managed and pooled/non-pooled allocation - - @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) @@ -105,52 +91,19 @@ def test_hits(graph_file, max_iter, tol): cu_M = utils.read_csv_file(graph_file) cugraph_hits = cugraph_call(cu_M, max_iter, tol) - # Calculating mismatch - # hubs = sorted(hubs.items(), key=lambda x: x[0]) - # print("hubs = ", hubs) - - # - # Scores don't match. Networkx uses the 1-norm, - # gunrock uses a 2-norm. Eventually we'll add that - # as a parameter. For now, let's check the order - # which should match. We'll allow 6 digits to right - # of decimal point accuracy - # pdf = pd.DataFrame.from_dict(hubs, orient="index").sort_index() - pdf = pdf.multiply(1000000).floordiv(1) cugraph_hits["nx_hubs"] = cudf.Series.from_pandas(pdf[0]) pdf = pd.DataFrame.from_dict(authorities, orient="index").sort_index() - pdf = pdf.multiply(1000000).floordiv(1) cugraph_hits["nx_authorities"] = cudf.Series.from_pandas(pdf[0]) - # - # Sort by hubs (cugraph) in descending order. Then we'll - # check to make sure all scores are in descending order. - # - cugraph_hits = cugraph_hits.sort_values("hubs", ascending=False) - - assert cugraph_hits["hubs"].is_monotonic_decreasing - assert cugraph_hits["nx_hubs"].is_monotonic_decreasing - - cugraph_hits = cugraph_hits.sort_values("authorities", ascending=False) - - assert cugraph_hits["authorities"].is_monotonic_decreasing - assert cugraph_hits["nx_authorities"].is_monotonic_decreasing - + hubs_diffs1 = cugraph_hits.query('hubs - nx_hubs > 0.00001') + hubs_diffs2 = cugraph_hits.query('hubs - nx_hubs < -0.00001') + authorities_diffs1 = cugraph_hits.query('authorities - nx_authorities > 0.0001') + authorities_diffs2 = cugraph_hits.query('authorities - nx_authorities < -0.0001') -@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) -@pytest.mark.parametrize("max_iter", MAX_ITERATIONS) -@pytest.mark.parametrize("tol", TOLERANCE) -def test_hits_nx(graph_file, max_iter, tol): - gc.collect() - - M = utils.read_csv_for_nx(graph_file) - Gnx = nx.from_pandas_edgelist( - M, source="0", target="1", create_using=nx.DiGraph() - ) - nx_hubs, nx_authorities = nx.hits(Gnx, max_iter, tol, normalized=True) - cg_hubs, cg_authorities = cugraph.hits(Gnx, max_iter, tol, normalized=True) + assert len(hubs_diffs1) == 0 + assert len(hubs_diffs2) == 0 + assert len(authorities_diffs1) == 0 + assert len(authorities_diffs2) == 0 - # assert nx_hubs == cg_hubs - # assert nx_authorities == cg_authorities From 2eea759b23050a17b60eee8fc6148dbe33b45458 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Wed, 28 Oct 2020 15:52:18 -0400 Subject: [PATCH 23/41] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 858e6acfe51..ff312841b51 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ ## Bug Fixes - PR #1242 Calling gunrock cmake using explicit -D options, re-enabling C++ tests +- PR #1246 Use latest Gunrock, update HITS implementation # cuGraph 0.16.0 (21 Oct 2020) From 1f9f15e709537e8599db40df8ae17b4cf68b134f Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Wed, 28 Oct 2020 15:57:53 -0400 Subject: [PATCH 24/41] fix format issues --- cpp/src/link_analysis/gunrock_hits.cpp | 2 +- python/cugraph/tests/test_hits.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/cpp/src/link_analysis/gunrock_hits.cpp b/cpp/src/link_analysis/gunrock_hits.cpp index 5eef66e6a4d..8662c3bea79 100644 --- a/cpp/src/link_analysis/gunrock_hits.cpp +++ b/cpp/src/link_analysis/gunrock_hits.cpp @@ -30,7 +30,7 @@ namespace cugraph { namespace gunrock { -const int HOST{1}; // gunrock should expose the device constant at the API level. +const int HOST{1}; // gunrock should expose the device constant at the API level. const int DEVICE{2}; // gunrock should expose the device constant at the API level. template diff --git a/python/cugraph/tests/test_hits.py b/python/cugraph/tests/test_hits.py index 0a261abc02c..6b6f54937a6 100644 --- a/python/cugraph/tests/test_hits.py +++ b/python/cugraph/tests/test_hits.py @@ -13,7 +13,6 @@ import gc import time -import numpy as np import pandas as pd import pytest @@ -79,6 +78,7 @@ def networkx_call(M, max_iter, tol): MAX_ITERATIONS = [50] TOLERANCE = [1.0e-06] + @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) @@ -99,11 +99,12 @@ def test_hits(graph_file, max_iter, tol): hubs_diffs1 = cugraph_hits.query('hubs - nx_hubs > 0.00001') hubs_diffs2 = cugraph_hits.query('hubs - nx_hubs < -0.00001') - authorities_diffs1 = cugraph_hits.query('authorities - nx_authorities > 0.0001') - authorities_diffs2 = cugraph_hits.query('authorities - nx_authorities < -0.0001') + authorities_diffs1 = cugraph_hits.query( + 'authorities - nx_authorities > 0.0001') + authorities_diffs2 = cugraph_hits.query( + 'authorities - nx_authorities < -0.0001') assert len(hubs_diffs1) == 0 assert len(hubs_diffs2) == 0 assert len(authorities_diffs1) == 0 assert len(authorities_diffs2) == 0 - From 7fef2f1e8659ccd6673eb713cba084e3e445d8cd Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Wed, 28 Oct 2020 18:45:44 -0500 Subject: [PATCH 25/41] update tests --- python/cugraph/__init__.py | 8 ++ python/cugraph/structure/graph.py | 30 ++-- python/cugraph/tests/test_convert_matrix.py | 147 ++++++++++++++++++++ 3 files changed, 174 insertions(+), 11 deletions(-) create mode 100644 python/cugraph/tests/test_convert_matrix.py diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index f8984f25978..feab1cc3eb9 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -30,6 +30,14 @@ Graph, DiGraph, from_cudf_edgelist, + from_pandas_edgelist, + to_pandas_edgelist, + from_pandas_adjacency, + to_pandas_adjacency, + from_numpy_array, + to_numpy_array, + from_numpy_matrix, + to_numpy_matrix, hypergraph, symmetrize, symmetrize_df, diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index 3c8b423f1c5..956ab6cd059 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -19,7 +19,8 @@ import cudf import dask_cudf import cugraph.comms.comms as Comms - +import pandas as pd +import numpy as np from cugraph.dask.structure import replication @@ -433,7 +434,7 @@ def from_cudf_edgelist( def from_pandas_edgelist( self, - input_df, + pdf, source="source", destination="destination", edge_attr=None, @@ -475,7 +476,7 @@ def from_pandas_edgelist( edge_attr='2', renumber=False) """ - gdf = cudf.DataFrame.from_pandas(df) + gdf = cudf.DataFrame.from_pandas(pdf) self.from_cudf_edgelist(gdf, source=source, destination=destination, edge_attr=edge_attr, renumber=renumber) @@ -485,36 +486,43 @@ def to_pandas_edgelist(self, source='source', destination='destination'): def from_pandas_adjacency(self, pdf): np_array = pdf.to_numpy() - self.from_numpy_array(np_array) + columns = pdf.columns + self.from_numpy_array(np_array, columns) def to_pandas_adjacency(self): np_array_data = self.to_numpy_array() pdf = pd.DataFrame(np_array_data) + if self.renumbered: + nodes = self.renumber_map.implementation.df['0'].values_host.tolist() + pdf.columns = nodes + pdf.index = nodes return pdf def to_numpy_array(self): - import numpy as np nlen = self.number_of_nodes() elen = self.number_of_edges() df = self.edgelist.edgelist_df np_array = np.full((nlen, nlen), 0.0) for i in range(0, elen): - np_array[df['src'].iloc[i],df['dst'].iloc[i]] = 1.0 + np_array[df['src'].iloc[i],df['dst'].iloc[i]] = df['weights'].iloc[i] return np_array def to_numpy_matrix(self): - import numpy as np np_array = self.to_numpy_array() return np.asmatrix(np_array) - def from_numpy_array(self, np_array): + def from_numpy_array(self, np_array, nodes=None): src, dst = np_array.nonzero() weight = np_array[src, dst] df = cudf.DataFrame() - df['src'] = src - df['dst'] = dst + if nodes is not None: + df['src'] = nodes[src] + df['dst'] = nodes[dst] + else: + df['src'] = src + df['dst'] = dst df['weight'] = weight - self.from_cudf_edgelist(df, 'src', 'dst', edge_attr='weight', renumber=False) + self.from_cudf_edgelist(df, 'src', 'dst', edge_attr='weight') def from_numpy_matrix(self, np_matrix): np_array = np.asarray(np_matrix) diff --git a/python/cugraph/tests/test_convert_matrix.py b/python/cugraph/tests/test_convert_matrix.py new file mode 100644 index 00000000000..29e64f700f4 --- /dev/null +++ b/python/cugraph/tests/test_convert_matrix.py @@ -0,0 +1,147 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import pytest +import cugraph +from cugraph.tests import utils +import numpy as np + +# Temporarily suppress warnings till networkX fixes deprecation warnings +# (Using or importing the ABCs from 'collections' instead of from +# 'collections.abc' is deprecated, and in 3.8 it will stop working) for +# python 3.7. Also, this import networkx needs to be relocated in the +# third-party group once this gets fixed. +import warnings + +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + import networkx as nx + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_to_from_pandas(graph_file): + gc.collect() + + # Read in the graph + M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + + # create a NetworkX DiGraph and convert to pandas adjacency + nxG = nx.from_pandas_edgelist( + M, source="0", target="1", edge_attr="weight", + create_using=nx.DiGraph + ) + nx_pdf = nx.to_pandas_adjacency(nxG) + nx_pdf = nx_pdf[sorted(nx_pdf.columns)] + nx_pdf.sort_index(inplace=True) + + # create a cugraph DiGraph and convert to pandas adjacency + cuG = cugraph.from_pandas_edgelist( + M, source="0", destination="1", edge_attr="weight", + create_using=cugraph.DiGraph + ) + + cu_pdf = cugraph.to_pandas_adjacency(cuG) + cu_pdf = cu_pdf[sorted(cu_pdf.columns)] + cu_pdf.sort_index(inplace=True) + + # Compare pandas adjacency list + assert nx_pdf.equals(cu_pdf) + + # Convert pandas adjacency list to graph + new_nxG = nx.from_pandas_adjacency(nx_pdf, create_using=nx.DiGraph) + new_cuG = cugraph.from_pandas_adjacency(cu_pdf, + create_using=cugraph.DiGraph) + + # Compare pandas edgelist + exp_pdf = nx.to_pandas_edgelist(new_nxG) + res_pdf = cugraph.to_pandas_edgelist(new_cuG) + + exp_pdf = exp_pdf.rename(columns={"source": "src", "target": "dst", + "weight": "weights"}) + + exp_pdf = exp_pdf.sort_values(by=["src", "dst"]).reset_index(drop=True) + res_pdf = res_pdf.sort_values(by=["src", "dst"]).reset_index(drop=True) + res_pdf = res_pdf[['src', 'dst', 'weights']] + + assert exp_pdf.equals(res_pdf) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_from_to_numpy(graph_file): + gc.collect() + + # Read in the graph + M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + + # create NetworkX and cugraph DiGraph + nxG = nx.from_pandas_edgelist( + M, source="0", target="1", edge_attr="weight", + create_using=nx.DiGraph + ) + + cuG = cugraph.from_pandas_edgelist( + M, source="0", destination="1", edge_attr="weight", + create_using=cugraph.DiGraph + ) + + # convert graphs to numpy array + nparray_nx = nx.to_numpy_array(nxG, + nodelist=cuG.nodes().values_host) + nparray_cu = cugraph.to_numpy_array(cuG) + npmatrix_nx = nx.to_numpy_matrix(nxG, + nodelist=cuG.nodes().values_host) + npmatrix_cu = cugraph.to_numpy_matrix(cuG) + + # Compare arrays and matrices + assert np.array_equal(nparray_nx, nparray_cu) + assert np.array_equal(np.asarray(npmatrix_nx), + np.asarray(npmatrix_cu)) + + # Create graphs from numpy array + new_nxG = nx.from_numpy_array(nparray_nx, + create_using=nx.DiGraph) + new_cuG = cugraph.from_numpy_array(nparray_cu, + create_using=cugraph.DiGraph) + + # Assert graphs are same + exp_pdf = nx.to_pandas_edgelist(new_nxG) + res_pdf = cugraph.to_pandas_edgelist(new_cuG) + + exp_pdf = exp_pdf.rename(columns={"source": "src", "target": "dst", + "weight": "weights"}) + + exp_pdf = exp_pdf.sort_values(by=["src", "dst"]).reset_index(drop=True) + res_pdf = res_pdf.sort_values(by=["src", "dst"]).reset_index(drop=True) + res_pdf = res_pdf[['src', 'dst', 'weights']] + + assert exp_pdf.equals(res_pdf) + + # Create graphs from numpy matrix + new_nxG = nx.from_numpy_matrix(npmatrix_nx, + create_using=nx.DiGraph) + new_cuG = cugraph.from_numpy_matrix(npmatrix_cu, + create_using=cugraph.DiGraph) + + # Assert graphs are same + exp_pdf = nx.to_pandas_edgelist(new_nxG) + res_pdf = cugraph.to_pandas_edgelist(new_cuG) + + exp_pdf = exp_pdf.rename(columns={"source": "src", "target": "dst", + "weight": "weights"}) + + exp_pdf = exp_pdf.sort_values(by=["src", "dst"]).reset_index(drop=True) + res_pdf = res_pdf.sort_values(by=["src", "dst"]).reset_index(drop=True) + res_pdf = res_pdf[['src', 'dst', 'weights']] + + assert exp_pdf.equals(res_pdf) From 5ef3a2378155897d411511e6f77df94b838972b9 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Fri, 30 Oct 2020 11:04:31 -0500 Subject: [PATCH 26/41] add docs --- python/cugraph/structure/convert_matrix.py | 93 ++++++++++++++++++++++ python/cugraph/structure/graph.py | 42 +++++++++- 2 files changed, 133 insertions(+), 2 deletions(-) diff --git a/python/cugraph/structure/convert_matrix.py b/python/cugraph/structure/convert_matrix.py index e749d46e48e..8acdc7e1799 100644 --- a/python/cugraph/structure/convert_matrix.py +++ b/python/cugraph/structure/convert_matrix.py @@ -66,12 +66,55 @@ def from_cudf_edgelist(df, source='source', destination='destination', return G + def from_pandas_edgelist(df, source="source", destination="destination", edge_attr=None, create_using=Graph, renumber=True): + """ + Initialize a graph from the edge list. It is an error to call this + method on an initialized Graph object. Source argument is source + column name and destination argument is destination column name. + + By default, renumbering is enabled to map the source and destination + vertices into an index in the range [0, V) where V is the number + of vertices. If the input vertices are a single column of integers + in the range [0, V), renumbering can be disabled and the original + external vertex ids will be used. + + If weights are present, edge_attr argument is the weights column name. + + Parameters + ---------- + input_df : pandas.DataFrame + A DataFrame that contains edge information + source : str or array-like + source column name or array of column names + destination : str or array-like + destination column name or array of column names + edge_attr : str or None + the weights column name. Default is None + renumber : bool + Indicate whether or not to renumber the source and destination + vertex IDs. Default is True. + create_using: cugraph.DiGraph or cugraph.Graph + Indicate whether to create a directed or undirected graph + + Returns + ------- + G : cugraph.DiGraph or cugraph.Graph + graph containing edges from the pandas edgelist + + Examples + -------- + >>> df = pandas.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_pandas_edgelist(df, source='0', destination='1', + edge_attr='2', renumber=False) + """ if create_using is Graph: G = Graph() elif create_using is DiGraph: @@ -83,11 +126,36 @@ def from_pandas_edgelist(df, edge_attr=edge_attr, renumber=renumber) return G + def to_pandas_edgelist(G, source='source', destination='destination'): + """ + Returns the graph edge list as a Pandas DataFrame. + + Parameters + ---------- + G : cugraph.Graph or cugraph.DiGraph + Graph containg the edgelist. + source : str or array-like + source column name or array of column names + destination : str or array-like + destination column name or array of column names + + Returns + ------ + df : pandas.DataFrame + pandas dataframe containing the edgelist as source and + destination columns. + """ pdf = G.to_pandas_edgelist(source=source, destination=destination) return pdf + def from_pandas_adjacency(df, create_using=Graph): + """ + Initializes the graph from pandas adjacency matrix. + Set create_using to cugraph.DiGraph for directed graph and + cugraph.Graph for undirected Graph. + """ if create_using is Graph: G = Graph() elif create_using is DiGraph: @@ -98,11 +166,22 @@ def from_pandas_adjacency(df, create_using=Graph): G.from_pandas_adjacency(df) return G + def to_pandas_adjacency(G): + """ + Returns the graph adjacency matrix as a Pandas DataFrame. + The row indices denote source and column names denote destination. + """ pdf = G.to_pandas_adjacency() return pdf + def from_numpy_array(A, create_using=Graph): + """ + Initializes the graph from numpy array containing adjacency matrix. + Set create_using to cugraph.DiGraph for directed graph and + cugraph.Graph for undirected Graph. + """ if create_using is Graph: G = Graph() elif create_using is DiGraph: @@ -113,11 +192,21 @@ def from_numpy_array(A, create_using=Graph): G.from_numpy_array(A) return G + def to_numpy_array(G): + """ + Returns the graph adjacency matrix as a NumPy array. + """ A = G.to_numpy_array() return A + def from_numpy_matrix(A, create_using=Graph): + """ + Initializes the graph from numpy matrix containing adjacency matrix. + Set create_using to cugraph.DiGraph for directed graph and + cugraph.Graph for undirected Graph. + """ if create_using is Graph: G = Graph() elif create_using is DiGraph: @@ -127,6 +216,10 @@ def from_numpy_matrix(A, create_using=Graph): G.from_numpy_matrix(A) return G + def to_numpy_matrix(G): + """ + Returns the graph adjacency matrix as a NumPy matrix. + """ A = G.to_numpy_matrix() return A diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index 956ab6cd059..748e240ad4c 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -481,37 +481,72 @@ def from_pandas_edgelist( edge_attr=edge_attr, renumber=renumber) def to_pandas_edgelist(self, source='source', destination='destination'): + """ + Returns the graph edge list as a Pandas DataFrame. + + Parameters + ---------- + source : str or array-like + source column name or array of column names + destination : str or array-like + destination column name or array of column names + + Returns + ------- + df : pandas.DataFrame + """ + gdf = self.view_edge_list() return gdf.to_pandas() def from_pandas_adjacency(self, pdf): + """ + Initializes the graph from pandas adjacency matrix + """ np_array = pdf.to_numpy() columns = pdf.columns self.from_numpy_array(np_array, columns) def to_pandas_adjacency(self): + """ + Returns the graph adjacency matrix as a Pandas DataFrame. + """ + np_array_data = self.to_numpy_array() pdf = pd.DataFrame(np_array_data) if self.renumbered: - nodes = self.renumber_map.implementation.df['0'].values_host.tolist() + nodes = self.renumber_map.implementation.df['0'].\ + values_host.tolist() pdf.columns = nodes pdf.index = nodes return pdf def to_numpy_array(self): + """ + Returns the graph adjacency matrix as a NumPy array. + """ + nlen = self.number_of_nodes() elen = self.number_of_edges() df = self.edgelist.edgelist_df np_array = np.full((nlen, nlen), 0.0) for i in range(0, elen): - np_array[df['src'].iloc[i],df['dst'].iloc[i]] = df['weights'].iloc[i] + np_array[df['src'].iloc[i], df['dst'].iloc[i]] = df['weights'].\ + iloc[i] return np_array def to_numpy_matrix(self): + """ + Returns the graph adjacency matrix as a NumPy matrix. + """ + np_array = self.to_numpy_array() return np.asmatrix(np_array) def from_numpy_array(self, np_array, nodes=None): + """ + Initializes the graph from numpy array containing adjacency matrix. + """ src, dst = np_array.nonzero() weight = np_array[src, dst] df = cudf.DataFrame() @@ -525,6 +560,9 @@ def from_numpy_array(self, np_array, nodes=None): self.from_cudf_edgelist(df, 'src', 'dst', edge_attr='weight') def from_numpy_matrix(self, np_matrix): + """ + Initializes the graph from numpy matrix containing adjacency matrix. + """ np_array = np.asarray(np_matrix) self.from_numpy_array(np_array) From e7dd8e4b3db54ab20fe7fb2d0b0b357688b7a916 Mon Sep 17 00:00:00 2001 From: Ishika Roy Date: Fri, 30 Oct 2020 11:06:14 -0500 Subject: [PATCH 27/41] add changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 938a2e9f805..11d9fbf1144 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # cuGraph 0.17.0 (Date TBD) ## New Features +- PR #1245 Add functions to add pandas and numpy compatibility ## Improvements From 3a4cfac829bb48b43f87ad6f45071d29b446ce9a Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Fri, 30 Oct 2020 11:38:08 -0500 Subject: [PATCH 28/41] Changed the MG context testing class to use updated parameters passed in from the individual tests. --- python/cugraph/tests/dask/mg_context.py | 16 ++++--- .../test_mg_batch_betweenness_centrality.py | 6 ++- ...st_mg_batch_edge_betweenness_centrality.py | 7 ++- .../cugraph/tests/dask/test_mg_replication.py | 45 ++++++++++++++----- 4 files changed, 52 insertions(+), 22 deletions(-) diff --git a/python/cugraph/tests/dask/mg_context.py b/python/cugraph/tests/dask/mg_context.py index 9a7ea2ace67..b4296459eab 100644 --- a/python/cugraph/tests/dask/mg_context.py +++ b/python/cugraph/tests/dask/mg_context.py @@ -54,11 +54,12 @@ class MGContext: Number of devices to use, verification must be done prior to call to ensure that there are enough devices available. """ - def __init__(self, number_of_devices=None, rmm_managed_memory=False): + def __init__(self, number_of_devices=None, rmm_managed_memory=False, cluster=None, p2p=False): self._number_of_devices = number_of_devices self._rmm_managed_memory = rmm_managed_memory - self._cluster = None + self._cluster = cluster self._client = None + self._p2p = p2p @property def client(self): @@ -78,17 +79,18 @@ def _prepare_mg(self): self._prepare_comms() def _prepare_cluster(self): - self._cluster = CUDACluster( - n_workers=self._number_of_devices, - rmm_managed_memory=self._rmm_managed_memory - ) + if self._cluster is not None: + self._cluster = CUDACluster( + n_workers=self._number_of_devices, + rmm_managed_memory=self._rmm_managed_memory + ) def _prepare_client(self): self._client = Client(self._cluster) self._client.wait_for_workers(self._number_of_devices) def _prepare_comms(self): - Comms.initialize() + Comms.initialize(p2p=self._p2p) def _close(self): Comms.destroy() diff --git a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py index 53942a277c2..aede0650487 100644 --- a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py +++ b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py @@ -14,6 +14,8 @@ import pytest import numpy as np +from dask_cuda import LocalCUDACluster + from cugraph.tests.dask.mg_context import MGContext, skip_if_not_enough_devices from cugraph.dask.common.mg_utils import is_single_gpu @@ -68,7 +70,9 @@ def test_mg_betweenness_centrality( ): prepare_test() skip_if_not_enough_devices(mg_device_count) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + cluster=LocalCUDACluster(), + p2p=True): sorted_df = calc_betweenness_centrality( graph_file, directed=directed, diff --git a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py index 7778f7bf421..f176342e8f1 100644 --- a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py +++ b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py @@ -13,8 +13,9 @@ import pytest import numpy as np -from cugraph.dask.common.mg_utils import is_single_gpu +from dask_cuda import LocalCUDACluster +from cugraph.dask.common.mg_utils import is_single_gpu from cugraph.tests.dask.mg_context import MGContext, skip_if_not_enough_devices @@ -64,7 +65,9 @@ def test_mg_edge_betweenness_centrality( ): prepare_test() skip_if_not_enough_devices(mg_device_count) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + cluster=LocalCUDACluster(), + p2p=True): sorted_df = calc_edge_betweenness_centrality( graph_file, directed=directed, diff --git a/python/cugraph/tests/dask/test_mg_replication.py b/python/cugraph/tests/dask/test_mg_replication.py index d8a2676b32b..0db981cc994 100644 --- a/python/cugraph/tests/dask/test_mg_replication.py +++ b/python/cugraph/tests/dask/test_mg_replication.py @@ -11,14 +11,17 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pytest +import gc + +from dask_cuda import LocalCUDACluster +import cudf + import cugraph from cugraph.tests.dask.mg_context import MGContext, skip_if_not_enough_devices -import cudf import cugraph.dask.structure.replication as replication from cugraph.dask.common.mg_utils import is_single_gpu import cugraph.tests.utils as utils -import pytest -import gc DATASETS_OPTIONS = utils.DATASETS_SMALL DIRECTED_GRAPH_OPTIONS = [False, True] @@ -42,7 +45,9 @@ def test_replicate_cudf_dataframe_with_weights( names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], ) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + cluster=LocalCUDACluster(), + p2p=True): worker_to_futures = replication.replicate_cudf_dataframe(df) for worker in worker_to_futures: replicated_df = worker_to_futures[worker].result() @@ -65,7 +70,9 @@ def test_replicate_cudf_dataframe_no_weights(input_data_path, mg_device_count): names=["src", "dst"], dtype=["int32", "int32"], ) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + cluster=LocalCUDACluster(), + p2p=True): worker_to_futures = replication.replicate_cudf_dataframe(df) for worker in worker_to_futures: replicated_df = worker_to_futures[worker].result() @@ -88,7 +95,9 @@ def test_replicate_cudf_series(input_data_path, mg_device_count): names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], ) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + cluster=LocalCUDACluster(), + p2p=True): for column in df.columns.values: series = df[column] worker_to_futures = replication.replicate_cudf_series(series) @@ -146,7 +155,9 @@ def test_enable_batch_context_then_views( gc.collect() skip_if_not_enough_devices(mg_device_count) G = utils.generate_cugraph_graph_from_file(graph_file, directed) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + cluster=LocalCUDACluster(), + p2p=True): assert G.batch_enabled is False, "Internal property should be False" G.enable_batch() assert G.batch_enabled is True, "Internal property should be True" @@ -181,7 +192,9 @@ def test_enable_batch_view_then_context(graph_file, directed, mg_device_count): G.view_transposed_adj_list() assert G.batch_transposed_adjlists is None - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + cluster=LocalCUDACluster(), + p2p=True): assert G.batch_enabled is False, "Internal property should be False" G.enable_batch() assert G.batch_enabled is True, "Internal property should be True" @@ -204,7 +217,9 @@ def test_enable_batch_context_no_context_views( gc.collect() skip_if_not_enough_devices(mg_device_count) G = utils.generate_cugraph_graph_from_file(graph_file, directed) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + cluster=LocalCUDACluster(), + p2p=True): assert G.batch_enabled is False, "Internal property should be False" G.enable_batch() assert G.batch_enabled is True, "Internal property should be True" @@ -228,7 +243,9 @@ def test_enable_batch_edgelist_replication( gc.collect() skip_if_not_enough_devices(mg_device_count) G = utils.generate_cugraph_graph_from_file(graph_file, directed) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + cluster=LocalCUDACluster(), + p2p=True): G.enable_batch() df = G.edgelist.edgelist_df for worker in G.batch_edgelists: @@ -257,7 +274,9 @@ def test_enable_batch_adjlist_replication_weights( G.from_cudf_edgelist( df, source="src", destination="dst", edge_attr="value" ) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + cluster=LocalCUDACluster(), + p2p=True): G.enable_batch() G.view_adj_list() adjlist = G.adjlist @@ -296,7 +315,9 @@ def test_enable_batch_adjlist_replication_no_weights( ) G = cugraph.DiGraph() if directed else cugraph.Graph() G.from_cudf_edgelist(df, source="src", destination="dst") - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + cluster=LocalCUDACluster(), + p2p=True): G.enable_batch() G.view_adj_list() adjlist = G.adjlist From e04fee80e10c71492f8d55d16b7408cbfbd096aa Mon Sep 17 00:00:00 2001 From: Dillon Cullinan Date: Fri, 30 Oct 2020 12:38:42 -0400 Subject: [PATCH 29/41] FIX Fix upload script --- ci/cpu/upload.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ci/cpu/upload.sh b/ci/cpu/upload.sh index ff10072ee3c..0fca82216c3 100644 --- a/ci/cpu/upload.sh +++ b/ci/cpu/upload.sh @@ -38,14 +38,14 @@ export CUGRAPH_FILE=`conda build conda/recipes/cugraph --python=$PYTHON --output gpuci_logger "Starting conda uploads" -if [ "$BUILD_LIBCUGRAPH" == "1" && "$UPLOAD_LIBCUGRAPH" == "1" ]; then +if [[ "$BUILD_LIBCUGRAPH" == "1" && "$UPLOAD_LIBCUGRAPH" == "1" ]]; then test -e ${LIBCUGRAPH_FILE} echo "Upload libcugraph" echo ${LIBCUGRAPH_FILE} gpuci_retry anaconda -t ${MY_UPLOAD_KEY} upload -u ${CONDA_USERNAME:-rapidsai} ${LABEL_OPTION} --skip-existing ${LIBCUGRAPH_FILE} fi -if [ "$BUILD_CUGRAPH" == "1" && "$UPLOAD_CUGRAPH" == "1" ]; then +if [[ "$BUILD_CUGRAPH" == "1" && "$UPLOAD_CUGRAPH" == "1" ]]; then test -e ${CUGRAPH_FILE} echo "Upload cugraph" echo ${CUGRAPH_FILE} From 4feddc0f07925becc8b5e28987803dc465f27fdf Mon Sep 17 00:00:00 2001 From: Dillon Cullinan Date: Fri, 30 Oct 2020 12:40:47 -0400 Subject: [PATCH 30/41] DOC Update Changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 858e6acfe51..9fcde6add0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -67,7 +67,7 @@ - PR #1233 Temporarily disabling C++ tests for 0.16 - PR #1240 Require `ucx-proc=*=gpu` - PR #1241 Fix a bug in personalized PageRank with the new graph primitives API. - +- PR #1249 Fix upload script syntax # cuGraph 0.15.0 (26 Aug 2020) From c3373c28f8debe6320ffa73585714cd35de7956d Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Fri, 30 Oct 2020 11:50:19 -0500 Subject: [PATCH 31/41] Updated cuco commit hash to latest as of 2020-10-30. --- cpp/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 7fb82258cc5..2be92159a1d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -225,7 +225,7 @@ message("Fetching cuco") FetchContent_Declare( cuco GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git - GIT_TAG 729d07db2e544e173efefdd168db21f7b8adcfaf + GIT_TAG 5f94cdd3b3df0e5f79c47fb772497d6e42455414 GIT_SHALLOW true ) From d059e06d4428bd3136dff4d20f6556decd8686d7 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Fri, 30 Oct 2020 11:55:55 -0500 Subject: [PATCH 32/41] Added PR 1250 to CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 858e6acfe51..52b95cd7890 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ ## Bug Fixes - PR #1242 Calling gunrock cmake using explicit -D options, re-enabling C++ tests +- PR #1250 Updated cuco commit hash to latest as of 2020-10-30 # cuGraph 0.16.0 (21 Oct 2020) From f3772bd2719caa5a97a3e8160b69cc6132427109 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Fri, 30 Oct 2020 14:56:59 -0500 Subject: [PATCH 33/41] Removing unneeded GIT_SHALLOW param for cuco since it's only needed for cloning branches or tags. --- cpp/CMakeLists.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2be92159a1d..ce1a1fae88a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -226,7 +226,6 @@ FetchContent_Declare( cuco GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git GIT_TAG 5f94cdd3b3df0e5f79c47fb772497d6e42455414 - GIT_SHALLOW true ) FetchContent_GetProperties(cuco) From 9a81c19d66680a50ea1fde9c654efa3519862e7c Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Fri, 30 Oct 2020 15:09:10 -0500 Subject: [PATCH 34/41] Update CHANGELOG.md entry for PR 1250 to be more accurate with recent change. --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 52b95cd7890..dd37212298c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,7 +7,7 @@ ## Bug Fixes - PR #1242 Calling gunrock cmake using explicit -D options, re-enabling C++ tests -- PR #1250 Updated cuco commit hash to latest as of 2020-10-30 +- PR #1250 Updated cuco commit hash to latest as of 2020-10-30 and removed unneeded GIT_SHALLOW param # cuGraph 0.16.0 (21 Oct 2020) From bef5e0002863f9fa6abbbaa74041f9038fc64034 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Fri, 30 Oct 2020 15:41:02 -0500 Subject: [PATCH 35/41] Added PR 1251 to CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index dd37212298c..53097760a61 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ ## Bug Fixes - PR #1242 Calling gunrock cmake using explicit -D options, re-enabling C++ tests - PR #1250 Updated cuco commit hash to latest as of 2020-10-30 and removed unneeded GIT_SHALLOW param +- PR #1251 Changed the MG context testing class to use updated parameters passed in from the individual tests # cuGraph 0.16.0 (21 Oct 2020) From cb040b425d7a107fcdcad800eafc08fa341f3227 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Fri, 30 Oct 2020 16:31:33 -0500 Subject: [PATCH 36/41] Refactored to eliminate the need for _prepare_cluster(), removed unneeded LocalCUDACluster arg. --- python/cugraph/tests/dask/mg_context.py | 25 ++++++++++--------- .../test_mg_batch_betweenness_centrality.py | 3 --- ...st_mg_batch_edge_betweenness_centrality.py | 2 -- .../cugraph/tests/dask/test_mg_replication.py | 10 -------- 4 files changed, 13 insertions(+), 27 deletions(-) diff --git a/python/cugraph/tests/dask/mg_context.py b/python/cugraph/tests/dask/mg_context.py index b4296459eab..5da28a8d4e5 100644 --- a/python/cugraph/tests/dask/mg_context.py +++ b/python/cugraph/tests/dask/mg_context.py @@ -51,15 +51,24 @@ class MGContext: ----------- number_of_devices : int - Number of devices to use, verification must be done prior to call - to ensure that there are enough devices available. + Number of devices to use, verification must be done prior to call to + ensure that there are enough devices available. If not specified, the + cluster will be initialized to use all visible devices. + rmm_managed_memory : bool + True to enable managed memory (UVM) in RMM as part of the + cluster. Default is False. + p2p : bool + Initialize UCX endpoints if True. Default is False. """ - def __init__(self, number_of_devices=None, rmm_managed_memory=False, cluster=None, p2p=False): + def __init__(self, number_of_devices=None, rmm_managed_memory=False, p2p=False): self._number_of_devices = number_of_devices self._rmm_managed_memory = rmm_managed_memory - self._cluster = cluster self._client = None self._p2p = p2p + self._cluster = CUDACluster( + n_workers=self._number_of_devices, + rmm_managed_memory=self._rmm_managed_memory + ) @property def client(self): @@ -74,17 +83,9 @@ def __enter__(self): return self def _prepare_mg(self): - self._prepare_cluster() self._prepare_client() self._prepare_comms() - def _prepare_cluster(self): - if self._cluster is not None: - self._cluster = CUDACluster( - n_workers=self._number_of_devices, - rmm_managed_memory=self._rmm_managed_memory - ) - def _prepare_client(self): self._client = Client(self._cluster) self._client.wait_for_workers(self._number_of_devices) diff --git a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py index aede0650487..4d04bf6df85 100644 --- a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py +++ b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py @@ -14,8 +14,6 @@ import pytest import numpy as np -from dask_cuda import LocalCUDACluster - from cugraph.tests.dask.mg_context import MGContext, skip_if_not_enough_devices from cugraph.dask.common.mg_utils import is_single_gpu @@ -71,7 +69,6 @@ def test_mg_betweenness_centrality( prepare_test() skip_if_not_enough_devices(mg_device_count) with MGContext(number_of_devices=mg_device_count, - cluster=LocalCUDACluster(), p2p=True): sorted_df = calc_betweenness_centrality( graph_file, diff --git a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py index f176342e8f1..1e4a1950c53 100644 --- a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py +++ b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py @@ -13,7 +13,6 @@ import pytest import numpy as np -from dask_cuda import LocalCUDACluster from cugraph.dask.common.mg_utils import is_single_gpu @@ -66,7 +65,6 @@ def test_mg_edge_betweenness_centrality( prepare_test() skip_if_not_enough_devices(mg_device_count) with MGContext(number_of_devices=mg_device_count, - cluster=LocalCUDACluster(), p2p=True): sorted_df = calc_edge_betweenness_centrality( graph_file, diff --git a/python/cugraph/tests/dask/test_mg_replication.py b/python/cugraph/tests/dask/test_mg_replication.py index 0db981cc994..2b8510cd9ff 100644 --- a/python/cugraph/tests/dask/test_mg_replication.py +++ b/python/cugraph/tests/dask/test_mg_replication.py @@ -14,7 +14,6 @@ import pytest import gc -from dask_cuda import LocalCUDACluster import cudf import cugraph @@ -46,7 +45,6 @@ def test_replicate_cudf_dataframe_with_weights( dtype=["int32", "int32", "float32"], ) with MGContext(number_of_devices=mg_device_count, - cluster=LocalCUDACluster(), p2p=True): worker_to_futures = replication.replicate_cudf_dataframe(df) for worker in worker_to_futures: @@ -71,7 +69,6 @@ def test_replicate_cudf_dataframe_no_weights(input_data_path, mg_device_count): dtype=["int32", "int32"], ) with MGContext(number_of_devices=mg_device_count, - cluster=LocalCUDACluster(), p2p=True): worker_to_futures = replication.replicate_cudf_dataframe(df) for worker in worker_to_futures: @@ -96,7 +93,6 @@ def test_replicate_cudf_series(input_data_path, mg_device_count): dtype=["int32", "int32", "float32"], ) with MGContext(number_of_devices=mg_device_count, - cluster=LocalCUDACluster(), p2p=True): for column in df.columns.values: series = df[column] @@ -156,7 +152,6 @@ def test_enable_batch_context_then_views( skip_if_not_enough_devices(mg_device_count) G = utils.generate_cugraph_graph_from_file(graph_file, directed) with MGContext(number_of_devices=mg_device_count, - cluster=LocalCUDACluster(), p2p=True): assert G.batch_enabled is False, "Internal property should be False" G.enable_batch() @@ -193,7 +188,6 @@ def test_enable_batch_view_then_context(graph_file, directed, mg_device_count): assert G.batch_transposed_adjlists is None with MGContext(number_of_devices=mg_device_count, - cluster=LocalCUDACluster(), p2p=True): assert G.batch_enabled is False, "Internal property should be False" G.enable_batch() @@ -218,7 +212,6 @@ def test_enable_batch_context_no_context_views( skip_if_not_enough_devices(mg_device_count) G = utils.generate_cugraph_graph_from_file(graph_file, directed) with MGContext(number_of_devices=mg_device_count, - cluster=LocalCUDACluster(), p2p=True): assert G.batch_enabled is False, "Internal property should be False" G.enable_batch() @@ -244,7 +237,6 @@ def test_enable_batch_edgelist_replication( skip_if_not_enough_devices(mg_device_count) G = utils.generate_cugraph_graph_from_file(graph_file, directed) with MGContext(number_of_devices=mg_device_count, - cluster=LocalCUDACluster(), p2p=True): G.enable_batch() df = G.edgelist.edgelist_df @@ -275,7 +267,6 @@ def test_enable_batch_adjlist_replication_weights( df, source="src", destination="dst", edge_attr="value" ) with MGContext(number_of_devices=mg_device_count, - cluster=LocalCUDACluster(), p2p=True): G.enable_batch() G.view_adj_list() @@ -316,7 +307,6 @@ def test_enable_batch_adjlist_replication_no_weights( G = cugraph.DiGraph() if directed else cugraph.Graph() G.from_cudf_edgelist(df, source="src", destination="dst") with MGContext(number_of_devices=mg_device_count, - cluster=LocalCUDACluster(), p2p=True): G.enable_batch() G.view_adj_list() From 359a02b396deccc04573768f5676774668ae1d7c Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Fri, 30 Oct 2020 16:43:35 -0500 Subject: [PATCH 37/41] Fixed flake8 error. --- python/cugraph/tests/dask/mg_context.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/cugraph/tests/dask/mg_context.py b/python/cugraph/tests/dask/mg_context.py index 5da28a8d4e5..a72cf1c4b04 100644 --- a/python/cugraph/tests/dask/mg_context.py +++ b/python/cugraph/tests/dask/mg_context.py @@ -60,7 +60,10 @@ class MGContext: p2p : bool Initialize UCX endpoints if True. Default is False. """ - def __init__(self, number_of_devices=None, rmm_managed_memory=False, p2p=False): + def __init__(self, + number_of_devices=None, + rmm_managed_memory=False, + p2p=False): self._number_of_devices = number_of_devices self._rmm_managed_memory = rmm_managed_memory self._client = None From edb6de714e07de6eefb30823531786876770709a Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Fri, 30 Oct 2020 18:39:26 -0500 Subject: [PATCH 38/41] [REVIEW] BUG Updated cuco commit hash to latest as of 2020-10-30 (#1250) * Updated cuco commit hash to latest as of 2020-10-30. * Added PR 1250 to CHANGELOG.md * Removing unneeded GIT_SHALLOW param for cuco since it's only needed for cloning branches or tags. * Update CHANGELOG.md entry for PR 1250 to be more accurate with recent change. --- CHANGELOG.md | 1 + cpp/CMakeLists.txt | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9fcde6add0e..50bbf794b02 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ ## Bug Fixes - PR #1242 Calling gunrock cmake using explicit -D options, re-enabling C++ tests +- PR #1250 Updated cuco commit hash to latest as of 2020-10-30 and removed unneeded GIT_SHALLOW param # cuGraph 0.16.0 (21 Oct 2020) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 7fb82258cc5..ce1a1fae88a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -225,8 +225,7 @@ message("Fetching cuco") FetchContent_Declare( cuco GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git - GIT_TAG 729d07db2e544e173efefdd168db21f7b8adcfaf - GIT_SHALLOW true + GIT_TAG 5f94cdd3b3df0e5f79c47fb772497d6e42455414 ) FetchContent_GetProperties(cuco) From 86538c22a9a206c6f5399ac8663caa1a94eec672 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Fri, 30 Oct 2020 23:35:08 -0500 Subject: [PATCH 39/41] [REVIEW] BUG Changed the MG context testing class to use updated parameters passed in from the individual tests (#1251) * Changed the MG context testing class to use updated parameters passed in from the individual tests. * Updated cuco commit hash to latest as of 2020-10-30. * Added PR 1250 to CHANGELOG.md * Removing unneeded GIT_SHALLOW param for cuco since it's only needed for cloning branches or tags. * Update CHANGELOG.md entry for PR 1250 to be more accurate with recent change. * Added PR 1251 to CHANGELOG.md * Refactored to eliminate the need for _prepare_cluster(), removed unneeded LocalCUDACluster arg. * Fixed flake8 error. --- CHANGELOG.md | 1 + python/cugraph/tests/dask/mg_context.py | 30 +++++++++------- .../test_mg_batch_betweenness_centrality.py | 3 +- ...st_mg_batch_edge_betweenness_centrality.py | 5 +-- .../cugraph/tests/dask/test_mg_replication.py | 35 ++++++++++++------- 5 files changed, 47 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 50bbf794b02..540c1fed3af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ ## Bug Fixes - PR #1242 Calling gunrock cmake using explicit -D options, re-enabling C++ tests - PR #1250 Updated cuco commit hash to latest as of 2020-10-30 and removed unneeded GIT_SHALLOW param +- PR #1251 Changed the MG context testing class to use updated parameters passed in from the individual tests # cuGraph 0.16.0 (21 Oct 2020) diff --git a/python/cugraph/tests/dask/mg_context.py b/python/cugraph/tests/dask/mg_context.py index 9a7ea2ace67..a72cf1c4b04 100644 --- a/python/cugraph/tests/dask/mg_context.py +++ b/python/cugraph/tests/dask/mg_context.py @@ -51,14 +51,27 @@ class MGContext: ----------- number_of_devices : int - Number of devices to use, verification must be done prior to call - to ensure that there are enough devices available. + Number of devices to use, verification must be done prior to call to + ensure that there are enough devices available. If not specified, the + cluster will be initialized to use all visible devices. + rmm_managed_memory : bool + True to enable managed memory (UVM) in RMM as part of the + cluster. Default is False. + p2p : bool + Initialize UCX endpoints if True. Default is False. """ - def __init__(self, number_of_devices=None, rmm_managed_memory=False): + def __init__(self, + number_of_devices=None, + rmm_managed_memory=False, + p2p=False): self._number_of_devices = number_of_devices self._rmm_managed_memory = rmm_managed_memory - self._cluster = None self._client = None + self._p2p = p2p + self._cluster = CUDACluster( + n_workers=self._number_of_devices, + rmm_managed_memory=self._rmm_managed_memory + ) @property def client(self): @@ -73,22 +86,15 @@ def __enter__(self): return self def _prepare_mg(self): - self._prepare_cluster() self._prepare_client() self._prepare_comms() - def _prepare_cluster(self): - self._cluster = CUDACluster( - n_workers=self._number_of_devices, - rmm_managed_memory=self._rmm_managed_memory - ) - def _prepare_client(self): self._client = Client(self._cluster) self._client.wait_for_workers(self._number_of_devices) def _prepare_comms(self): - Comms.initialize() + Comms.initialize(p2p=self._p2p) def _close(self): Comms.destroy() diff --git a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py index 53942a277c2..4d04bf6df85 100644 --- a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py +++ b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py @@ -68,7 +68,8 @@ def test_mg_betweenness_centrality( ): prepare_test() skip_if_not_enough_devices(mg_device_count) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): sorted_df = calc_betweenness_centrality( graph_file, directed=directed, diff --git a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py index 7778f7bf421..1e4a1950c53 100644 --- a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py +++ b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py @@ -13,8 +13,8 @@ import pytest import numpy as np -from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.dask.common.mg_utils import is_single_gpu from cugraph.tests.dask.mg_context import MGContext, skip_if_not_enough_devices @@ -64,7 +64,8 @@ def test_mg_edge_betweenness_centrality( ): prepare_test() skip_if_not_enough_devices(mg_device_count) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): sorted_df = calc_edge_betweenness_centrality( graph_file, directed=directed, diff --git a/python/cugraph/tests/dask/test_mg_replication.py b/python/cugraph/tests/dask/test_mg_replication.py index d8a2676b32b..2b8510cd9ff 100644 --- a/python/cugraph/tests/dask/test_mg_replication.py +++ b/python/cugraph/tests/dask/test_mg_replication.py @@ -11,14 +11,16 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pytest +import gc + +import cudf + import cugraph from cugraph.tests.dask.mg_context import MGContext, skip_if_not_enough_devices -import cudf import cugraph.dask.structure.replication as replication from cugraph.dask.common.mg_utils import is_single_gpu import cugraph.tests.utils as utils -import pytest -import gc DATASETS_OPTIONS = utils.DATASETS_SMALL DIRECTED_GRAPH_OPTIONS = [False, True] @@ -42,7 +44,8 @@ def test_replicate_cudf_dataframe_with_weights( names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], ) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): worker_to_futures = replication.replicate_cudf_dataframe(df) for worker in worker_to_futures: replicated_df = worker_to_futures[worker].result() @@ -65,7 +68,8 @@ def test_replicate_cudf_dataframe_no_weights(input_data_path, mg_device_count): names=["src", "dst"], dtype=["int32", "int32"], ) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): worker_to_futures = replication.replicate_cudf_dataframe(df) for worker in worker_to_futures: replicated_df = worker_to_futures[worker].result() @@ -88,7 +92,8 @@ def test_replicate_cudf_series(input_data_path, mg_device_count): names=["src", "dst", "value"], dtype=["int32", "int32", "float32"], ) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): for column in df.columns.values: series = df[column] worker_to_futures = replication.replicate_cudf_series(series) @@ -146,7 +151,8 @@ def test_enable_batch_context_then_views( gc.collect() skip_if_not_enough_devices(mg_device_count) G = utils.generate_cugraph_graph_from_file(graph_file, directed) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): assert G.batch_enabled is False, "Internal property should be False" G.enable_batch() assert G.batch_enabled is True, "Internal property should be True" @@ -181,7 +187,8 @@ def test_enable_batch_view_then_context(graph_file, directed, mg_device_count): G.view_transposed_adj_list() assert G.batch_transposed_adjlists is None - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): assert G.batch_enabled is False, "Internal property should be False" G.enable_batch() assert G.batch_enabled is True, "Internal property should be True" @@ -204,7 +211,8 @@ def test_enable_batch_context_no_context_views( gc.collect() skip_if_not_enough_devices(mg_device_count) G = utils.generate_cugraph_graph_from_file(graph_file, directed) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): assert G.batch_enabled is False, "Internal property should be False" G.enable_batch() assert G.batch_enabled is True, "Internal property should be True" @@ -228,7 +236,8 @@ def test_enable_batch_edgelist_replication( gc.collect() skip_if_not_enough_devices(mg_device_count) G = utils.generate_cugraph_graph_from_file(graph_file, directed) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): G.enable_batch() df = G.edgelist.edgelist_df for worker in G.batch_edgelists: @@ -257,7 +266,8 @@ def test_enable_batch_adjlist_replication_weights( G.from_cudf_edgelist( df, source="src", destination="dst", edge_attr="value" ) - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): G.enable_batch() G.view_adj_list() adjlist = G.adjlist @@ -296,7 +306,8 @@ def test_enable_batch_adjlist_replication_no_weights( ) G = cugraph.DiGraph() if directed else cugraph.Graph() G.from_cudf_edgelist(df, source="src", destination="dst") - with MGContext(mg_device_count): + with MGContext(number_of_devices=mg_device_count, + p2p=True): G.enable_batch() G.view_adj_list() adjlist = G.adjlist From e3dafa50d5d3553017a36942d3254843b01c14d1 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Tue, 3 Nov 2020 17:51:44 -0600 Subject: [PATCH 40/41] Further updates to get MG tests to pass: set p2p option on comms initialize(), called .compute() on dask dataframes to get a DataFrame to compare against. --- python/cugraph/tests/dask/test_mg_comms.py | 6 +++--- python/cugraph/tests/dask/test_mg_degree.py | 2 +- python/cugraph/tests/dask/test_mg_renumber.py | 4 ++-- python/cugraph/tests/dask/test_mg_utility.py | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/python/cugraph/tests/dask/test_mg_comms.py b/python/cugraph/tests/dask/test_mg_comms.py index cd94f945f93..29789461018 100644 --- a/python/cugraph/tests/dask/test_mg_comms.py +++ b/python/cugraph/tests/dask/test_mg_comms.py @@ -27,7 +27,7 @@ def client_connection(): cluster = LocalCUDACluster() client = Client(cluster) - Comms.initialize() + Comms.initialize(p2p=True) yield client @@ -62,7 +62,7 @@ def test_dask_pagerank(client_connection): dg1 = cugraph.DiGraph() dg1.from_dask_cudf_edgelist(ddf1, "src", "dst") - result_pr1 = dcg.pagerank(dg1) + result_pr1 = dcg.pagerank(dg1).compute() ddf2 = dask_cudf.read_csv( input_data_path2, @@ -75,7 +75,7 @@ def test_dask_pagerank(client_connection): dg2 = cugraph.DiGraph() dg2.from_dask_cudf_edgelist(ddf2, "src", "dst") - result_pr2 = dcg.pagerank(dg2) + result_pr2 = dcg.pagerank(dg2).compute() # Calculate single GPU pagerank for verification of results df1 = cudf.read_csv( diff --git a/python/cugraph/tests/dask/test_mg_degree.py b/python/cugraph/tests/dask/test_mg_degree.py index a903f69d05a..a6600104bc8 100644 --- a/python/cugraph/tests/dask/test_mg_degree.py +++ b/python/cugraph/tests/dask/test_mg_degree.py @@ -28,7 +28,7 @@ def client_connection(): cluster = LocalCUDACluster() client = Client(cluster) - Comms.initialize() + Comms.initialize(p2p=True) yield client diff --git a/python/cugraph/tests/dask/test_mg_renumber.py b/python/cugraph/tests/dask/test_mg_renumber.py index b981a49a0de..8456241ff26 100644 --- a/python/cugraph/tests/dask/test_mg_renumber.py +++ b/python/cugraph/tests/dask/test_mg_renumber.py @@ -36,7 +36,7 @@ def client_connection(): cluster = LocalCUDACluster() client = Client(cluster) - Comms.initialize() + Comms.initialize(p2p=True) yield client @@ -199,7 +199,7 @@ def test_dask_pagerank(client_connection): # dg.compute_local_data(by='dst') expected_pr = cugraph.pagerank(g) - result_pr = dcg.pagerank(dg) + result_pr = dcg.pagerank(dg).compute() err = 0 tol = 1.0e-05 diff --git a/python/cugraph/tests/dask/test_mg_utility.py b/python/cugraph/tests/dask/test_mg_utility.py index f1becb051ad..e802a65c37f 100644 --- a/python/cugraph/tests/dask/test_mg_utility.py +++ b/python/cugraph/tests/dask/test_mg_utility.py @@ -32,7 +32,7 @@ def client_connection(): cluster = LocalCUDACluster() client = Client(cluster) - Comms.initialize() + Comms.initialize(p2p=True) yield client From 9335e0955f1ec8d85b7e0045aca7d0a247ca83ac Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Tue, 3 Nov 2020 19:13:37 -0600 Subject: [PATCH 41/41] Added PR 1253 to CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 540c1fed3af..152f696868a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - PR #1242 Calling gunrock cmake using explicit -D options, re-enabling C++ tests - PR #1250 Updated cuco commit hash to latest as of 2020-10-30 and removed unneeded GIT_SHALLOW param - PR #1251 Changed the MG context testing class to use updated parameters passed in from the individual tests +- PR #1253 MG test fixes: updated additional comms.initialize() calls, fixed dask DataFrame comparisons # cuGraph 0.16.0 (21 Oct 2020)