diff --git a/CHANGELOG.md b/CHANGELOG.md index 4170e9c4bc0..70cd0a32e50 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,10 @@ -# cuGraph 0.20.0 (Date TBD) +# cuGraph 21.08.00 (Date TBD) -Please see https://github.com/rapidsai/cugraph/releases/tag/v0.20.0a for the latest changes to this development branch. +Please see https://github.com/rapidsai/cugraph/releases/tag/v21.08.00a for the latest changes to this development branch. + +# cuGraph 21.06.00 (Date TBD) + +Please see https://github.com/rapidsai/cugraph/releases/tag/v21.06.00a for the latest changes to this development branch. # cuGraph 0.19.0 (21 Apr 2021) diff --git a/build.sh b/build.sh index 7c99b27f632..8437a32bf94 100755 --- a/build.sh +++ b/build.sh @@ -19,10 +19,11 @@ ARGS=$* REPODIR=$(cd $(dirname $0); pwd) LIBCUGRAPH_BUILD_DIR=${LIBCUGRAPH_BUILD_DIR:=${REPODIR}/cpp/build} -VALIDARGS="clean libcugraph cugraph docs -v -g -n --allgpuarch --buildfaiss --show_depr_warn -h --help" +VALIDARGS="clean uninstall libcugraph cugraph docs -v -g -n --allgpuarch --buildfaiss --show_depr_warn -h --help" HELP="$0 [ ...] [ ...] where is: clean - remove all existing build artifacts and configuration (start over) + uninstall - uninstall libcugraph and cugraph from a prior build/install (see also -n) libcugraph - build the cugraph C++ code cugraph - build the cugraph Python package cpp-mgtests - build libcugraph mnmg tests. Builds MPI communicator, adding MPI as a dependency. @@ -30,7 +31,7 @@ HELP="$0 [ ...] [ ...] 
and is: -v - verbose build mode -g - build for debug - -n - no install step + -n - do not install after a successful build --allgpuarch - build for all supported GPU architectures --buildfaiss - build faiss statically into cugraph --show_depr_warn - show cmake deprecation warnings @@ -52,7 +53,7 @@ INSTALL_TARGET=install BUILD_DISABLE_DEPRECATION_WARNING=ON BUILD_CPP_MG_TESTS=OFF BUILD_STATIC_FAISS=OFF -GPU_ARCH="" +BUILD_ALL_GPU_ARCH=0 # Set defaults for vars that may not have been defined externally # FIXME: if PREFIX is not set, check CONDA_PREFIX, but there is no fallback @@ -95,7 +96,7 @@ if hasArg -n; then INSTALL_TARGET="" fi if hasArg --allgpuarch; then - GPU_ARCH="-DGPU_ARCHS=ALL" + BUILD_ALL_GPU_ARCH=1 fi if hasArg --buildfaiss; then BUILD_STATIC_FAISS=ON @@ -107,12 +108,36 @@ if hasArg cpp-mgtests; then BUILD_CPP_MG_TESTS=ON fi -# If clean given, run it prior to any other steps +# If clean or uninstall given, run them prior to any other steps +if hasArg uninstall; then + # uninstall libcugraph + if [[ "$INSTALL_PREFIX" != "" ]]; then + rm -rf ${INSTALL_PREFIX}/include/cugraph + rm -f ${INSTALL_PREFIX}/lib/libcugraph.so + fi + # This may be redundant given the above, but can also be used in case + # there are other installed files outside of the locations above. + if [ -e ${LIBCUGRAPH_BUILD_DIR}/install_manifest.txt ]; then + xargs rm -f < ${LIBCUGRAPH_BUILD_DIR}/install_manifest.txt > /dev/null 2>&1 + fi + # uninstall cugraph installed from a prior "setup.py install" + pip uninstall -y cugraph +fi + if hasArg clean; then - # FIXME: ideally the "setup.py clean" command below would also be run to - # remove all the "inplace" python build artifacts, but currently, running - # any setup.py command has side effects (eg. cloning repos). 
- #(cd ${REPODIR}/python && python setup.py clean) + # remove artifacts generated inplace + # FIXME: ideally the "setup.py clean" command would be used for this, but + # currently running any setup.py command has side effects (eg. cloning + # repos). + # (cd ${REPODIR}/python && python setup.py clean) + if [[ -d ${REPODIR}/python ]]; then + pushd ${REPODIR}/python > /dev/null + rm -rf dist dask-worker-space cugraph/raft *.egg-info + find . -name "__pycache__" -type d -exec rm -rf {} \; > /dev/null 2>&1 + find . -name "*.cpp" -type f -delete + find . -name "*.cpython*.so" -type f -delete + popd > /dev/null + fi # If the dirs to clean are mounted dirs in a container, the contents should # be removed but the mounted dirs will remain. The find removes all @@ -129,15 +154,17 @@ fi ################################################################################ # Configure, build, and install libcugraph if buildAll || hasArg libcugraph; then - if [[ ${GPU_ARCH} == "" ]]; then + if (( ${BUILD_ALL_GPU_ARCH} == 0 )); then + CUGRAPH_CMAKE_CUDA_ARCHITECTURES="NATIVE" echo "Building for the architecture of the GPU in the system..." else + CUGRAPH_CMAKE_CUDA_ARCHITECTURES="ALL" echo "Building for *ALL* supported GPU architectures..." 
fi mkdir -p ${LIBCUGRAPH_BUILD_DIR} cd ${LIBCUGRAPH_BUILD_DIR} cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ - ${GPU_ARCH} \ + -DCMAKE_CUDA_ARCHITECTURES=${CUGRAPH_CMAKE_CUDA_ARCHITECTURES} \ -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ -DBUILD_STATIC_FAISS=${BUILD_STATIC_FAISS} \ diff --git a/ci/benchmark/build.sh b/ci/benchmark/build.sh index d48f475f2eb..f5cedae5e29 100644 --- a/ci/benchmark/build.sh +++ b/ci/benchmark/build.sh @@ -74,7 +74,7 @@ gpuci_conda_retry install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-for "cudatoolkit=$CUDA_REL" \ "dask-cudf=${MINOR_VERSION}" \ "dask-cuda=${MINOR_VERSION}" \ - "ucx-py=${MINOR_VERSION}" \ + "ucx-py=0.20.*" \ "ucx-proc=*=gpu" \ "rapids-build-env=${MINOR_VERSION}" \ rapids-pytest-benchmark diff --git a/ci/docs/build.sh b/ci/docs/build.sh index 279faa6a61d..2135ff04b45 100644 --- a/ci/docs/build.sh +++ b/ci/docs/build.sh @@ -15,7 +15,6 @@ export PATH=/conda/bin:/usr/local/cuda/bin:$PATH export HOME=$WORKSPACE export PROJECT_WORKSPACE=/rapids/cugraph export LIBCUDF_KERNEL_CACHE_PATH="$HOME/.jitify-cache" -export NIGHTLY_VERSION=$(echo $BRANCH_VERSION | awk -F. '{print $2}') export PROJECTS=(cugraph libcugraph) gpuci_logger "Check environment" diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 30dc7373e15..7c9c353f23c 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -62,7 +62,7 @@ gpuci_conda_retry install -y \ "cudatoolkit=$CUDA_REL" \ "dask-cudf=${MINOR_VERSION}" \ "dask-cuda=${MINOR_VERSION}" \ - "ucx-py=${MINOR_VERSION}" \ + "ucx-py=0.20.*" \ "ucx-proc=*=gpu" \ "rapids-build-env=$MINOR_VERSION.*" \ "rapids-notebook-env=$MINOR_VERSION.*" \ diff --git a/ci/release/update-version.sh b/ci/release/update-version.sh index 7cd0d9720fc..ce681bad378 100755 --- a/ci/release/update-version.sh +++ b/ci/release/update-version.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -13,42 +13,25 @@ # limitations under the License. ## Usage -# bash update-version.sh -# where is either `major`, `minor`, `patch` +# bash update-version.sh -set -e -# Grab argument for release type -RELEASE_TYPE=$1 +# Format is YY.MM.PP - no leading 'v' or trailing 'a' +NEXT_FULL_TAG=$1 -# Get current version and calculate next versions -CURRENT_TAG=`git tag | grep -xE 'v[0-9\.]+' | sort --version-sort | tail -n 1 | tr -d 'v'` -CURRENT_MAJOR=`echo $CURRENT_TAG | awk '{split($0, a, "."); print a[1]}'` -CURRENT_MINOR=`echo $CURRENT_TAG | awk '{split($0, a, "."); print a[2]}'` -CURRENT_PATCH=`echo $CURRENT_TAG | awk '{split($0, a, "."); print a[3]}'` +# Get current version +CURRENT_TAG=$(git tag --merged HEAD | grep -xE '^v.*' | sort --version-sort | tail -n 1 | tr -d 'v') +CURRENT_MAJOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[1]}') +CURRENT_MINOR=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[2]}') +CURRENT_PATCH=$(echo $CURRENT_TAG | awk '{split($0, a, "."); print a[3]}') CURRENT_SHORT_TAG=${CURRENT_MAJOR}.${CURRENT_MINOR} -NEXT_MAJOR=$((CURRENT_MAJOR + 1)) -NEXT_MINOR=$((CURRENT_MINOR + 1)) -NEXT_PATCH=$((CURRENT_PATCH + 1)) -NEXT_FULL_TAG="" -NEXT_SHORT_TAG="" -# Determine release type -if [ "$RELEASE_TYPE" == "major" ]; then - NEXT_FULL_TAG="${NEXT_MAJOR}.0.0" - NEXT_SHORT_TAG="${NEXT_MAJOR}.0" -elif [ "$RELEASE_TYPE" == "minor" ]; then - NEXT_FULL_TAG="${CURRENT_MAJOR}.${NEXT_MINOR}.0" - NEXT_SHORT_TAG="${CURRENT_MAJOR}.${NEXT_MINOR}" -elif [ "$RELEASE_TYPE" == "patch" ]; then - NEXT_FULL_TAG="${CURRENT_MAJOR}.${CURRENT_MINOR}.${NEXT_PATCH}" - NEXT_SHORT_TAG="${CURRENT_MAJOR}.${CURRENT_MINOR}" -else - echo "Incorrect release type; use 'major', 'minor', or 'patch' as an argument" - exit 1 -fi +#Get . 
for next version +NEXT_MAJOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[1]}') +NEXT_MINOR=$(echo $NEXT_FULL_TAG | awk '{split($0, a, "."); print a[2]}') +NEXT_SHORT_TAG=${NEXT_MAJOR}.${NEXT_MINOR} -echo "Preparing '$RELEASE_TYPE' release [$CURRENT_TAG -> $NEXT_FULL_TAG]" +echo "Preparing release $CURRENT_TAG => $NEXT_FULL_TAG" # Inplace sed replace; workaround for Linux and Mac function sed_runner() { @@ -67,6 +50,5 @@ for FILE in conda/environments/*.yml; do sed_runner "s/rmm=${CURRENT_SHORT_TAG}/rmm=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/dask-cuda=${CURRENT_SHORT_TAG}/dask-cuda=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/dask-cudf=${CURRENT_SHORT_TAG}/dask-cudf=${NEXT_SHORT_TAG}/g" ${FILE}; - sed_runner "s/ucx-py=${CURRENT_SHORT_TAG}/ucx-py=${NEXT_SHORT_TAG}/g" ${FILE}; sed_runner "s/cuxfilter=${CURRENT_SHORT_TAG}/cuxfilter=${NEXT_SHORT_TAG}/g" ${FILE}; -done +done \ No newline at end of file diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index 20d56b281d2..7cf456aab97 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -6,22 +6,22 @@ channels: - conda-forge dependencies: - cudatoolkit=11.0 -- cudf=0.20.* -- libcudf=0.20.* -- rmm=0.20.* -- librmm=0.20.* +- cudf=21.08.* +- libcudf=21.08.* +- rmm=21.08.* +- librmm=21.08.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.20* -- dask-cudf=0.20* -- nccl>=2.8.4 -- ucx-py=0.20* +- dask-cuda=21.08.* +- dask-cudf=21.08.* +- nccl>=2.9.9 +- ucx-py=0.21.* - ucx-proc=*=gpu - scipy - networkx>=2.5.1 - clang=8.0.1 - clang-tools=8.0.1 -- cmake>=3.18 +- cmake>=3.20.1 - python>=3.6,<3.9 - notebook>=0.5.0 - boost diff --git a/conda/environments/cugraph_dev_cuda11.1.yml b/conda/environments/cugraph_dev_cuda11.1.yml index 0eba2baccaa..5d6837c1f84 100644 --- a/conda/environments/cugraph_dev_cuda11.1.yml +++ b/conda/environments/cugraph_dev_cuda11.1.yml @@ -6,22 +6,22 @@ channels: - conda-forge 
dependencies: - cudatoolkit=11.1 -- cudf=0.20.* -- libcudf=0.20.* -- rmm=0.20.* -- librmm=0.20.* +- cudf=21.08.* +- libcudf=21.08.* +- rmm=21.08.* +- librmm=21.08.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.20* -- dask-cudf=0.20* -- nccl>=2.8.4 -- ucx-py=0.20* +- dask-cuda=21.08.* +- dask-cudf=21.08.* +- nccl>=2.9.9 +- ucx-py=0.21.* - ucx-proc=*=gpu - scipy - networkx>=2.5.1 - clang=8.0.1 - clang-tools=8.0.1 -- cmake>=3.18 +- cmake>=3.20.1 - python>=3.6,<3.9 - notebook>=0.5.0 - boost diff --git a/conda/environments/cugraph_dev_cuda11.2.yml b/conda/environments/cugraph_dev_cuda11.2.yml index 55f6ad75cec..8e2f5d9158b 100644 --- a/conda/environments/cugraph_dev_cuda11.2.yml +++ b/conda/environments/cugraph_dev_cuda11.2.yml @@ -6,22 +6,22 @@ channels: - conda-forge dependencies: - cudatoolkit=11.2 -- cudf=0.20.* -- libcudf=0.20.* -- rmm=0.20.* -- librmm=0.20.* +- cudf=21.08.* +- libcudf=21.08.* +- rmm=21.08.* +- librmm=21.08.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.20* -- dask-cudf=0.20* -- nccl>=2.8.4 -- ucx-py=0.20* +- dask-cuda=21.08.* +- dask-cudf=21.08.* +- nccl>=2.9.9 +- ucx-py=0.21.* - ucx-proc=*=gpu - scipy - networkx>=2.5.1 - clang=8.0.1 - clang-tools=8.0.1 -- cmake>=3.18 +- cmake>=3.20.1 - python>=3.6,<3.9 - notebook>=0.5.0 - boost diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index c687e57b74f..ef229c43179 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -27,7 +27,7 @@ requirements: - cython>=0.29,<0.30 - libcugraph={{ version }} - cudf={{ minor_version }} - - ucx-py {{ minor_version }} + - ucx-py 0.21 - ucx-proc=*=gpu run: - python x.x @@ -37,8 +37,7 @@ requirements: - dask-cuda {{ minor_version }} - dask>=2.12.0 - distributed>=2.12.0 - - nccl>=2.8.4 - - ucx-py {{ minor_version }} + - ucx-py 0.21 - ucx-proc=*=gpu #test: diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index 71b22c8cf1b..144eb85ce9e 100644 --- 
a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -31,11 +31,11 @@ build: requirements: build: - - cmake>=3.12.4 + - cmake>=3.20.1 - cudatoolkit {{ cuda_version }}.* - librmm {{ minor_version }}.* - boost-cpp>=1.66 - - nccl>=2.8.4 + - nccl>=2.9.9 - ucx-proc=*=gpu - gtest - gmock @@ -43,7 +43,7 @@ requirements: - conda-forge::libfaiss=1.7.0 run: - {{ pin_compatible('cudatoolkit', max_pin='x.x') }} - - nccl>=2.8.4 + - nccl>=2.9.9 - ucx-proc=*=gpu - faiss-proc=*=cuda - conda-forge::libfaiss=1.7.0 diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 6b638441a5b..5d272bfe7f6 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -14,9 +14,46 @@ # limitations under the License. #============================================================================= -cmake_minimum_required(VERSION 3.18...3.18 FATAL_ERROR) +cmake_minimum_required(VERSION 3.20.1 FATAL_ERROR) +include(FetchContent) +FetchContent_Declare( + rapids-cmake + GIT_REPOSITORY https://github.com/rapidsai/rapids-cmake.git + GIT_TAG origin/branch-21.06 + ) +FetchContent_MakeAvailable(rapids-cmake) + +include(rapids-cmake) +include(rapids-cpm) +include(rapids-cuda) +include(rapids-export) +include(rapids-find) + +rapids_cuda_init_architectures(CUGRAPH) + +project(CUGRAPH VERSION 21.08.00 LANGUAGES C CXX CUDA) + +# Remove the following archs from CMAKE_CUDA_ARCHITECTURES that +# cuhornet currently doesn't support +# +# < 60 +# >= 86 +set(supported_archs "60" "62" "70" "72" "75" "80") +foreach( arch IN LISTS CMAKE_CUDA_ARCHITECTURES) + string(REPLACE "-real" "" arch ${arch}) + if( arch IN_LIST supported_archs ) + list(APPEND usable_arch_values ${arch}) + endif() +endforeach() +# Make sure everything but the 'newest' arch +# is marked as `-real` so we only generate PTX for +# arch > 80 +list(POP_BACK usable_arch_values latest_arch) +list(TRANSFORM usable_arch_values APPEND "-real") +list(APPEND usable_arch_values ${latest_arch}) + +set(CMAKE_CUDA_ARCHITECTURES 
${usable_arch_values}) -project(CUGRAPH VERSION 0.20.0 LANGUAGES C CXX CUDA) # Write the version header include(cmake/Modules/Version.cmake) @@ -26,147 +63,59 @@ write_version() # - build type ------------------------------------------------------------------------------------ # Set a default build type if none was specified -set(DEFAULT_BUILD_TYPE "Release") - -if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) - message(STATUS "Setting build type to '${DEFAULT_BUILD_TYPE}' since none specified.") - set(CMAKE_BUILD_TYPE "${DEFAULT_BUILD_TYPE}" CACHE - STRING "Choose the type of build." FORCE) - # Set the possible values of build type for cmake-gui - set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS - "Debug" "Release" "MinSizeRel" "RelWithDebInfo") -endif() +rapids_cmake_build_type(Release) ############################################################################## # - User Options ------------------------------------------------------------ option(BUILD_CUGRAPH_MG_TESTS "Build cuGraph multigpu algorithm tests" OFF) - -################################################################################################### -# - user options ------------------------------------------------------------------------------ - -set(BLAS_LIBRARIES "" CACHE STRING - "Location of BLAS library for FAISS build.") +set(BLAS_LIBRARIES "" CACHE STRING "Location of BLAS library for FAISS build.") option(BUILD_STATIC_FAISS "Build the FAISS library for nearest neighbors search on GPU" OFF) +option(CMAKE_CUDA_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler" OFF) +option(BUILD_TESTS "Configure CMake to build tests" ON) ################################################################################################### # - compiler options ------------------------------------------------------------------------------ -set(CMAKE_CXX_STANDARD 17) -set(CMAKE_C_COMPILER $ENV{CC}) -set(CMAKE_CXX_COMPILER $ENV{CXX}) -set(CMAKE_CXX_STANDARD_REQUIRED 
ON) +rapids_find_package(CUDAToolkit REQUIRED + BUILD_EXPORT_SET cugraph-exports + INSTALL_EXPORT_SET cugraph-exports + ) -set(CMAKE_CUDA_STANDARD 17) -set(CMAKE_CUDA_STANDARD_REQUIRED ON) +set(CUGRAPH_CXX_FLAGS "") +set(CUGRAPH_CUDA_FLAGS "") if(CMAKE_COMPILER_IS_GNUCXX) - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-error=deprecated-declarations") + list(APPEND CUGRAPH_CXX_FLAGS -Werror -Wno-error=deprecated-declarations) endif(CMAKE_COMPILER_IS_GNUCXX) -find_package(CUDA) - -# Configure GPU arch to build -set(GUNROCK_GENCODE_SM60 "OFF") -set(GUNROCK_GENCODE_SM61 "OFF") -set(GUNROCK_GENCODE_SM70 "OFF") -set(GUNROCK_GENCODE_SM72 "OFF") -set(GUNROCK_GENCODE_SM75 "OFF") -set(GUNROCK_GENCODE_SM80 "OFF") - -# ARCHS handling: -# -if("${GPU_ARCHS}" STREQUAL "") - include(cmake/EvalGpuArchs.cmake) - evaluate_gpu_archs(GPU_ARCHS) -endif() - -# CUDA 11 onwards cub ships with CTK -if((CUDA_VERSION_MAJOR EQUAL 11) OR (CUDA_VERSION_MAJOR GREATER 11)) - set(CUB_IS_PART_OF_CTK ON) -else() - set(CUB_IS_PART_OF_CTK OFF) -endif() +message("-- Building for GPU_ARCHS = ${CMAKE_CUDA_ARCHITECTURES}") -if("${GPU_ARCHS}" STREQUAL "ALL") - set(GPU_ARCHS "60") - if((CUDA_VERSION_MAJOR EQUAL 9) OR (CUDA_VERSION_MAJOR GREATER 9)) - set(GPU_ARCHS "${GPU_ARCHS};70") - endif() - if((CUDA_VERSION_MAJOR EQUAL 10) OR (CUDA_VERSION_MAJOR GREATER 10)) - set(GPU_ARCHS "${GPU_ARCHS};75") - endif() - if((CUDA_VERSION_MAJOR EQUAL 11) OR (CUDA_VERSION_MAJOR GREATER 11)) - set(GPU_ARCHS "${GPU_ARCHS};80") - endif() -endif() - -message("-- Building for GPU_ARCHS = ${GPU_ARCHS}") -foreach(arch ${GPU_ARCHS}) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_${arch},code=sm_${arch}") - set(GUNROCK_GENCODE_SM${arch} "ON") - set(FAISS_GPU_ARCHS "${FAISS_GPU_ARCHS} -gencode arch=compute_${arch},code=sm_${arch}") -endforeach() - -list(GET GPU_ARCHS -1 ptx) -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_${ptx},code=compute_${ptx}") -set(FAISS_GPU_ARCHS "${FAISS_GPU_ARCHS} 
-gencode arch=compute_${ptx},code=compute_${ptx}") - -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr") -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas --disable-warnings") -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable") -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xfatbin=-compress-all") +list(APPEND CUGRAPH_CUDA_FLAGS --expt-extended-lambda --expt-relaxed-constexpr) +list(APPEND CUGRAPH_CUDA_FLAGS -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas=--disable-warnings) +list(APPEND CUGRAPH_CUDA_FLAGS -Xcompiler=-Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable) +list(APPEND CUGRAPH_CUDA_FLAGS -Xfatbin=-compress-all) # Option to enable line info in CUDA device compilation to allow introspection when profiling / # memchecking -option(CMAKE_CUDA_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler" OFF) if (CMAKE_CUDA_LINEINFO) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -lineinfo") + list(APPEND CUGRAPH_CUDA_FLAGS -lineinfo) endif(CMAKE_CUDA_LINEINFO) # Debug options if(CMAKE_BUILD_TYPE MATCHES Debug) message(STATUS "Building with debugging flags") - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -G -Xcompiler -rdynamic") + list(APPEND CUGRAPH_CUDA_FLAGS -G -Xcompiler=-rdynamic) endif(CMAKE_BUILD_TYPE MATCHES Debug) -# To apply RUNPATH to transitive dependencies (this is a temporary solution) -set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--disable-new-dtags") -set(CMAKE_EXE_LINKER_FLAGS "-Wl,--disable-new-dtags") - -option(BUILD_TESTS "Configure CMake to build tests" - ON) - -################################################################################################### -# - cmake modules --------------------------------------------------------------------------------- - -set(CMAKE_MODULE_PATH 
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules/" ${CMAKE_MODULE_PATH}) - -include(FeatureSummary) -include(CheckIncludeFiles) -include(CheckLibraryExists) -if(BUILD_TESTS) - include(CTest) -endif(BUILD_TESTS) - -################################################################################################### -# - find boost ------------------------------------------------------------------------------------ - -find_package(Boost REQUIRED) -if(Boost_FOUND) - message(STATUS "Boost found in ${Boost_INCLUDE_DIRS}") -else() - message(FATAL_ERROR "Boost not found, please check your settings.") -endif(Boost_FOUND) - ################################################################################################### # - find openmp ----------------------------------------------------------------------------------- find_package(OpenMP) if(OpenMP_FOUND) # find_package(OPenMP) does not automatically add OpenMP flags to CUDA - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler=${OpenMP_CXX_FLAGS}") + list(APPEND CUGRAPH_CUDA_FLAGS -Xcompiler=${OpenMP_CXX_FLAGS}) endif(OpenMP_FOUND) @@ -180,218 +129,28 @@ else() endif() ################################################################################################### -# - find gtest ------------------------------------------------------------------------------------ +# - find CPM based dependencies ------------------------------------------------------------------ -if(BUILD_TESTS) - find_package(GTest REQUIRED) -endif(BUILD_TESTS) -################################################################################################### -# - find RMM -------------------------------------------------------------------------------------- +rapids_cpm_init() -find_path(RMM_INCLUDE "rmm" - HINTS - "$ENV{RMM_ROOT}/include" - "$ENV{CONDA_PREFIX}/include/rmm" - "$ENV{CONDA_PREFIX}/include") -message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}") +include(cmake/thirdparty/get_thrust.cmake) +include(cmake/thirdparty/get_faiss.cmake) 
+include(cmake/thirdparty/get_nccl.cmake) +include(cmake/thirdparty/get_rmm.cmake) -################################################################################################### -# - find NCCL ------------------------------------------------------------------------------------- +include(cmake/thirdparty/get_raft.cmake) -if(NOT NCCL_PATH) - find_package(NCCL REQUIRED) -else() - message("-- Manually set NCCL PATH to ${NCCL_PATH}") - set(NCCL_INCLUDE_DIRS ${NCCL_PATH}/include) - set(NCCL_LIBRARIES ${NCCL_PATH}/lib/libnccl.so) -endif(NOT NCCL_PATH) - -################################################################################################### -# - find MPI - only enabled if MG tests are to be built +include(cmake/thirdparty/get_cuco.cmake) +include(cmake/thirdparty/get_cuhornet.cmake) -if(BUILD_CUGRAPH_MG_TESTS) - find_package(MPI REQUIRED) -endif(BUILD_CUGRAPH_MG_TESTS) +include(cmake/thirdparty/get_gunrock.cmake) -################################################################################################### -# - Fetch Content --------------------------------------------------------------------------------- -include(FetchContent) - -# - THRUST/CUB -message("Fetching Thrust") - -FetchContent_Declare( - thrust - GIT_REPOSITORY https://github.com/thrust/thrust.git - GIT_TAG 1.12.0 -) - -FetchContent_GetProperties(thrust) -if(NOT thrust_POPULATED) - FetchContent_Populate(thrust) - # We are not using the thrust CMake targets, so no need to call `add_subdirectory()`. 
-endif() -set(THRUST_INCLUDE_DIR "${thrust_SOURCE_DIR}") - -# - cuco -message("Fetching cuco") - -FetchContent_Declare( - cuco - GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git - GIT_TAG 7678a5ecaa192b8983b02a0191a140097171713e -) - -FetchContent_GetProperties(cuco) -if(NOT cuco_POPULATED) - FetchContent_Populate(cuco) -endif() -set(CUCO_INCLUDE_DIR "${cuco_SOURCE_DIR}/include") - -# - libcudacxx -# NOTE: This is necessary because libcudacxx is not supported in -# debian cuda 10.2 packages. Once 10.2 is deprecated -# we should not need this any longer. -message("Fetching libcudacxx") - -FetchContent_Declare( - libcudacxx - GIT_REPOSITORY https://github.com/NVIDIA/libcudacxx.git - GIT_TAG 1.3.0 - GIT_SHALLOW true -) - -FetchContent_GetProperties(libcudacxx) -if(NOT libcudacxx_POPULATED) - message("populating libcudacxx") - FetchContent_Populate(libcudacxx) -endif() -set(LIBCUDACXX_INCLUDE_DIR "${libcudacxx_SOURCE_DIR}/include") -message("set LIBCUDACXX_INCLUDE_DIR to: ${LIBCUDACXX_INCLUDE_DIR}") - -# - CUHORNET -FetchContent_Declare( - cuhornet - GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git - GIT_TAG 6d2fc894cc56dd2ca8fc9d1523a18a6ec444b663 - SOURCE_SUBDIR hornet -) - -FetchContent_GetProperties(cuhornet) -if(NOT cuhornet_POPULATED) - message("populating cuhornet") - FetchContent_Populate(cuhornet) - # We are not using the cuhornet CMake targets, so no need to call `add_subdirectory()`. 
+if(BUILD_TESTS) + include(cmake/thirdparty/get_gtest.cmake) endif() -set(CUHORNET_INCLUDE_DIR ${cuhornet_SOURCE_DIR} CACHE STRING "Path to cuhornet includes") - -# - raft - (header only) -# Only cloned if RAFT_PATH env variable is not defined -if(DEFINED ENV{RAFT_PATH}) - message(STATUS "RAFT_PATH environment variable detected.") - message(STATUS "RAFT_DIR set to $ENV{RAFT_PATH}") - set(RAFT_DIR "$ENV{RAFT_PATH}") - -else(DEFINED ENV{RAFT_PATH}) - message(STATUS "RAFT_PATH environment variable NOT detected, cloning RAFT") - - FetchContent_Declare( - raft - GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG 66f82b4e79a3e268d0da3cc864ec7ce4ad065296 - SOURCE_SUBDIR raft - ) - - FetchContent_GetProperties(raft) - if(NOT raft_POPULATED) - message("populating raft") - FetchContent_Populate(raft) - # We are not using any raft CMake targets, so no need to call `add_subdirectory()`. - endif() - - set(RAFT_DIR "${raft_SOURCE_DIR}") -endif(DEFINED ENV{RAFT_PATH}) - -################################################################################################### -# - External Projects ----------------------------------------------------------------------------- - -# https://cmake.org/cmake/help/v3.0/module/ExternalProject.html - -# FIXME: gunrock is still using ExternalProject instead of -# FetchContent. Consider migrating to FetchContent soon (this may require -# updates to the gunrock cmake files to support this). 
- -include(ExternalProject) - -# - GUNROCK -set(GUNROCK_DIR ${CMAKE_CURRENT_BINARY_DIR}/gunrock CACHE STRING "Path to gunrock repo") -set(GUNROCK_INCLUDE_DIR ${GUNROCK_DIR}/src/gunrock_ext CACHE STRING "Path to gunrock includes") - -ExternalProject_Add(gunrock_ext - GIT_REPOSITORY https://github.com/gunrock/gunrock.git - GIT_TAG v1.2 - PREFIX ${GUNROCK_DIR} - CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= - -DGUNROCK_BUILD_SHARED_LIBS=OFF - -DGUNROCK_BUILD_TESTS=OFF - -DCUDA_AUTODETECT_GENCODE=OFF - -DGUNROCK_GENCODE_SM60=${GUNROCK_GENCODE_SM60} - -DGUNROCK_GENCODE_SM61=${GUNROCK_GENCODE_SM61} - -DGUNROCK_GENCODE_SM70=${GUNROCK_GENCODE_SM70} - -DGUNROCK_GENCODE_SM72=${GUNROCK_GENCODE_SM72} - -DGUNROCK_GENCODE_SM75=${GUNROCK_GENCODE_SM75} - -DGUNROCK_GENCODE_SM80=${GUNROCK_GENCODE_SM80} - ${GUNROCK_GENCODE} - BUILD_BYPRODUCTS ${GUNROCK_DIR}/src/gunrock_ext-build/lib/libgunrock.a - INSTALL_COMMAND "" -) -add_library(gunrock STATIC IMPORTED) -add_dependencies(gunrock gunrock_ext) -set_property(TARGET gunrock PROPERTY IMPORTED_LOCATION ${GUNROCK_DIR}/src/gunrock_ext-build/lib/libgunrock.a) - -# - FAISS -# FIXME: The commit currently being fetched from faiss is using autotools which -# is more convenient to build with ExternalProjectAdd. -# Consider migrating to FetchContent once the tagged commit is changed. - -if(BUILD_STATIC_FAISS) - set(FAISS_DIR ${CMAKE_CURRENT_BINARY_DIR}/faiss CACHE STRING - "Path to FAISS source directory") - ExternalProject_Add(faiss - GIT_REPOSITORY https://github.com/facebookresearch/faiss.git - GIT_TAG 7c2d2388a492d65fdda934c7e74ae87acaeed066 - CONFIGURE_COMMAND LIBS=-pthread - CPPFLAGS=-w - LDFLAGS=-L${CMAKE_INSTALL_PREFIX}/lib - cmake -B build . 
- -DCMAKE_BUILD_TYPE=Release - -DBUILD_TESTING=OFF - -DFAISS_ENABLE_PYTHON=OFF - -DBUILD_SHARED_LIBS=OFF - -DFAISS_ENABLE_GPU=ON - -DCUDAToolkit_ROOT=${CUDA_TOOLKIT_ROOT_DIR} - -DCUDA_ARCHITECTURES=${FAISS_GPU_ARCHS} - -DBLAS_LIBRARIES=${BLAS_LIBRARIES} - PREFIX ${FAISS_DIR} - BUILD_COMMAND make -C build -j${PARALLEL_LEVEL} VERBOSE=1 - BUILD_BYPRODUCTS ${FAISS_DIR}/src/faiss/build/faiss/libfaiss.a - BUILD_ALWAYS 1 - INSTALL_COMMAND "" - UPDATE_COMMAND "" - BUILD_IN_SOURCE 1) - - ExternalProject_Get_Property(faiss install_dir) - add_library(FAISS::FAISS STATIC IMPORTED) - set_property(TARGET FAISS::FAISS PROPERTY - IMPORTED_LOCATION ${FAISS_DIR}/src/faiss/build/faiss/libfaiss.a) - set(FAISS_INCLUDE_DIRS "${FAISS_DIR}/src/faiss") -else() - set(FAISS_INSTALL_DIR ENV{FAISS_ROOT}) - find_package(FAISS REQUIRED) -endif(BUILD_STATIC_FAISS) ################################################################################################### # - library targets ------------------------------------------------------------------------------- @@ -400,6 +159,7 @@ add_library(cugraph SHARED src/utilities/spmv_1D.cu src/utilities/cython.cu src/utilities/path_retrieval.cu + src/utilities/graph_bcast.cu src/structure/graph.cu src/linear_assignment/hungarian.cu src/link_analysis/gunrock_hits.cpp @@ -425,7 +185,10 @@ add_library(cugraph SHARED src/components/connectivity.cu src/centrality/katz_centrality.cu src/centrality/betweenness_centrality.cu - src/experimental/generate_rmat_edgelist.cu + src/generators/generate_rmat_edgelist.cu + src/generators/generator_tools.cu + src/generators/simple_generators.cu + src/generators/erdos_renyi_generator.cu src/experimental/graph.cu src/experimental/graph_view.cu src/experimental/coarsen_graph.cu @@ -437,54 +200,75 @@ add_library(cugraph SHARED src/experimental/sssp.cu src/experimental/pagerank.cu src/experimental/katz_centrality.cu + src/serialization/serializer.cu src/tree/mst.cu + src/components/weakly_connected_components.cu + 
src/structure/create_graph_from_edgelist.cu src/utilities/host_barrier.cpp ) -target_link_directories(cugraph - PRIVATE - # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the - # link directories for nvcc. - "${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}") +set_target_properties(cugraph + PROPERTIES BUILD_RPATH "\$ORIGIN" + INSTALL_RPATH "\$ORIGIN" + # set target compile options + CXX_STANDARD 17 + CXX_STANDARD_REQUIRED ON + CUDA_STANDARD 17 + CUDA_STANDARD_REQUIRED ON + INTERFACE_POSITION_INDEPENDENT_CODE ON +) -# -# NOTE: This dependency will force the building of cugraph to -# wait until after cugunrock is constructed. -# -add_dependencies(cugraph gunrock_ext) +target_compile_options(cugraph + PRIVATE "$<$:${CUGRAPH_CXX_FLAGS}>" + "$<$:${CUGRAPH_CUDA_FLAGS}>" +) # Per-thread default stream option see https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html # The per-thread default stream does not synchronize with other streams target_compile_definitions(cugraph PUBLIC CUDA_API_PER_THREAD_DEFAULT_STREAM) +file(WRITE "${CUGRAPH_BINARY_DIR}/fatbin.ld" +[=[ +SECTIONS +{ + .nvFatBinSegment : { *(.nvFatBinSegment) } + .nv_fatbin : { *(.nv_fatbin) } +} +]=]) +target_link_options(cugraph PRIVATE "${CUGRAPH_BINARY_DIR}/fatbin.ld") + +add_library(cugraph::cugraph ALIAS cugraph) + ################################################################################################### # - include paths --------------------------------------------------------------------------------- target_include_directories(cugraph PRIVATE - "${THRUST_INCLUDE_DIR}" - "${CUCO_INCLUDE_DIR}" - "${LIBCUDACXX_INCLUDE_DIR}" - "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" - "${Boost_INCLUDE_DIRS}" - "${RMM_INCLUDE}" - "${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty" - "${CUHORNET_INCLUDE_DIR}/hornet/include" - "${CUHORNET_INCLUDE_DIR}/hornetsnest/include" - "${CUHORNET_INCLUDE_DIR}/xlib/include" - "${CUHORNET_INCLUDE_DIR}/primitives" - 
"${CMAKE_CURRENT_SOURCE_DIR}/src" - "${GUNROCK_INCLUDE_DIR}" - "${NCCL_INCLUDE_DIRS}" - "${RAFT_DIR}/cpp/include" + "${CMAKE_CURRENT_SOURCE_DIR}/../thirdparty" + "${CMAKE_CURRENT_SOURCE_DIR}/src" + "${NCCL_INCLUDE_DIRS}" PUBLIC - "${CMAKE_CURRENT_SOURCE_DIR}/include" + "$" + "$" ) ################################################################################################### # - link libraries -------------------------------------------------------------------------------- - -target_link_libraries(cugraph PRIVATE - gunrock cublas cusparse curand cusolver cudart cuda FAISS::FAISS ${NCCL_LIBRARIES}) +target_link_libraries(cugraph + PUBLIC + rmm::rmm + cugraph::Thrust + raft::raft + PRIVATE + cuco::cuco + CUDA::cublas + CUDA::curand + CUDA::cusolver + CUDA::cusparse + cugraph::cuHornet + FAISS::FAISS + gunrock + NCCL::NCCL +) if(OpenMP_CXX_FOUND) target_link_libraries(cugraph PRIVATE @@ -537,49 +321,74 @@ target_link_libraries(cugraph PRIVATE ${OpenMP_CXX_LIB_NAMES}) endif(OpenMP_CXX_FOUND) -# CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the -# compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent cmake -# warnings about policy CMP0104. With this setting, arch flags must be manually -# set! ("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism used in -# cpp/CMakeLists.txt for setting arch options). Run "cmake --help-policy -# CMP0104" for policy details. 
-# NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to the -# findcudatoolkit features in cmake 3.17+ -set_target_properties(cugraph PROPERTIES - CUDA_ARCHITECTURES OFF) ################################################################################################### # - generate tests -------------------------------------------------------------------------------- if(BUILD_TESTS) - if(GTEST_FOUND) - add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tests) - endif(GTEST_FOUND) + include(CTest) + add_subdirectory(tests) endif(BUILD_TESTS) ################################################################################################### # - install targets ------------------------------------------------------------------------------- -install(TARGETS cugraph LIBRARY - DESTINATION lib) +install(TARGETS cugraph + DESTINATION lib + EXPORT cugraph-exports) install(DIRECTORY include/ - DESTINATION include/cugraph) + DESTINATION include) install(FILES ${CMAKE_CURRENT_BINARY_DIR}/include/cugraph/version_config.hpp DESTINATION include/cugraph) -install(DIRECTORY ${RAFT_DIR}/cpp/include/raft/ - DESTINATION include/cugraph/raft) +################################################################################################ +# - install export ------------------------------------------------------------------------------- +set(doc_string +[=[ +Provide targets for cuGraph. + +cuGraph library is a collection of GPU accelerated graph algorithms that process data found in +[GPU DataFrames](https://github.com/rapidsai/cudf). 
+ +]=]) + +set(code_string +[=[ +thrust_create_target(cugraph::Thrust FROM_OPTIONS) +]=]) + + rapids_export(INSTALL cugraph + EXPORT_SET cugraph-exports + GLOBAL_TARGETS cugraph + NAMESPACE cugraph:: + DOCUMENTATION doc_string + FINAL_CODE_BLOCK code_string + ) + +################################################################################################ +# - build export ------------------------------------------------------------------------------- +rapids_export(BUILD cugraph + EXPORT_SET cugraph-exports + GLOBAL_TARGETS cugraph + NAMESPACE cugraph:: + DOCUMENTATION doc_string + FINAL_CODE_BLOCK code_string + ) + ################################################################################################### # - make documentation ---------------------------------------------------------------------------- # requires doxygen and graphviz to be installed # from build directory, run make docs_cugraph -# doc targets for cuGraph -add_custom_command(OUTPUT CUGRAPH_DOXYGEN - WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doxygen - COMMAND doxygen Doxyfile - VERBATIM) +# doc targets for cugraph +find_package(Doxygen 1.8.11) +if(Doxygen_FOUND) + add_custom_command(OUTPUT CUGRAPH_DOXYGEN + WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/doxygen + COMMAND doxygen Doxyfile + VERBATIM) -add_custom_target(docs_cugraph DEPENDS CUGRAPH_DOXYGEN) + add_custom_target(docs_cugraph DEPENDS CUGRAPH_DOXYGEN) +endif() diff --git a/cpp/cmake/EvalGpuArchs.cmake b/cpp/cmake/EvalGpuArchs.cmake deleted file mode 100644 index f3918542db9..00000000000 --- a/cpp/cmake/EvalGpuArchs.cmake +++ /dev/null @@ -1,68 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -function(evaluate_gpu_archs gpu_archs) - set(eval_file ${PROJECT_BINARY_DIR}/eval_gpu_archs.cu) - set(eval_exe ${PROJECT_BINARY_DIR}/eval_gpu_archs) - file(WRITE ${eval_file} - " -#include -#include -#include -using namespace std; -int main(int argc, char** argv) { - set archs; - int nDevices; - if((cudaGetDeviceCount(&nDevices) == cudaSuccess) && (nDevices > 0)) { - for(int dev=0;dev::const_iterator itr=archs.begin();itr!=archs.end();++itr) { - printf(first? \"%s\" : \";%s\", itr->c_str()); - first = false; - } - } - printf(\"\\n\"); - return 0; -} -") - execute_process( - COMMAND ${CUDA_NVCC_EXECUTABLE} - -o ${eval_exe} - --run - ${eval_file} - OUTPUT_VARIABLE __gpu_archs - OUTPUT_STRIP_TRAILING_WHITESPACE) - set(__gpu_archs_filtered "${__gpu_archs}") - foreach(arch ${__gpu_archs}) - if (arch VERSION_LESS 60) - list(REMOVE_ITEM __gpu_archs_filtered ${arch}) - endif() - endforeach() - if (NOT __gpu_archs_filtered) - message(FATAL_ERROR "No supported GPU arch found on this system") - endif() - message("Auto detection of gpu-archs: ${__gpu_archs_filtered}") - set(${gpu_archs} ${__gpu_archs_filtered} PARENT_SCOPE) -endfunction(evaluate_gpu_archs) diff --git a/cpp/cmake/Modules/FindFAISS.cmake b/cpp/cmake/Modules/FindFAISS.cmake deleted file mode 100644 index 7c456edfeef..00000000000 --- a/cpp/cmake/Modules/FindFAISS.cmake +++ /dev/null @@ -1,98 +0,0 @@ -# Copyright (c) 2021, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Based on FindPNG.cmake from cmake 3.14.3 - -#[=======================================================================[.rst: -FindFAISS --------- - -Template to generate FindPKG_NAME.cmake CMake modules - -Find FAISS - -Imported targets -^^^^^^^^^^^^^^^^ - -This module defines the following :prop_tgt:`IMPORTED` target: - -``FAISS::FAISS`` - The libFAISS library, if found. - -Result variables -^^^^^^^^^^^^^^^^ - -This module will set the following variables in your project: - -``FAISS_INCLUDE_DIRS`` - where to find FAISS.hpp , etc. -``FAISS_LIBRARIES`` - the libraries to link against to use libFAISS. -``FAISS_FOUND`` - If false, do not try to use FAISS. -``FAISS_VERSION_STRING`` - the version of the FAISS library found - -#]=======================================================================] - -find_path(FAISS_LOCATION faiss/IndexFlat.h - HINTS ${FAISS_INSTALL_DIR} - PATH_SUFFIXES include include/) - -list(APPEND FAISS_NAMES faiss libfaiss) -set(_FAISS_VERSION_SUFFIXES ) - -foreach(v IN LISTS _FAISS_VERSION_SUFFIXES) - list(APPEND FAISS_NAMES faiss${v} libfaiss${v}) - list(APPEND FAISS_NAMES faiss.${v} libfaiss.${v}) -endforeach() -unset(_FAISS_VERSION_SUFFIXES) - -find_library(FAISS_LIBRARY_RELEASE NAMES ${FAISS_NAMES} - HINTS ${FAISS_INSTALL_DIR} - PATH_SUFFIXES lib) - -include(${CMAKE_ROOT}/Modules/SelectLibraryConfigurations.cmake) -select_library_configurations(FAISS) -mark_as_advanced(FAISS_LIBRARY_RELEASE) -unset(FAISS_NAMES) - -# Set by select_library_configurations(), but we want the one from -# find_package_handle_standard_args() below. 
-unset(FAISS_FOUND) - -if (FAISS_LIBRARY AND FAISS_LOCATION) - set(FAISS_INCLUDE_DIRS ${FAISS_LOCATION} ) - set(FAISS_LIBRARY ${FAISS_LIBRARY}) - - if(NOT TARGET FAISS::FAISS) - add_library(FAISS::FAISS UNKNOWN IMPORTED) - set_target_properties(FAISS::FAISS PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${FAISS_INCLUDE_DIRS}") - if(EXISTS "${FAISS_LIBRARY}") - set_target_properties(FAISS::FAISS PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" - IMPORTED_LOCATION "${FAISS_LIBRARY}") - endif() - endif() -endif () - - -include(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake) -find_package_handle_standard_args(FAISS - REQUIRED_VARS FAISS_LIBRARY FAISS_LOCATION - VERSION_VAR FAISS_VERSION_STRING) - -mark_as_advanced(FAISS_LOCATION FAISS_LIBRARY) diff --git a/cpp/cmake/Modules/FindNCCL.cmake b/cpp/cmake/Modules/FindNCCL.cmake deleted file mode 100644 index 0f673707444..00000000000 --- a/cpp/cmake/Modules/FindNCCL.cmake +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -# Based on FindPNG.cmake from cmake 3.14.3 - -#[=======================================================================[.rst: -FindNCCL --------- - -Find libnccl, the NVIDIA Collective Communication Library. A hint to find NCCL -can be provided by setting NCCL_INSTALL_DIR. 
- -Imported targets -^^^^^^^^^^^^^^^^ - -This module defines the following :prop_tgt:`IMPORTED` target: - -``NCCL::NCCL`` - The libnccl library, if found. - -Result variables -^^^^^^^^^^^^^^^^ - -This module will set the following variables in your project: - -``NCCL_INCLUDE_DIRS`` - where to find nccl.h , etc. -``NCCL_LIBRARIES`` - the libraries to link against to use NCCL. -``NCCL_FOUND`` - If false, do not try to use NCCL. -``NCCL_VERSION_STRING`` - the version of the NCCL library found - -#]=======================================================================] - -find_path(NCCL_NCCL_INCLUDE_DIR nccl.h HINTS ${NCCL_INSTALL_DIR} PATH_SUFFIXES include) - -#TODO: Does this need to support finding the static library? - -list(APPEND NCCL_NAMES nccl libnccl) -set(_NCCL_VERSION_SUFFIXES 2) - -foreach(v IN LISTS _NCCL_VERSION_SUFFIXES) - list(APPEND NCCL_NAMES nccl${v} libnccl${v}) -endforeach() -unset(_NCCL_VERSION_SUFFIXES) -# For compatibility with versions prior to this multi-config search, honor -# any NCCL_LIBRARY that is already specified and skip the search. -if(NOT NCCL_LIBRARY) - find_library(NCCL_LIBRARY_RELEASE NAMES ${NCCL_NAMES} HINTS ${NCCL_INSTALL_DIR} PATH_SUFFIXES lib) - include(${CMAKE_ROOT}/Modules/SelectLibraryConfigurations.cmake) - select_library_configurations(NCCL) - mark_as_advanced(NCCL_LIBRARY_RELEASE) -endif() -unset(NCCL_NAMES) - -# Set by select_library_configurations(), but we want the one from -# find_package_handle_standard_args() below. 
-unset(NCCL_FOUND) - -if (NCCL_LIBRARY AND NCCL_NCCL_INCLUDE_DIR) - set(NCCL_INCLUDE_DIRS ${NCCL_NCCL_INCLUDE_DIR} ) - set(NCCL_LIBRARY ${NCCL_LIBRARY}) - - if(NOT TARGET NCCL::NCCL) - add_library(NCCL::NCCL UNKNOWN IMPORTED) - set_target_properties(NCCL::NCCL PROPERTIES - INTERFACE_INCLUDE_DIRECTORIES "${NCCL_INCLUDE_DIRS}") - if(EXISTS "${NCCL_LIBRARY}") - set_target_properties(NCCL::NCCL PROPERTIES - IMPORTED_LINK_INTERFACE_LANGUAGES "C" - IMPORTED_LOCATION "${NCCL_LIBRARY}") - endif() - endif() -endif () - -if (NCCL_NCCL_INCLUDE_DIR AND EXISTS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h") - file(STRINGS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h" nccl_major_version_str REGEX "^#define[ \t]+NCCL_MAJOR[ \t]+[0-9]+") - string(REGEX REPLACE "^#define[ \t]+NCCL_MAJOR[ \t]+([0-9]+)" "\\1" nccl_major_version_str "${nccl_major_version_str}") - - file(STRINGS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h" nccl_minor_version_str REGEX "^#define[ \t]+NCCL_MINOR[ \t]+[0-9]+") - string(REGEX REPLACE "^#define[ \t]+NCCL_MINOR[ \t]+([0-9]+)" "\\1" nccl_minor_version_str "${nccl_minor_version_str}") - - file(STRINGS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h" nccl_patch_version_str REGEX "^#define[ \t]+NCCL_PATCH[ \t]+[0-9]+") - string(REGEX REPLACE "^#define[ \t]+NCCL_PATCH[ \t]+([0-9]+)" "\\1" nccl_patch_version_str "${nccl_patch_version_str}") - - file(STRINGS "${NCCL_NCCL_INCLUDE_DIR}/nccl.h" nccl_suffix_version_str REGEX "^#define[ \t]+NCCL_SUFFIX[ \t]+\".*\"") - string(REGEX REPLACE "^#define[ \t]+NCCL_SUFFIX[ \t]+\"(.*)\"" "\\1" nccl_suffix_version_str "${nccl_suffix_version_str}") - - set(NCCL_VERSION_STRING "${nccl_major_version_str}.${nccl_minor_version_str}.${nccl_patch_version_str}${nccl_suffix_version_str}") - - unset(nccl_major_version_str) - unset(nccl_minor_version_str) - unset(nccl_patch_version_str) - unset(nccl_suffix_version_str) -endif () - -include(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake) -find_package_handle_standard_args(NCCL - REQUIRED_VARS NCCL_LIBRARY NCCL_NCCL_INCLUDE_DIR - 
VERSION_VAR NCCL_VERSION_STRING) - -mark_as_advanced(NCCL_NCCL_INCLUDE_DIR NCCL_LIBRARY) diff --git a/cpp/cmake/faiss_cuda11.patch b/cpp/cmake/faiss_cuda11.patch deleted file mode 100644 index 496ca0e7b23..00000000000 --- a/cpp/cmake/faiss_cuda11.patch +++ /dev/null @@ -1,40 +0,0 @@ -diff --git a/configure b/configure -index ed40dae..f88ed0a 100755 ---- a/configure -+++ b/configure -@@ -2970,7 +2970,7 @@ ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ex - ac_compiler_gnu=$ac_cv_cxx_compiler_gnu - - -- ax_cxx_compile_alternatives="11 0x" ax_cxx_compile_cxx11_required=true -+ ax_cxx_compile_alternatives="14 11 0x" ax_cxx_compile_cxx11_required=true - ac_ext=cpp - ac_cpp='$CXXCPP $CPPFLAGS' - ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' -diff --git a/gpu/utils/DeviceDefs.cuh b/gpu/utils/DeviceDefs.cuh -index 89d3dda..bc0f9b5 100644 ---- a/gpu/utils/DeviceDefs.cuh -+++ b/gpu/utils/DeviceDefs.cuh -@@ -13,7 +13,7 @@ - namespace faiss { namespace gpu { - - #ifdef __CUDA_ARCH__ --#if __CUDA_ARCH__ <= 750 -+#if __CUDA_ARCH__ <= 800 - constexpr int kWarpSize = 32; - #else - #error Unknown __CUDA_ARCH__; please define parameters for compute capability -diff --git a/gpu/utils/MatrixMult-inl.cuh b/gpu/utils/MatrixMult-inl.cuh -index ede225e..4f7eb44 100644 ---- a/gpu/utils/MatrixMult-inl.cuh -+++ b/gpu/utils/MatrixMult-inl.cuh -@@ -51,6 +51,9 @@ rawGemm(cublasHandle_t handle, - auto cBT = GetCudaType::Type; - - // Always accumulate in f32 -+# if __CUDACC_VER_MAJOR__ >= 11 -+ cublasSetMathMode(handle, CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION); -+# endif - return cublasSgemmEx(handle, transa, transb, m, n, k, - &fAlpha, A, cAT, lda, - B, cBT, ldb, diff --git a/cpp/cmake/thirdparty/get_cuco.cmake b/cpp/cmake/thirdparty/get_cuco.cmake new file mode 100644 index 00000000000..b9542a42f26 --- /dev/null +++ b/cpp/cmake/thirdparty/get_cuco.cmake @@ -0,0 +1,35 @@ 
+#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +function(find_and_configure_cuco VERSION) + + rapids_cpm_find(cuco ${VERSION} + GLOBAL_TARGETS cuco cuco::cuco + CPM_ARGS + GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git + GIT_TAG 0b672bbde7c85a79df4d7ca5f82e15e5b4a57700 + OPTIONS "BUILD_TESTS OFF" + "BUILD_BENCHMARKS OFF" + "BUILD_EXAMPLES OFF" + ) + + if(NOT TARGET cuco::cuco) + add_library(cuco::cuco ALIAS cuco) + endif() + +endfunction() + +find_and_configure_cuco(0.0.1) diff --git a/cpp/cmake/thirdparty/get_cuhornet.cmake b/cpp/cmake/thirdparty/get_cuhornet.cmake new file mode 100644 index 00000000000..28c83161ff4 --- /dev/null +++ b/cpp/cmake/thirdparty/get_cuhornet.cmake @@ -0,0 +1,45 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +function(find_and_configure_cuhornet) + + # We are not using the cuhornet CMake targets, so no need to call `add_subdirectory()`, + # or to use CPM + FetchContent_Declare( + cuhornet + GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git + GIT_TAG 261399356e62bd76fa7628880f1a847aee713eed + SOURCE_SUBDIR hornet + ) + FetchContent_GetProperties(cuhornet) + + if(NOT cuhornet_POPULATED) + FetchContent_Populate(cuhornet) + endif() + + if(NOT TARGET cugraph::cuHornet) + add_library(cugraph::cuHornet IMPORTED INTERFACE GLOBAL) + target_include_directories(cugraph::cuHornet INTERFACE + "${cuhornet_SOURCE_DIR}/hornet/include" + "${cuhornet_SOURCE_DIR}/hornetsnest/include" + "${cuhornet_SOURCE_DIR}/xlib/include" + "${cuhornet_SOURCE_DIR}/primitives" + ) + endif() +endfunction() + + +find_and_configure_cuhornet() diff --git a/cpp/cmake/thirdparty/get_faiss.cmake b/cpp/cmake/thirdparty/get_faiss.cmake new file mode 100644 index 00000000000..4991990e379 --- /dev/null +++ b/cpp/cmake/thirdparty/get_faiss.cmake @@ -0,0 +1,50 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + +function(find_and_configure_faiss) + set(oneValueArgs VERSION PINNED_TAG) + cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN} ) + + rapids_find_generate_module(FAISS + HEADER_NAMES faiss/IndexFlat.h + LIBRARY_NAMES faiss + ) + + rapids_cpm_find(FAISS ${PKG_VERSION} + GLOBAL_TARGETS faiss + CPM_ARGS + GIT_REPOSITORY https://github.com/facebookresearch/faiss.git + GIT_TAG ${PKG_PINNED_TAG} + OPTIONS + "FAISS_ENABLE_PYTHON OFF" + "BUILD_SHARED_LIBS OFF" + "CUDAToolkit_ROOT ${CUDAToolkit_LIBRARY_DIR}" + "FAISS_ENABLE_GPU ON" + "BUILD_TESTING OFF" + "CMAKE_MESSAGE_LOG_LEVEL VERBOSE" + ) + + if(FAISS_ADDED) + set(FAISS_GPU_HEADERS ${FAISS_SOURCE_DIR} PARENT_SCOPE) + add_library(FAISS::FAISS ALIAS faiss) + endif() + +endfunction() + +find_and_configure_faiss(VERSION 1.7.0 + PINNED_TAG bde7c0027191f29c9dadafe4f6e68ca0ee31fb30 + ) diff --git a/cpp/cmake/thirdparty/get_gtest.cmake b/cpp/cmake/thirdparty/get_gtest.cmake new file mode 100644 index 00000000000..e413cad7601 --- /dev/null +++ b/cpp/cmake/thirdparty/get_gtest.cmake @@ -0,0 +1,43 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + +function(find_and_configure_gtest VERSION) + + if(TARGET GTest::gtest) + return() + endif() + + rapids_cpm_find(GTest ${VERSION} + GLOBAL_TARGETS gmock gmock_main gtest gtest_main GTest::gmock GTest::gtest GTest::gtest_main + CPM_ARGS + GIT_REPOSITORY https://github.com/google/googletest.git + GIT_TAG release-${VERSION} + GIT_SHALLOW TRUE + OPTIONS "INSTALL_GTEST ON" + # googletest >= 1.10.0 provides a cmake config file -- use it if it exists + FIND_PACKAGE_ARGUMENTS "CONFIG" + ) + + if(NOT TARGET GTest::gtest) + add_library(GTest::gmock ALIAS gmock) + add_library(GTest::gmock_main ALIAS gmock_main) + add_library(GTest::gtest ALIAS gtest) + add_library(GTest::gtest_main ALIAS gtest_main) + endif() + +endfunction() + +find_and_configure_gtest(1.10.0) diff --git a/cpp/cmake/thirdparty/get_gunrock.cmake b/cpp/cmake/thirdparty/get_gunrock.cmake new file mode 100644 index 00000000000..056cd4bd5ea --- /dev/null +++ b/cpp/cmake/thirdparty/get_gunrock.cmake @@ -0,0 +1,64 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + +function(find_and_configure_gunrock VERSION) + + if(NOT TARGET gunrock) + set(GUNROCK_GENCODE_SM60 OFF) + set(GUNROCK_GENCODE_SM61 OFF) + set(GUNROCK_GENCODE_SM70 OFF) + set(GUNROCK_GENCODE_SM72 OFF) + set(GUNROCK_GENCODE_SM75 OFF) + set(GUNROCK_GENCODE_SM80 OFF) + + foreach(arch IN LISTS CMAKE_CUDA_ARCHITECTURES) + string(REPLACE "-real" "" arch ${arch}) + set(GUNROCK_GENCODE_SM${arch} "ON") + endforeach() + + # FIXME: gunrock is still using ExternalProject instead of CPM, as version 1.2 + # doesn't work with CPM + + include(ExternalProject) + + set(GUNROCK_DIR ${CMAKE_CURRENT_BINARY_DIR}/gunrock) + ExternalProject_Add(gunrock_ext + GIT_REPOSITORY https://github.com/gunrock/gunrock.git + GIT_TAG v${VERSION} + PREFIX ${GUNROCK_DIR} + CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= + -DGUNROCK_BUILD_SHARED_LIBS=OFF + -DGUNROCK_BUILD_TESTS=OFF + -DCUDA_AUTODETECT_GENCODE=OFF + -DGUNROCK_GENCODE_SM60=${GUNROCK_GENCODE_SM60} + -DGUNROCK_GENCODE_SM61=${GUNROCK_GENCODE_SM61} + -DGUNROCK_GENCODE_SM70=${GUNROCK_GENCODE_SM70} + -DGUNROCK_GENCODE_SM72=${GUNROCK_GENCODE_SM72} + -DGUNROCK_GENCODE_SM75=${GUNROCK_GENCODE_SM75} + -DGUNROCK_GENCODE_SM80=${GUNROCK_GENCODE_SM80} + BUILD_BYPRODUCTS ${GUNROCK_DIR}/src/gunrock_ext-build/lib/libgunrock.a + INSTALL_COMMAND "" + ) + + add_library(gunrock STATIC IMPORTED) + add_dependencies(gunrock gunrock_ext) + set_property(TARGET gunrock PROPERTY IMPORTED_LOCATION "${GUNROCK_DIR}/src/gunrock_ext-build/lib/libgunrock.a") + target_include_directories(gunrock INTERFACE "${GUNROCK_DIR}/src/gunrock_ext") + endif() +endfunction() + + +find_and_configure_gunrock(1.2) diff --git a/cpp/cmake/thirdparty/get_nccl.cmake b/cpp/cmake/thirdparty/get_nccl.cmake new file mode 100644 index 00000000000..30ec976f27c --- /dev/null +++ b/cpp/cmake/thirdparty/get_nccl.cmake @@ -0,0 +1,42 @@ +#============================================================================= +# Copyright (c) 
2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +function(find_and_configure_nccl) + + if(TARGET NCCL::NCCL) + return() + endif() + + set(oneValueArgs VERSION PINNED_TAG) + cmake_parse_arguments(PKG "${options}" "${oneValueArgs}" + "${multiValueArgs}" ${ARGN} ) + + rapids_find_generate_module(NCCL + HEADER_NAMES nccl.h + LIBRARY_NAMES nccl + ) + + # Currently NCCL has no CMake build-system so we require + # it built and installed on the machine already + rapids_find_package(NCCL REQUIRED) + +endfunction() + +find_and_configure_nccl() + + + + diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake new file mode 100644 index 00000000000..d8c9358e023 --- /dev/null +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -0,0 +1,48 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +function(find_and_configure_raft) + + set(oneValueArgs VERSION FORK PINNED_TAG) + cmake_parse_arguments(PKG "" "${oneValueArgs}" "" ${ARGN} ) + + rapids_cpm_find(raft ${PKG_VERSION} + GLOBAL_TARGETS raft::raft + BUILD_EXPORT_SET cugraph-exports + INSTALL_EXPORT_SET cugraph-exports + CPM_ARGS + GIT_REPOSITORY https://github.com/${PKG_FORK}/raft.git + GIT_TAG ${PKG_PINNED_TAG} + SOURCE_SUBDIR cpp + OPTIONS "BUILD_TESTS OFF" + ) + + message(VERBOSE "CUML: Using RAFT located in ${raft_SOURCE_DIR}") + +endfunction() + +set(CUGRAPH_MIN_VERSION_raft "${CUGRAPH_VERSION_MAJOR}.${CUGRAPH_VERSION_MINOR}.00") +set(CUGRAPH_BRANCH_VERSION_raft "${CUGRAPH_VERSION_MAJOR}.${CUGRAPH_VERSION_MINOR}") + + +# Change pinned tag and fork here to test a commit in CI +# To use a different RAFT locally, set the CMake variable +# RPM_raft_SOURCE=/path/to/local/raft +find_and_configure_raft(VERSION ${CUGRAPH_MIN_VERSION_raft} + FORK rapidsai + PINNED_TAG branch-${CUGRAPH_BRANCH_VERSION_raft} + ) + diff --git a/cpp/cmake/thirdparty/get_rmm.cmake b/cpp/cmake/thirdparty/get_rmm.cmake new file mode 100644 index 00000000000..aecb6489f92 --- /dev/null +++ b/cpp/cmake/thirdparty/get_rmm.cmake @@ -0,0 +1,47 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +#============================================================================= + +function(find_and_configure_rmm VERSION) + + if(${VERSION} MATCHES [=[([0-9]+)\.([0-9]+)\.([0-9]+)]=]) + set(MAJOR_AND_MINOR "${CMAKE_MATCH_1}.${CMAKE_MATCH_2}") + else() + set(MAJOR_AND_MINOR "${VERSION}") + endif() + + if(TARGET rmm::rmm) + return() + endif() + + rapids_cpm_find(rmm ${VERSION} + GLOBAL_TARGETS rmm::rmm + BUILD_EXPORT_SET cugraph-exports + INSTALL_EXPORT_SET cugraph-exports + CPM_ARGS + GIT_REPOSITORY https://github.com/rapidsai/rmm.git + GIT_TAG branch-${MAJOR_AND_MINOR} + GIT_SHALLOW TRUE + OPTIONS "BUILD_TESTS OFF" + "BUILD_BENCHMARKS OFF" + "CUDA_STATIC_RUNTIME ${CUDA_STATIC_RUNTIME}" + "DISABLE_DEPRECATION_WARNING ${DISABLE_DEPRECATION_WARNING}" + ) + +endfunction() + +set(CUGRAPH_MIN_VERSION_rmm "${CUGRAPH_VERSION_MAJOR}.${CUGRAPH_VERSION_MINOR}.00") + +find_and_configure_rmm(${CUGRAPH_MIN_VERSION_rmm}) diff --git a/cpp/cmake/thirdparty/get_thrust.cmake b/cpp/cmake/thirdparty/get_thrust.cmake new file mode 100644 index 00000000000..86fcffed5d2 --- /dev/null +++ b/cpp/cmake/thirdparty/get_thrust.cmake @@ -0,0 +1,29 @@ +#============================================================================= +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + +function(find_and_configure_thrust VERSION) + + rapids_cpm_find(Thrust ${VERSION} + CPM_ARGS + GIT_REPOSITORY https://github.com/thrust/thrust.git + GIT_TAG ${VERSION} + ) + + thrust_create_target(cugraph::Thrust FROM_OPTIONS) + +endfunction() + +find_and_configure_thrust(1.12.0) diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md index ba24d68aca5..b369183a262 100644 --- a/cpp/docs/DEVELOPER_GUIDE.md +++ b/cpp/docs/DEVELOPER_GUIDE.md @@ -146,9 +146,9 @@ Allocates a specified number of bytes of untyped, uninitialized device memory us `device_memory_resource`. If no resource is explicitly provided, uses `rmm::mr::get_current_device_resource()`. -`rmm::device_buffer` is copyable and movable. A copy performs a deep copy of the `device_buffer`'s -device memory, whereas a move moves ownership of the device memory from one `device_buffer` to -another. +`rmm::device_buffer` is movable and copyable on a stream. A copy performs a deep copy of the +`device_buffer`'s device memory on the specified stream, whereas a move moves ownership of the +device memory from one `device_buffer` to another. ```c++ // Allocates at least 100 bytes of uninitialized device memory @@ -156,17 +156,21 @@ another. 
rmm::device_buffer buff(100, stream, mr); void * raw_data = buff.data(); // Raw pointer to underlying device memory -rmm::device_buffer copy(buff); // Deep copies `buff` into `copy` -rmm::device_buffer moved_to(std::move(buff)); // Moves contents of `buff` into `moved_to` +// Deep copies `buff` into `copy` on `stream` +rmm::device_buffer copy(buff, stream); + +// Moves contents of `buff` into `moved_to` +rmm::device_buffer moved_to(std::move(buff)); custom_memory_resource *mr...; -rmm::device_buffer custom_buff(100, mr); // Allocates 100 bytes from the custom_memory_resource +// Allocates 100 bytes from the custom_memory_resource +rmm::device_buffer custom_buff(100, stream, mr); ``` #### `rmm::device_uvector` -Similar to a `rmm::device_vector`, allocates a contiguous set of elements in device memory but with key -differences: +Similar to a `rmm::device_vector`, allocates a contiguous set of elements in device memory but with +key differences: - As an optimization, elements are uninitialized and no synchronization occurs at construction. This limits the types `T` to trivially copyable types. - All operations are stream ordered (i.e., they accept a `cuda_stream_view` specifying the stream diff --git a/cpp/include/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp similarity index 96% rename from cpp/include/algorithms.hpp rename to cpp/include/cugraph/algorithms.hpp index 9f1cb02df0c..0b0dd88ce29 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -15,12 +15,12 @@ */ #pragma once -#include -#include -#include +#include +#include +#include -#include -#include +#include +#include #include @@ -142,6 +142,8 @@ void overlap_list(GraphCSRView const &graph, * @tparam weight_t Type of edge weights. Supported values : float * or double. * + * @param[in] handle Library handle (RAFT). If a communicator is set in the + * handle, the multi GPU version will be selected.
* @param[in] graph cuGraph graph descriptor, should contain the * connectivity information as a COO. Graph is considered undirected. Edge weights are used for this * algorithm and set to 1 by default. @@ -178,7 +180,8 @@ void overlap_list(GraphCSRView const &graph, * */ template -void force_atlas2(GraphCOOView &graph, +void force_atlas2(raft::handle_t const &handle, + GraphCOOView &graph, float *pos, const int max_iter = 500, float *x_start = nullptr, @@ -1066,6 +1069,8 @@ namespace experimental { * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Graph view object. @@ -1104,6 +1109,8 @@ void bfs(raft::handle_t const &handle, * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Graph view object. @@ -1136,6 +1143,8 @@ void sssp(raft::handle_t const &handle, * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. * @tparam result_t Type of PageRank scores. 
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Graph view object. @@ -1188,6 +1197,8 @@ void pagerank(raft::handle_t const &handle, * @tparam edge_t Type of edge identifiers. Needs to be an integral type. * @tparam weight_t Type of edge weights. Needs to be a floating point type. * @tparam result_t Type of Katz Centrality scores. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Graph view object. @@ -1290,5 +1301,29 @@ random_walks(raft::handle_t const &handle, index_t max_depth, bool use_padding = false); +/** + * @brief Finds (weakly-connected-)component IDs of each vertex in the input graph. + * + * The input graph must be symmetric. Component IDs can be arbitrary integers (they can be + * non-consecutive and are not ordered by component size or any other criterion). + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object. + * @param components Pointer to the output component ID array. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ */ +template +void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + vertex_t *components, + bool do_expensive_check = false); + } // namespace experimental -} // namespace cugraph +} // namespace cugraph \ No newline at end of file diff --git a/cpp/include/compute_partition.cuh b/cpp/include/cugraph/compute_partition.cuh similarity index 99% rename from cpp/include/compute_partition.cuh rename to cpp/include/cugraph/compute_partition.cuh index 5c03b0971f2..6405d239adc 100644 --- a/cpp/include/compute_partition.cuh +++ b/cpp/include/cugraph/compute_partition.cuh @@ -17,7 +17,7 @@ #include -#include +#include #include diff --git a/cpp/include/dendrogram.hpp b/cpp/include/cugraph/dendrogram.hpp similarity index 100% rename from cpp/include/dendrogram.hpp rename to cpp/include/cugraph/dendrogram.hpp diff --git a/cpp/include/eidecl_graph.hpp b/cpp/include/cugraph/eidecl_graph.hpp similarity index 99% rename from cpp/include/eidecl_graph.hpp rename to cpp/include/cugraph/eidecl_graph.hpp index 03f6a675597..3e3d9ac5b31 100644 --- a/cpp/include/eidecl_graph.hpp +++ b/cpp/include/cugraph/eidecl_graph.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/eidir_graph.hpp b/cpp/include/cugraph/eidir_graph.hpp similarity index 98% rename from cpp/include/eidir_graph.hpp rename to cpp/include/cugraph/eidir_graph.hpp index d7273b9ea37..5bd6c233641 100644 --- a/cpp/include/eidir_graph.hpp +++ b/cpp/include/cugraph/eidir_graph.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/cugraph/experimental/detail/graph_utils.cuh similarity index 97% rename from cpp/include/experimental/detail/graph_utils.cuh rename to cpp/include/cugraph/experimental/detail/graph_utils.cuh index d79788e59ce..e9f86eb9d62 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/cugraph/experimental/detail/graph_utils.cuh @@ -15,10 +15,10 @@ */ #pragma once -#include -#include -#include -#include +#include +#include +#include +#include #include #include diff --git a/cpp/include/experimental/eidecl_graph.hpp b/cpp/include/cugraph/experimental/eidecl_graph.hpp similarity index 99% rename from cpp/include/experimental/eidecl_graph.hpp rename to cpp/include/cugraph/experimental/eidecl_graph.hpp index b8ac201008a..18e617c0993 100644 --- a/cpp/include/experimental/eidecl_graph.hpp +++ b/cpp/include/cugraph/experimental/eidecl_graph.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/experimental/eidir_graph.hpp b/cpp/include/cugraph/experimental/eidir_graph.hpp similarity index 98% rename from cpp/include/experimental/eidir_graph.hpp rename to cpp/include/cugraph/experimental/eidir_graph.hpp index 8998943ec16..93aa333dc5b 100644 --- a/cpp/include/experimental/eidir_graph.hpp +++ b/cpp/include/cugraph/experimental/eidir_graph.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/experimental/graph.hpp b/cpp/include/cugraph/experimental/graph.hpp similarity index 88% rename from cpp/include/experimental/graph.hpp rename to cpp/include/cugraph/experimental/graph.hpp index 27f766b8593..1c829016516 100644 --- a/cpp/include/experimental/graph.hpp +++ b/cpp/include/cugraph/experimental/graph.hpp @@ -15,8 +15,8 @@ */ #pragma once -#include -#include +#include +#include #include #include @@ -157,6 +157,27 @@ class graph_t &&offsets, + rmm::device_uvector &&indices, + rmm::device_uvector &&weights, + std::vector &&segment_offsets) + : detail::graph_base_t( + handle, number_of_vertices, number_of_edges, properties), + offsets_(std::move(offsets)), + indices_(std::move(indices)), + weights_(std::move(weights)), + segment_offsets_(std::move(segment_offsets)) + { + } + rmm::device_uvector offsets_; rmm::device_uvector indices_; rmm::device_uvector weights_; @@ -189,6 +210,10 @@ template struct invalid_edge_id : invalid_idx { }; +template +struct invalid_component_id : invalid_idx { +}; + template __host__ __device__ std::enable_if_t::value, bool> is_valid_vertex( vertex_t num_vertices, vertex_t v) diff --git a/cpp/include/experimental/graph_functions.hpp b/cpp/include/cugraph/experimental/graph_functions.hpp similarity index 76% rename from cpp/include/experimental/graph_functions.hpp rename to cpp/include/cugraph/experimental/graph_functions.hpp index b48dc6da136..8f5dbb1138b 100644 --- a/cpp/include/experimental/graph_functions.hpp +++ b/cpp/include/cugraph/experimental/graph_functions.hpp @@ -15,13 +15,14 @@ */ #pragma once -#include -#include +#include +#include #include #include #include +#include #include #include @@ -31,8 +32,8 @@ namespace experimental { /** * @brief renumber edgelist (multi-GPU) * - * This function assumes that edges are pre-shuffled to their target processes using the - * compute_gpu_id_from_edge_t functor. 
+ * This function assumes that vertices and edges are pre-shuffled to their target processes using + * the compute_gpu_id_from_vertex_t & compute_gpu_id_from_edge_t functors, respectively. * * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. * @tparam edge_t Type of edge identifiers. Needs to be an integral type. @@ -40,6 +41,11 @@ namespace experimental { * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. + * @param optional_local_vertex_span If valid, part of the entire set of vertices in the graph to be + * renumbered. The first tuple element is the pointer to the array and the second tuple element is + * the size of the array. This parameter can be used to include isolated vertices. Applying the + * compute_gpu_id_from_vertex_t to every vertex should return the local GPU ID for this function to + * work (vertices should be pre-shuffled). * @param edgelist_major_vertices Pointers (one pointer per local graph adjacency matrix partition * assigned to this process) to edge source vertex IDs (if the graph adjacency matrix is stored as * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex @@ -68,6 +74,7 @@ template std::enable_if_t, partition_t, vertex_t, edge_t>> renumber_edgelist(raft::handle_t const& handle, + std::optional> optional_local_vertex_span, std::vector const& edgelist_major_vertices /* [INOUT] */, std::vector const& edgelist_minor_vertices /* [INOUT] */, std::vector const& edgelist_edge_counts, @@ -82,90 +89,9 @@ renumber_edgelist(raft::handle_t const& handle, * or multi-GPU (true). * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and * handles to various CUDA libraries) to run graph algorithms. 
- * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as - * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex - * IDs are updated in-place ([INOUT] parameter). - * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is - * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). - * Vertex IDs are updated in-place ([INOUT] parameter). - * @param num_edgelist_edges Number of edges in the edgelist. - * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). - * @return rmm::device_uvector Labels (vertex IDs before renumbering) for the entire set - * of vertices. - */ -template -std::enable_if_t> renumber_edgelist( - raft::handle_t const& handle, - vertex_t* edgelist_major_vertices /* [INOUT] */, - vertex_t* edgelist_minor_vertices /* [INOUT] */, - edge_t num_edgelist_edges, - bool do_expensive_check = false); - -/** - * @brief renumber edgelist (multi-GPU) - * - * This version takes the vertex set in addition; this allows renumbering to include isolated - * vertices. This function assumes that vertices and edges are pre-shuffled to their target - * processes using the compute_gpu_id_from_vertex_t & compute_gpu_id_from_edge_t functors, - * respectively. - * - * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. - * @tparam edge_t Type of edge identifiers. Needs to be an integral type. - * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) - * or multi-GPU (true). - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param local_vertices Part of the entire set of vertices in the graph to be renumbered. 
Applying - * the compute_gpu_id_from_vertex_t to every vertex should return the local GPU ID for this function - * to work (vertices should be pre-shuffled). - * @param num_local_vertices Number of local vertices. - * @param edgelist_major_vertices Pointers (one pointer per local graph adjacency matrix partition - * assigned to this process) to edge source vertex IDs (if the graph adjacency matrix is stored as - * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex - * IDs are updated in-place ([INOUT] parameter). Edges should be pre-shuffled to their final target - * process & matrix partition; i.e. applying the compute_gpu_id_from_edge_t functor to every (major, - * minor) pair should return the GPU ID of this process and applying the - * compute_partition_id_from_edge_t fuctor to every (major, minor) pair for a local matrix partition - * should return the partition ID of the corresponding matrix partition. - * @param edgelist_minor_vertices Pointers (one pointer per local graph adjacency matrix partition - * assigned to this process) to edge destination vertex IDs (if the graph adjacency matrix is stored - * as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). Vertex IDs - * are updated in-place ([INOUT] parameter). Edges should be pre-shuffled to their final target - * process & matrix partition; i.e. applying the compute_gpu_id_from_edge_t functor to every (major, - * minor) pair should return the GPU ID of this process and applying the - * compute_partition_id_from_edge_t fuctor to every (major, minor) pair for a local matrix partition - * should return the partition ID of the corresponding matrix partition. - * @param edgelist_edge_counts Edge counts (one count per local graph adjacency matrix partition - * assigned to this process). - * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). 
- * @return std::tuple, partition_t, vertex_t, edge_t> - * Quadruplet of labels (vertex IDs before renumbering) for the entire set of vertices (assigned to - * this process in multi-GPU), partition_t object storing graph partitioning information, total - * number of vertices, and total number of edges. - */ -template -std::enable_if_t, partition_t, vertex_t, edge_t>> -renumber_edgelist(raft::handle_t const& handle, - vertex_t const* local_vertices, - vertex_t num_local_vertices, - std::vector const& edgelist_major_vertices /* [INOUT] */, - std::vector const& edgelist_minor_vertices /* [INOUT] */, - std::vector const& edgelist_edge_counts, - bool do_expensive_check = false); - -/** - * @brief renumber edgelist (single-GPU) - * - * This version takes the vertex set in addition; this allows renumbering to include isolated - * vertices. - * - * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. - * @tparam edge_t Type of edge identifiers. Needs to be an integral type. - * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) - * or multi-GPU (true). - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. + * @param optional_vertex_span If valid, vertices in the graph to be renumbered. The first + * tuple element is the pointer to the array and the second tuple element is the size of the array. + * This parameter can be used to include isolated vertices. * @param vertices The entire set of vertices in the graph to be renumbered. * @param num_vertices Number of vertices.
* @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as @@ -182,8 +108,7 @@ renumber_edgelist(raft::handle_t const& handle, template std::enable_if_t> renumber_edgelist( raft::handle_t const& handle, - vertex_t const* vertices, - vertex_t num_vertices, + std::optional> optional_vertex_span, vertex_t* edgelist_major_vertices /* [INOUT] */, vertex_t* edgelist_minor_vertices /* [INOUT] */, edge_t num_edgelist_edges, @@ -284,7 +209,7 @@ void unrenumber_int_vertices(raft::handle_t const& handle, vertex_t const* renumber_map_labels, vertex_t local_int_vertex_first, vertex_t local_int_vertex_last, - std::vector& vertex_partition_lasts, + std::vector const& vertex_partition_lasts, bool do_expensive_check = false); /** @@ -338,6 +263,10 @@ coarsen_graph( * @param labels Labels to be relabeled. This initially holds old labels. Old labels are updated to * new labels in-place ([INOUT] parameter). * @param num_labels Number of labels to be relabeled. + * @param skip_missing_labels Flag dictating the behavior on missing labels (@p labels contains old + * labels missing in @p old_new_label_pairs). If set to true, missing elements are skipped (not + * relabeled). If set to false, undefined behavior (if @p do_expensive_check is set to true, this + * function will throw an exception). * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). * @return rmm::device_uvector New labels corresponding to the @p old_labels. */ @@ -347,6 +276,7 @@ void relabel(raft::handle_t const& handle, vertex_t num_label_pairs, vertex_t* labels /* [INOUT] */, vertex_t num_labels, + bool skip_missing_labels, bool do_expensive_check = false); /** @@ -393,5 +323,48 @@ extract_induced_subgraphs( size_t num_subgraphs, bool do_expensive_check = false); +/** + * @brief create a graph from (the optional vertex list and) the given edge list. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. 
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam store_transposed Flag indicating whether to store the graph adjacency matrix as is or as + * transposed. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param optional_vertex_span If valid, part of the entire set of vertices in the graph to be + * renumbered. The first tuple element is the pointer to the array and the second tuple element is + * the size of the array. This parameter can be used to include isolated vertices. If multi-GPU, + * applying the compute_gpu_id_from_vertex_t to every vertex should return the local GPU ID for this + * function to work (vertices should be pre-shuffled). + * @param edgelist_rows Vector of edge row (source) vertex IDs. + * @param edgelist_cols Vector of edge column (destination) vertex IDs. + * @param edgelist_weights Vector of edge weights. + * @param graph_properties Properties of the graph represented by the input (optional vertex list + * and) edge list. + * @param renumber Flag indicating whether to renumber vertices or not. + * @return std::tuple, rmm::device_uvector> Pair of the generated graph and the renumber map. The + * size of the renumber map is 0 if @p renumber is false.
+ */ +template +std::tuple, + rmm::device_uvector> +create_graph_from_edgelist( + raft::handle_t const& handle, + std::optional> optional_vertex_span, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + graph_properties_t graph_properties, + bool renumber); + } // namespace experimental } // namespace cugraph diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/cugraph/experimental/graph_view.hpp similarity index 90% rename from cpp/include/experimental/graph_view.hpp rename to cpp/include/cugraph/experimental/graph_view.hpp index e9593b70ddb..45e716c3647 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/cugraph/experimental/graph_view.hpp @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include @@ -29,6 +29,9 @@ #include namespace cugraph { +namespace serializer { +class serializer_t; // forward... +} namespace experimental { /** @@ -254,6 +257,8 @@ class graph_base_t { bool is_weighted() const { return properties_.is_weighted; } protected: + friend class cugraph::serializer::serializer_t; + raft::handle_t const* get_handle_ptr() const { return handle_ptr_; }; graph_properties_t get_graph_properties() const { return properties_; } @@ -379,6 +384,24 @@ class graph_view_t + std::enable_if_t get_local_adj_matrix_partition_row_first() const + { + return partition_.get_matrix_partition_minor_first(); + } + + template + std::enable_if_t get_local_adj_matrix_partition_row_last() const + { + return partition_.get_matrix_partition_minor_last(); + } + + template + std::enable_if_t get_number_of_local_adj_matrix_partition_rows() const + { + return get_local_adj_matrix_partition_row_last() - get_local_adj_matrix_partition_row_first(); + } + vertex_t get_local_adj_matrix_partition_row_first(size_t adj_matrix_partition_idx) const { return store_transposed ? 
partition_.get_matrix_partition_minor_first() @@ -405,6 +428,24 @@ class graph_view_t + std::enable_if_t get_local_adj_matrix_partition_col_first() const + { + return partition_.get_matrix_partition_minor_first(); + } + + template + std::enable_if_t get_local_adj_matrix_partition_col_last() const + { + return partition_.get_matrix_partition_minor_last(); + } + + template + std::enable_if_t get_number_of_local_adj_matrix_partition_cols() const + { + return get_local_adj_matrix_partition_col_last() - get_local_adj_matrix_partition_col_first(); + } + vertex_t get_local_adj_matrix_partition_col_first(size_t adj_matrix_partition_idx) const { return store_transposed ? partition_.get_matrix_partition_major_first(adj_matrix_partition_idx) @@ -586,6 +627,24 @@ class graph_view_tget_number_of_edges(); } + template + std::enable_if_t get_local_adj_matrix_partition_row_first() const + { + return get_local_adj_matrix_partition_row_first(0); + } + + template + std::enable_if_t get_local_adj_matrix_partition_row_last() const + { + return get_local_adj_matrix_partition_row_last(0); + } + + template + std::enable_if_t get_number_of_local_adj_matrix_partition_rows() const + { + return get_number_of_local_adj_matrix_partition_rows(0); + } + vertex_t get_local_adj_matrix_partition_row_first(size_t adj_matrix_partition_idx) const { assert(adj_matrix_partition_idx == 0); @@ -605,6 +664,24 @@ class graph_view_t + std::enable_if_t get_local_adj_matrix_partition_col_first() const + { + return get_local_adj_matrix_partition_col_first(0); + } + + template + std::enable_if_t get_local_adj_matrix_partition_col_last() const + { + return get_local_adj_matrix_partition_col_last(0); + } + + template + std::enable_if_t get_number_of_local_adj_matrix_partition_cols() const + { + return get_number_of_local_adj_matrix_partition_cols(0); + } + vertex_t get_local_adj_matrix_partition_col_first(size_t adj_matrix_partition_idx) const { assert(adj_matrix_partition_idx == 0); diff --git 
a/cpp/include/functions.hpp b/cpp/include/cugraph/functions.hpp similarity index 97% rename from cpp/include/functions.hpp rename to cpp/include/cugraph/functions.hpp index ede1be3767f..00e8648b156 100644 --- a/cpp/include/functions.hpp +++ b/cpp/include/cugraph/functions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ #include #include -#include +#include namespace cugraph { diff --git a/cpp/include/graph.hpp b/cpp/include/cugraph/graph.hpp similarity index 100% rename from cpp/include/graph.hpp rename to cpp/include/cugraph/graph.hpp diff --git a/cpp/include/cugraph/graph_generators.hpp b/cpp/include/cugraph/graph_generators.hpp new file mode 100644 index 00000000000..9bd002b4299 --- /dev/null +++ b/cpp/include/cugraph/graph_generators.hpp @@ -0,0 +1,364 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include +#include + +namespace cugraph { + +/** + * @brief generate an edge list for an R-mat graph. + * + * This function allows multi-edges and self-loops similar to the Graph 500 reference + * implementation. 
+ * + * NOTE: The scramble_vertex_ids function needs to be called in order to generate a + * graph conforming to the Graph 500 specification (note that scrambling does not + * affect cuGraph's graph construction performance, so this is generally unnecessary). + * If `edge_factor` is given (e.g. Graph 500), set @p num_edges to + * (size_t{1} << @p scale) * `edge_factor`. To generate an undirected graph, set @p b == @p c and @p + * clip_and_flip = true. All the resulting edges will be placed in the lower triangular part + * (including the diagonal) of the graph adjacency matrix. + * + * For multi-GPU generation with `P` GPUs, @p seed should be set to different values in different + * GPUs to avoid every GPU generating the same set of edges. @p num_edges should be adjusted as + * well; e.g. assuming `edge_factor` is given, set @p num_edges = (size_t{1} << @p scale) * + * `edge_factor` / `P` + (rank < (((size_t{1} << @p scale) * `edge_factor`) % P) ? 1 : 0). + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param scale Scale factor to set the number of vertices in the graph. Vertex IDs have values in + * [0, V), where V = 1 << @p scale. + * @param num_edges Number of edges to generate. + * @param a a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (see https://graph500.org + * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger + * than 1.0. + * @param b a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (see https://graph500.org + * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger + * than 1.0. + * @param c a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (see https://graph500.org + * for additional details).
a, b, c, d should be non-negative and a + b + c should be no larger + * than 1.0. + * @param seed Seed value for the random number generator. + * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part + * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to + * `false`). + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. + */ +template +std::tuple, rmm::device_uvector> generate_rmat_edgelist( + raft::handle_t const &handle, + size_t scale, + size_t num_edges, + double a = 0.57, + double b = 0.19, + double c = 0.19, + uint64_t seed = 0, + bool clip_and_flip = false); + +enum class generator_distribution_t { POWER_LAW = 0, UNIFORM }; + +/** + * @brief generate multiple edge lists using the R-mat graph generator. + * + * This function allows multi-edges and self-loops similar to the Graph 500 reference + * implementation. + * + * NOTE: The scramble_vertex_ids function needs to be called in order to generate a + * graph conforming to the Graph 500 specification (note that scrambling does not + * affect cuGraph's graph construction performance, so this is generally unnecessary). + * If `edge_factor` is given (e.g. Graph 500), set @p num_edges to + * (size_t{1} << @p scale) * `edge_factor`. To generate an undirected graph, set @p b == @p c and @p + * clip_and_flip = true. All the resulting edges will be placed in the lower triangular part + * (including the diagonal) of the graph adjacency matrix. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param n_edgelists Number of edge lists (graphs) to generate + * @param min_scale Scale factor to set the minimum number of vertices in the graph.
+ * @param max_scale Scale factor to set the maximum number of vertices in the graph. + * @param edge_factor Average number of edges per vertex to generate. + * @param size_distribution Distribution of the graph sizes, impacts the scale parameter of the + * R-MAT generator + * @param edge_distribution Edges distribution for each graph, impacts how R-MAT parameters a, b, c, d + * are set. + * @param seed Seed value for the random number generator. + * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part + * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to + * `false`). + * @return A vector of std::tuple, rmm::device_uvector> of + * size @p n_edgelists, each vector element being a tuple of rmm::device_uvector objects for edge + * source vertex IDs and edge destination vertex IDs. + */ +template +std::vector, rmm::device_uvector>> +generate_rmat_edgelists( + raft::handle_t const &handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor = 16, + generator_distribution_t size_distribution = generator_distribution_t::POWER_LAW, + generator_distribution_t edge_distribution = generator_distribution_t::POWER_LAW, + uint64_t seed = 0, + bool clip_and_flip = false); + +/** + * @brief generate an edge list for a path graph + * + * A path graph of size n connects the vertices from 0 to (n - 1) + * in a single long path: ((0,1), (1,2), ..., (n - 2, n - 1)) + * + * If executed in a multi-gpu context (handle comms has been initialized) + * the path will span all GPUs including an edge from the last vertex on + * GPU i to the first vertex on GPU (i+1) + * + * This function will generate a collection of path graphs. @p component_parameters_v + * defines the parameters for generating each component. Each element of + * @p component_parameters_v defines a tuple consisting of the number of vertices + * and the base vertex id for the component. 
+ * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param component_parameters_v A vector containing tuples consisting of the number of vertices and + * base vertex id for each component to generate. + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. + */ +template +std::tuple, rmm::device_uvector> +generate_path_graph_edgelist( + raft::handle_t const &handle, + std::vector> const &component_parameters_v); + +/** + * @brief generate an edge list for a 2D Mesh Graph + * + * A sequence of 2D mesh graphs will be constructed according to the + * component specifications. Each 2D mesh graph is configured with a tuple + * containing (x, y, base_vertex_id). @p component_parameters_v will contain + * a tuple for each component. + * + * If executed in a multi-gpu context (handle comms has been initialized) + * each GPU will generate disjoint 2D mesh constructs of equal size. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param component_parameters_v Vector containing tuple defining the configuration of each + * component + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. 
+ */ +template +std::tuple, rmm::device_uvector> +generate_2d_mesh_graph_edgelist( + raft::handle_t const &handle, + std::vector> const &component_parameters_v); + +/** + * @brief generate an edge list for a 3D Mesh Graph + * + * A sequence of 3D mesh graphs will be constructed according to the + * component specifications. Each 3D mesh graph is configured with a tuple + * containing (x, y, z, base_vertex_id). @p component_parameters_v will contain + * a tuple for each component. + * + * If executed in a multi-gpu context (handle comms has been initialized) + * each GPU will generate disjoint 3D mesh constructs of equal size. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param component_parameters_v Vector containing tuple defining the configuration of each + * component + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. + */ +template +std::tuple, rmm::device_uvector> +generate_3d_mesh_graph_edgelist( + raft::handle_t const &handle, + std::vector> const &component_parameters_v); + +/** + * @brief generate an edge list for some complete graphs + * + * A sequence of complete graphs will be constructed according to the + * component specifications. Each complete graph is configured with a tuple + * containing (n, base_vertex_id). @p component_parameters_v will contain + * a tuple for each component. + * + * If executed in a multi-gpu context (handle comms has been initialized) + * each GPU will generate disjoint complete graph constructs of equal size. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. 
CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param component_parameters_v Vector containing tuple defining the configuration of each + * component + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. + */ +template +std::tuple, rmm::device_uvector> +generate_complete_graph_edgelist( + raft::handle_t const &handle, + std::vector> const &component_parameters_v); + +/** + * @brief generate an edge list for an Erdos-Renyi graph + * + * This API supports the G(n,p) model which requires O(n^2) work. + * + * If executed in a multi-gpu context (handle comms has been initialized) + * each GPU will generate Erdos-Renyi edges for its portion of the 2D + * partitioning of the adjacency matrix. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param num_vertices Number of vertices to use in the generated graph + * @param p Probability for edge creation + * @param base_vertex_id Starting vertex id for the generated graph + * @param seed Seed value for the random number generator. + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. + */ +template +std::tuple, rmm::device_uvector> +generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const &handle, + vertex_t num_vertices, + float p, + vertex_t base_vertex_id, + uint64_t seed = 0); + +/** + * @brief generate an edge list for an Erdos-Renyi graph + * + * This API supports the G(n,m) model + * + * If executed in a multi-gpu context (handle comms has been initialized) + * each GPU will generate Erdos-Renyi edges for its portion of the 2D + * partitioning of the adjacency matrix. 
+ * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param num_vertices Number of vertices to use in the generated graph + * @param m Number of edges to generate + * @param base_vertex_id Starting vertex id for the generated graph + * @param seed Seed value for the random number generator. + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. + */ +template +std::tuple, rmm::device_uvector> +generate_erdos_renyi_graph_edgelist_gnm(raft::handle_t const &handle, + vertex_t num_vertices, + size_t m, + vertex_t base_vertex_id, + uint64_t seed = 0); + +/** + * @brief symmetrize an edgelist + * + * Given an edgelist for a graph, symmetrize and deduplicate edges. + * + * If a duplicate edge exists in a weighted graph, one of the weights is arbitrarily + * returned. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam weight_t Type of weights. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param d_src_v Vector of source vertices + * @param d_dst_v Vector of destination vertices + * @param optional_d_weights_v Optional vector of edge weights + * @return std::tuple, rmm::device_uvector> A tuple of + * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. 
+ */ +template +std::tuple, + rmm::device_uvector, + std::optional>> +symmetrize_edgelist(raft::handle_t const &handle, + rmm::device_uvector &&d_src_v, + rmm::device_uvector &&d_dst_v, + std::optional> &&optional_d_weights_v); + +/** + * @brief scramble vertex ids in a graph + * + * Given an edgelist for a graph, scramble all vertex ids by the given offset. + * This translation is done in place. + * + * The scramble code here follows the algorithm in the Graph 500 reference + * implementation version 3.0.0. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param d_src_v Vector of source vertices + * @param d_dst_v Vector of destination vertices + * @param vertex_id_offset Offset to add to each vertex id + * @param seed Used to initialize random number generator + */ +template +void scramble_vertex_ids(raft::handle_t const &handle, + rmm::device_uvector &d_src_v, + rmm::device_uvector &d_dst_v, + vertex_t vertex_id_offset, + uint64_t seed = 0); + +/** + * @brief Combine edgelists from multiple sources into a single edgelist + * + * If executed in a multi-gpu context (handle comms has been initialized) + * each GPU will operate only on its subset of data. Any shuffling to get + * edges onto the same GPU should be done prior to calling this function. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. 
+ * @param d_sources The source vertex ids to combine + * @param d_dests The destination vertex ids to combine + * @param optional_d_weights Optional vector of weights to combine + * @param remove_multi_edges If true (the default) then remove multi edges, if false leave them in + * @return std::tuple, rmm::device_uvector, + * rmm::device_uvector> A tuple of rmm::device_uvector objects for edge source vertex IDs + * and edge destination vertex IDs and edge weights. + */ +template +std::tuple, + rmm::device_uvector, + std::optional>> +combine_edgelists(raft::handle_t const &handle, + std::vector> &&d_sources, + std::vector> &&d_dests, + std::optional>> &&optional_d_weights, + bool remove_multi_edges = true); + +} // namespace cugraph diff --git a/cpp/include/internals.hpp b/cpp/include/cugraph/internals.hpp similarity index 96% rename from cpp/include/internals.hpp rename to cpp/include/cugraph/internals.hpp index f71426491e3..cc6b3031079 100644 --- a/cpp/include/internals.hpp +++ b/cpp/include/cugraph/internals.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/matrix_partition_device.cuh b/cpp/include/cugraph/matrix_partition_device.cuh similarity index 97% rename from cpp/include/matrix_partition_device.cuh rename to cpp/include/cugraph/matrix_partition_device.cuh index 30d6540bcfe..8951e4269bd 100644 --- a/cpp/include/matrix_partition_device.cuh +++ b/cpp/include/cugraph/matrix_partition_device.cuh @@ -15,8 +15,8 @@ */ #pragma once -#include -#include +#include +#include #include @@ -38,6 +38,9 @@ class matrix_partition_device_base_t { __host__ __device__ edge_t get_number_of_edges() const { return number_of_edges_; } + __host__ __device__ vertex_t const* get_indices() const { return indices_; } + __host__ __device__ weight_t const* get_weights() const { return weights_; } + __device__ thrust::tuple get_local_edges( vertex_t major_offset) const noexcept { diff --git a/cpp/include/partition_manager.hpp b/cpp/include/cugraph/partition_manager.hpp similarity index 98% rename from cpp/include/partition_manager.hpp rename to cpp/include/cugraph/partition_manager.hpp index 431655e5642..c7657d459b2 100644 --- a/cpp/include/partition_manager.hpp +++ b/cpp/include/cugraph/partition_manager.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/patterns/any_of_adj_matrix_row.cuh b/cpp/include/cugraph/patterns/any_of_adj_matrix_row.cuh similarity index 95% rename from cpp/include/patterns/any_of_adj_matrix_row.cuh rename to cpp/include/cugraph/patterns/any_of_adj_matrix_row.cuh index a367ec2a50c..94cdae1ec95 100644 --- a/cpp/include/patterns/any_of_adj_matrix_row.cuh +++ b/cpp/include/cugraph/patterns/any_of_adj_matrix_row.cuh @@ -15,9 +15,9 @@ */ #pragma once -#include -#include -#include +#include +#include +#include #include #include diff --git a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh b/cpp/include/cugraph/patterns/copy_to_adj_matrix_row_col.cuh similarity index 98% rename from cpp/include/patterns/copy_to_adj_matrix_row_col.cuh rename to cpp/include/cugraph/patterns/copy_to_adj_matrix_row_col.cuh index 26a4eed4213..e2ab135691e 100644 --- a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh +++ b/cpp/include/cugraph/patterns/copy_to_adj_matrix_row_col.cuh @@ -15,16 +15,16 @@ */ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/cugraph/patterns/copy_v_transform_reduce_in_out_nbr.cuh similarity index 98% rename from cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh rename to cpp/include/cugraph/patterns/copy_v_transform_reduce_in_out_nbr.cuh index 6aded0eccf0..4284396370d 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh +++ b/cpp/include/cugraph/patterns/copy_v_transform_reduce_in_out_nbr.cuh @@ -15,14 +15,14 @@ */ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -101,6 +101,7 @@ __global__ void 
for_all_major_for_all_nbr_low_degree( ? static_cast(major_offset) : minor_offset; return evaluate_edge_op() @@ -189,6 +190,7 @@ __global__ void for_all_major_for_all_nbr_mid_degree( ? static_cast(major_offset) : minor_offset; auto e_op_result = evaluate_edge_op() @@ -263,6 +265,7 @@ __global__ void for_all_major_for_all_nbr_high_degree( ? static_cast(major_offset) : minor_offset; auto e_op_result = evaluate_edge_op() diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh similarity index 75% rename from cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh rename to cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh index 9a1d9fea24c..f9c6fed059b 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh +++ b/cpp/include/cugraph/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -15,110 +15,28 @@ */ #pragma once -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include #include - -#include +#include +#include #include namespace cugraph { namespace experimental { -namespace detail { - -// FIXME: block size requires tuning -int32_t constexpr copy_v_transform_reduce_key_aggregated_out_nbr_for_all_block_size = 128; - -template -__global__ void for_all_major_for_all_nbr_low_degree( - matrix_partition_device_t matrix_partition, - typename GraphViewType::vertex_type major_first, - typename GraphViewType::vertex_type major_last, - VertexIterator adj_matrix_minor_key_first, - typename GraphViewType::vertex_type* major_vertices, - typename GraphViewType::vertex_type* minor_keys, - typename GraphViewType::weight_type* key_aggregated_edge_weights, - typename GraphViewType::vertex_type invalid_vertex) -{ - 
using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using weight_t = typename GraphViewType::weight_type; - - auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); - auto idx = static_cast(tid); - - while (idx < static_cast(major_last - major_first)) { - auto major_offset = major_start_offset + idx; - vertex_t const* indices{nullptr}; - weight_t const* weights{nullptr}; - edge_t local_degree{}; - thrust::tie(indices, weights, local_degree) = - matrix_partition.get_local_edges(static_cast(major_offset)); - if (local_degree > 0) { - auto local_offset = matrix_partition.get_local_offset(major_offset); - auto minor_key_first = thrust::make_transform_iterator( - indices, [matrix_partition, adj_matrix_minor_key_first] __device__(auto minor) { - return *(adj_matrix_minor_key_first + - matrix_partition.get_minor_offset_from_minor_nocheck(minor)); - }); - thrust::copy( - thrust::seq, minor_key_first, minor_key_first + local_degree, minor_keys + local_offset); - if (weights == nullptr) { - thrust::sort( - thrust::seq, minor_keys + local_offset, minor_keys + local_offset + local_degree); - } else { - thrust::copy( - thrust::seq, weights, weights + local_degree, key_aggregated_edge_weights + local_offset); - thrust::sort_by_key(thrust::seq, - minor_keys + local_offset, - minor_keys + local_offset + local_degree, - key_aggregated_edge_weights + local_offset); - } - // in-place reduce_by_key - vertex_t key_idx{0}; - key_aggregated_edge_weights[local_offset + key_idx] = - weights != nullptr ? key_aggregated_edge_weights[local_offset] : weight_t{1.0}; - - for (edge_t i = 1; i < local_degree; ++i) { - if (minor_keys[local_offset + i] == minor_keys[local_offset + key_idx]) { - key_aggregated_edge_weights[local_offset + key_idx] += - weights != nullptr ? 
key_aggregated_edge_weights[local_offset + i] : weight_t{1.0}; - } else { - ++key_idx; - minor_keys[local_offset + key_idx] = minor_keys[local_offset + i]; - key_aggregated_edge_weights[local_offset + key_idx] = - weights != nullptr ? key_aggregated_edge_weights[local_offset + i] : weight_t{1.0}; - } - } - thrust::fill(thrust::seq, - major_vertices + local_offset, - major_vertices + local_offset + key_idx + 1, - matrix_partition.get_major_from_major_offset_nocheck(major_offset)); - thrust::fill(thrust::seq, - major_vertices + local_offset + key_idx + 1, - major_vertices + local_offset + local_degree, - invalid_vertex); - } - - idx += gridDim.x * blockDim.x; - } -} - -} // namespace detail - /** * @brief Iterate over every vertex's key-aggregated outgoing edges to update vertex properties. * @@ -209,8 +127,14 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( // 1. build a cuco::static_map object for the k, v pairs. - auto kv_map_ptr = std::make_unique>( - size_t{0}, invalid_vertex_id::value, invalid_vertex_id::value); + auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + auto kv_map_ptr = std::make_unique< + cuco::static_map>( + size_t{0}, + invalid_vertex_id::value, + invalid_vertex_id::value, + stream_adapter); if (GraphViewType::is_multi_gpu) { auto& comm = handle.get_comms(); auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); @@ -268,38 +192,34 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( kv_map_ptr.reset(); - kv_map_ptr = std::make_unique>( + kv_map_ptr = std::make_unique< + cuco::static_map>( // cuco::static_map requires at least one empty slot std::max(static_cast(static_cast(map_keys.size()) / load_factor), static_cast(thrust::distance(map_key_first, map_key_last)) + 1), invalid_vertex_id::value, - invalid_vertex_id::value); + invalid_vertex_id::value, + 
stream_adapter); - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple( - map_keys.begin(), get_dataframe_buffer_begin(map_value_buffer))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(map_keys.begin(), get_dataframe_buffer_begin(map_value_buffer))); kv_map_ptr->insert(pair_first, pair_first + map_keys.size()); } else { handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream kv_map_ptr.reset(); - kv_map_ptr = std::make_unique>( + kv_map_ptr = std::make_unique< + cuco::static_map>( // cuco::static_map requires at least one empty slot std::max(static_cast( static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), static_cast(thrust::distance(map_key_first, map_key_last)) + 1), invalid_vertex_id::value, - invalid_vertex_id::value); + invalid_vertex_id::value, + stream_adapter); - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)); kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); } @@ -328,47 +248,94 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( rmm::device_uvector tmp_major_vertices(matrix_partition.get_number_of_edges(), handle.get_stream()); rmm::device_uvector tmp_minor_keys(tmp_major_vertices.size(), handle.get_stream()); - rmm::device_uvector tmp_key_aggregated_edge_weights(tmp_major_vertices.size(), - handle.get_stream()); + rmm::device_uvector tmp_key_aggregated_edge_weights( + graph_view.is_weighted() ? 
tmp_major_vertices.size() : size_t{0}, handle.get_stream()); if (matrix_partition.get_major_size() > 0) { - raft::grid_1d_thread_t update_grid( - matrix_partition.get_major_size(), - detail::copy_v_transform_reduce_key_aggregated_out_nbr_for_all_block_size, - handle.get_device_properties().maxGridSize[0]); - - auto constexpr invalid_vertex = invalid_vertex_id::value; - + auto minor_key_first = thrust::make_transform_iterator( + matrix_partition.get_indices(), + [adj_matrix_col_key_first, matrix_partition] __device__(auto minor) { + return *(adj_matrix_col_key_first + + matrix_partition.get_minor_offset_from_minor_nocheck(minor)); + }); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + minor_key_first, + minor_key_first + matrix_partition.get_number_of_edges(), + tmp_minor_keys.begin()); + if (graph_view.is_weighted()) { + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + matrix_partition.get_weights(), + matrix_partition.get_weights() + matrix_partition.get_number_of_edges(), + tmp_key_aggregated_edge_weights.begin()); + } // FIXME: This is highly inefficient for graphs with high-degree vertices. If we renumber // vertices to insure that rows within a partition are sorted by their out-degree in // decreasing order, we will apply this kernel only to low out-degree vertices. 
- detail::for_all_major_for_all_nbr_low_degree<<>>( - matrix_partition, - matrix_partition.get_major_first(), - matrix_partition.get_major_last(), - adj_matrix_col_key_first, - tmp_major_vertices.data(), - tmp_minor_keys.data(), - tmp_key_aggregated_edge_weights.data(), - invalid_vertex); + thrust::for_each( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(matrix_partition.get_major_first()), + thrust::make_counting_iterator(matrix_partition.get_major_first()) + + matrix_partition.get_major_size(), + [matrix_partition, tmp_major_vertices = tmp_major_vertices.begin()] __device__(auto major) { + auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(major); + auto local_degree = matrix_partition.get_local_degree(major_offset); + auto local_offset = matrix_partition.get_local_offset(major_offset); + thrust::fill(thrust::seq, + tmp_major_vertices + local_offset, + tmp_major_vertices + local_offset + local_degree, + major); + }); + rmm::device_uvector reduced_major_vertices(tmp_major_vertices.size(), + handle.get_stream()); + rmm::device_uvector reduced_minor_keys(reduced_major_vertices.size(), + handle.get_stream()); + rmm::device_uvector reduced_key_aggregated_edge_weights( + reduced_major_vertices.size(), handle.get_stream()); + size_t reduced_size{}; + // FIXME: cub segmented sort may be more efficient as this is already sorted by major + auto input_key_first = thrust::make_zip_iterator( + thrust::make_tuple(tmp_major_vertices.begin(), tmp_minor_keys.begin())); + auto output_key_first = thrust::make_zip_iterator( + thrust::make_tuple(reduced_major_vertices.begin(), reduced_minor_keys.begin())); + if (graph_view.is_weighted()) { + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + input_key_first, + input_key_first + tmp_major_vertices.size(), + tmp_key_aggregated_edge_weights.begin()); + reduced_size = + thrust::distance(output_key_first, + 
thrust::get<0>(thrust::reduce_by_key( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + input_key_first, + input_key_first + tmp_major_vertices.size(), + tmp_key_aggregated_edge_weights.begin(), + output_key_first, + reduced_key_aggregated_edge_weights.begin()))); + } else { + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + input_key_first, + input_key_first + tmp_major_vertices.size()); + reduced_size = + thrust::distance(output_key_first, + thrust::get<0>(thrust::reduce_by_key( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + input_key_first, + input_key_first + tmp_major_vertices.size(), + thrust::make_constant_iterator(weight_t{1.0}), + output_key_first, + reduced_key_aggregated_edge_weights.begin()))); + } + tmp_major_vertices = std::move(reduced_major_vertices); + tmp_minor_keys = std::move(reduced_minor_keys); + tmp_key_aggregated_edge_weights = std::move(reduced_key_aggregated_edge_weights); + tmp_major_vertices.resize(reduced_size, handle.get_stream()); + tmp_minor_keys.resize(tmp_major_vertices.size(), handle.get_stream()); + tmp_key_aggregated_edge_weights.resize(tmp_major_vertices.size(), handle.get_stream()); + tmp_major_vertices.shrink_to_fit(handle.get_stream()); + tmp_minor_keys.shrink_to_fit(handle.get_stream()); + tmp_key_aggregated_edge_weights.shrink_to_fit(handle.get_stream()); } - auto triplet_first = thrust::make_zip_iterator(thrust::make_tuple( - tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin())); - auto last = - thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - triplet_first, - triplet_first + tmp_major_vertices.size(), - [] __device__(auto val) { - return thrust::get<0>(val) == invalid_vertex_id::value; - }); - tmp_major_vertices.resize(thrust::distance(triplet_first, last), handle.get_stream()); - tmp_minor_keys.resize(tmp_major_vertices.size(), handle.get_stream()); - 
tmp_key_aggregated_edge_weights.resize(tmp_major_vertices.size(), handle.get_stream()); - if (GraphViewType::is_multi_gpu) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); @@ -379,7 +346,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_size = col_comm.get_size(); - triplet_first = + auto triplet_first = thrust::make_zip_iterator(thrust::make_tuple(tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin())); @@ -429,7 +396,7 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( allocate_dataframe_buffer(tmp_major_vertices.size(), handle.get_stream()); auto tmp_e_op_result_buffer_first = get_dataframe_buffer_begin(tmp_e_op_result_buffer); - triplet_first = thrust::make_zip_iterator(thrust::make_tuple( + auto triplet_first = thrust::make_zip_iterator(thrust::make_tuple( tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin())); thrust::transform( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), @@ -464,9 +431,6 @@ void copy_v_transform_reduce_key_aggregated_out_nbr( // FIXME: additional optimization is possible if reduce_op is a pure function (and reduce_op // can be mapped to ncclRedOp_t). - // FIXME: a temporary workaround for a NCCL (2.9.6) bug that causes a hang on DGX1 (due to - // remote memory allocation), this barrier is unnecessary otherwise. 
- col_comm.barrier(); auto rx_sizes = host_scalar_gather(col_comm, tmp_major_vertices.size(), i, handle.get_stream()); std::vector rx_displs{}; diff --git a/cpp/include/patterns/count_if_e.cuh b/cpp/include/cugraph/patterns/count_if_e.cuh similarity index 92% rename from cpp/include/patterns/count_if_e.cuh rename to cpp/include/cugraph/patterns/count_if_e.cuh index 4eb3fea24c4..039be17252d 100644 --- a/cpp/include/patterns/count_if_e.cuh +++ b/cpp/include/cugraph/patterns/count_if_e.cuh @@ -15,9 +15,9 @@ */ #pragma once -#include -#include -#include +#include +#include +#include #include @@ -66,13 +66,15 @@ typename GraphViewType::edge_type count_if_e( AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, EdgeOp e_op) { - using edge_t = typename GraphViewType::edge_type; + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; return transform_reduce_e(handle, graph_view, adj_matrix_row_value_input_first, adj_matrix_col_value_input_first, cast_edge_op_bool_to_integer -#include -#include +#include +#include +#include #include #include diff --git a/cpp/include/patterns/edge_op_utils.cuh b/cpp/include/cugraph/patterns/edge_op_utils.cuh similarity index 81% rename from cpp/include/patterns/edge_op_utils.cuh rename to cpp/include/cugraph/patterns/edge_op_utils.cuh index 198c1880ff4..23a66e8a0c1 100644 --- a/cpp/include/patterns/edge_op_utils.cuh +++ b/cpp/include/cugraph/patterns/edge_op_utils.cuh @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include @@ -30,19 +30,20 @@ namespace cugraph { namespace experimental { -template +template struct is_valid_edge_op { static constexpr bool value = false; }; -template +template struct is_valid_edge_op< - ResultOfEdgeOp, - typename std::conditional::type> { + InvokeResultEdgeOp, + typename std::conditional_t> { static constexpr bool valid = true; }; template @@ -52,32 +53,36 @@ struct evaluate_edge_op { using row_value_type = typename 
std::iterator_traits::value_type; using col_value_type = typename std::iterator_traits::value_type; - template - __device__ std::enable_if_t>::valid, - typename std::result_of::type> - compute(V r, V c, W w, R rv, C cv, E e) + __device__ + std::enable_if_t>::valid, + typename std::invoke_result::type> + compute(K r, V c, W w, R rv, C cv, E e) { return e(r, c, w, rv, cv); } - template - __device__ std::enable_if_t>::valid, - typename std::result_of::type> - compute(V r, V c, W w, R rv, C cv, E e) + __device__ std::enable_if_t>::valid, + typename std::invoke_result::type> + compute(K r, V c, W w, R rv, C cv, E e) { return e(r, c, rv, cv); } }; template - __device__ std::enable_if_t>::valid, T> - operator()(V r, V c, W w, R rv, C cv) + __device__ + std::enable_if_t>::valid, T> + operator()(K r, V c, W w, R rv, C cv) { return e_op(r, c, w, rv, cv) ? T{1} : T{0}; } - template - __device__ std::enable_if_t>::valid, T> - operator()(V r, V c, R rv, C cv) + __device__ + std::enable_if_t>::valid, T> + operator()(K r, V c, R rv, C cv) { return e_op(r, c, rv, cv) ? T{1} : T{0}; } diff --git a/cpp/include/patterns/reduce_op.cuh b/cpp/include/cugraph/patterns/reduce_op.cuh similarity index 65% rename from cpp/include/patterns/reduce_op.cuh rename to cpp/include/cugraph/patterns/reduce_op.cuh index d92d3352d08..e73a2861cb0 100644 --- a/cpp/include/patterns/reduce_op.cuh +++ b/cpp/include/cugraph/patterns/reduce_op.cuh @@ -20,10 +20,19 @@ namespace cugraph { namespace experimental { namespace reduce_op { +// in case there is no payload to reduce +struct null { + using type = void; +}; + // reducing N elements, any element can be a valid output. 
template struct any { - using type = T; + using type = T; + // FIXME: actually every reduction operation should be side-effect free if reduction is performed + // by thrust; thrust reduction call rounds up the number of invocations based on the block size + // and discards the values outside the valid range; this does not work if the reduction operation + // has side-effects. static constexpr bool pure_function = true; // this can be called in any process __host__ __device__ T operator()(T const& lhs, T const& rhs) const { return lhs; } @@ -34,7 +43,11 @@ struct any { // should be selected. template struct min { - using type = T; + using type = T; + // FIXME: actually every reduction operation should be side-effect free if reduction is performed + // by thrust; thrust reduction call rounds up the number of invocations based on the block size + // and discards the values outside the valid range; this does not work if the reduction operation + // has side-effects. static constexpr bool pure_function = true; // this can be called in any process __host__ __device__ T operator()(T const& lhs, T const& rhs) const diff --git a/cpp/include/patterns/reduce_v.cuh b/cpp/include/cugraph/patterns/reduce_v.cuh similarity index 96% rename from cpp/include/patterns/reduce_v.cuh rename to cpp/include/cugraph/patterns/reduce_v.cuh index b232d37b78d..d27a45e2737 100644 --- a/cpp/include/patterns/reduce_v.cuh +++ b/cpp/include/cugraph/patterns/reduce_v.cuh @@ -15,9 +15,9 @@ */ #pragma once -#include -#include -#include +#include +#include +#include #include diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/cugraph/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh similarity index 98% rename from cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh rename to cpp/include/cugraph/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh index 9848aa21f88..58633fb1e22 100644 --- 
a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh +++ b/cpp/include/cugraph/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -15,13 +15,13 @@ */ #pragma once -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include @@ -98,6 +98,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( ((GraphViewType::is_adj_matrix_transposed != adj_matrix_row_key) ? major_offset : minor_offset)); auto e_op_result = evaluate_edge_op() diff --git a/cpp/include/patterns/transform_reduce_e.cuh b/cpp/include/cugraph/patterns/transform_reduce_e.cuh similarity index 92% rename from cpp/include/patterns/transform_reduce_e.cuh rename to cpp/include/cugraph/patterns/transform_reduce_e.cuh index b95e036d460..151fa1df0c7 100644 --- a/cpp/include/patterns/transform_reduce_e.cuh +++ b/cpp/include/cugraph/patterns/transform_reduce_e.cuh @@ -15,11 +15,11 @@ */ #pragma once -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include @@ -76,7 +76,7 @@ __global__ void for_all_major_for_all_nbr_low_degree( &adj_matrix_row_value_input_first, &adj_matrix_col_value_input_first, &e_op, - idx, + major_offset, indices, weights] __device__(auto i) { auto minor = indices[i]; @@ -84,15 +84,18 @@ __global__ void for_all_major_for_all_nbr_low_degree( auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); auto row = GraphViewType::is_adj_matrix_transposed ? minor - : matrix_partition.get_major_from_major_offset_nocheck(idx); + : matrix_partition.get_major_from_major_offset_nocheck(major_offset); auto col = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_from_major_offset_nocheck(idx) + ? matrix_partition.get_major_from_major_offset_nocheck(major_offset) : minor; - auto row_offset = - GraphViewType::is_adj_matrix_transposed ? 
minor_offset : static_cast(idx); - auto col_offset = - GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; + auto row_offset = GraphViewType::is_adj_matrix_transposed + ? minor_offset + : static_cast(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? static_cast(major_offset) + : minor_offset; return evaluate_edge_op() @@ -154,15 +157,18 @@ __global__ void for_all_major_for_all_nbr_mid_degree( auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); auto row = GraphViewType::is_adj_matrix_transposed ? minor - : matrix_partition.get_major_from_major_offset_nocheck(idx); + : matrix_partition.get_major_from_major_offset_nocheck(major_offset); auto col = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_from_major_offset_nocheck(idx) + ? matrix_partition.get_major_from_major_offset_nocheck(major_offset) : minor; - auto row_offset = - GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); - auto col_offset = - GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; + auto row_offset = GraphViewType::is_adj_matrix_transposed + ? minor_offset + : static_cast(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? static_cast(major_offset) + : minor_offset; auto e_op_result = evaluate_edge_op() @@ -218,15 +224,18 @@ __global__ void for_all_major_for_all_nbr_high_degree( auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); auto row = GraphViewType::is_adj_matrix_transposed ? minor - : matrix_partition.get_major_from_major_offset_nocheck(idx); + : matrix_partition.get_major_from_major_offset_nocheck(major_offset); auto col = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_from_major_offset_nocheck(idx) + ? matrix_partition.get_major_from_major_offset_nocheck(major_offset) : minor; - auto row_offset = - GraphViewType::is_adj_matrix_transposed ? 
minor_offset : static_cast(idx); - auto col_offset = - GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; + auto row_offset = GraphViewType::is_adj_matrix_transposed + ? minor_offset + : static_cast(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? static_cast(major_offset) + : minor_offset; auto e_op_result = evaluate_edge_op() diff --git a/cpp/include/patterns/transform_reduce_v.cuh b/cpp/include/cugraph/patterns/transform_reduce_v.cuh similarity index 97% rename from cpp/include/patterns/transform_reduce_v.cuh rename to cpp/include/cugraph/patterns/transform_reduce_v.cuh index 17ffb89206a..0d5b4f9cbb6 100644 --- a/cpp/include/patterns/transform_reduce_v.cuh +++ b/cpp/include/cugraph/patterns/transform_reduce_v.cuh @@ -15,9 +15,9 @@ */ #pragma once -#include -#include -#include +#include +#include +#include #include diff --git a/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh b/cpp/include/cugraph/patterns/transform_reduce_v_with_adj_matrix_row.cuh similarity index 97% rename from cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh rename to cpp/include/cugraph/patterns/transform_reduce_v_with_adj_matrix_row.cuh index 39aca7cacae..59830222a9c 100644 --- a/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh +++ b/cpp/include/cugraph/patterns/transform_reduce_v_with_adj_matrix_row.cuh @@ -15,9 +15,9 @@ */ #pragma once -#include -#include -#include +#include +#include +#include #include diff --git a/cpp/include/cugraph/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/cugraph/patterns/update_frontier_v_push_if_out_nbr.cuh new file mode 100644 index 00000000000..b1d63cc942a --- /dev/null +++ b/cpp/include/cugraph/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -0,0 +1,1139 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +int32_t constexpr update_frontier_v_push_if_out_nbr_for_all_block_size = 512; + +// we cannot use std::iterator_traits::value_type if Iterator is void* (reference to void +// is not allowed) +template +struct optional_payload_buffer_value_type_t; + +template +struct optional_payload_buffer_value_type_t< + PayloadIterator, + std::enable_if_t>> { + using value = typename std::iterator_traits::value_type; +}; + +template +struct optional_payload_buffer_value_type_t< + PayloadIterator, + std::enable_if_t>> { + using value = void; +}; + +// FIXME: to silence the spurious warning (missing return statement ...) 
due to the nvcc bug +// (https://stackoverflow.com/questions/64523302/cuda-missing-return-statement-at-end-of-non-void- +// function-in-constexpr-if-fun) +#if 1 +template >* = nullptr> +std::byte allocate_optional_payload_buffer(size_t size, cudaStream_t stream) +{ + return std::byte{0}; // dummy +} + +template >* = nullptr> +auto allocate_optional_payload_buffer(size_t size, cudaStream_t stream) +{ + return allocate_dataframe_buffer(size, stream); +} + +template >* = nullptr> +void* get_optional_payload_buffer_begin(std::byte& optional_payload_buffer) +{ + return static_cast(nullptr); +} + +template >* = nullptr> +auto get_optional_payload_buffer_begin( + std::add_lvalue_reference_t( + size_t{0}, cudaStream_t{nullptr}))> optional_payload_buffer) +{ + return get_dataframe_buffer_begin(optional_payload_buffer); +} +#else +auto allocate_optional_payload_buffer = [](size_t size, cudaStream_t stream) { + if constexpr (std::is_same_v) { + return std::byte{0}; // dummy + } else { + return allocate_dataframe_buffer(size, stream); + } +}; + +auto get_optional_payload_buffer_begin = [](auto& optional_payload_buffer) { + if constexpr (std::is_same_v) { + return static_cast(nullptr); + } else { + return get_dataframe_buffer_begin(optional_payload_buffer); + } +}; +#endif + +// FIXME: a temporary workaround for cudaErrorInvalidDeviceFunction error when device lambda is used +// in the else part in if constexpr else statement that involves device lambda +template +struct call_v_op_t { + VertexValueInputIterator vertex_value_input_first{}; + VertexValueOutputIterator vertex_value_output_first{}; + VertexOp v_op{}; + vertex_partition_device_t vertex_partition{}; + size_t invalid_bucket_idx; + + template + __device__ std::enable_if_t, uint8_t> operator()( + key_t key) const + { + auto v_offset = vertex_partition.get_local_vertex_offset_from_vertex_nocheck(key); + auto v_val = *(vertex_value_input_first + v_offset); + auto v_op_result = v_op(key, v_val); + if (v_op_result) { + 
*(vertex_value_output_first + v_offset) = thrust::get<1>(*v_op_result); + return static_cast(thrust::get<0>(*v_op_result)); + } else { + return std::numeric_limits::max(); + } + } + + template + __device__ std::enable_if_t, uint8_t> operator()( + key_t key) const + { + auto v_offset = + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(thrust::get<0>(key)); + auto v_val = *(vertex_value_input_first + v_offset); + auto v_op_result = v_op(key, v_val); + if (v_op_result) { + *(vertex_value_output_first + v_offset) = thrust::get<1>(*v_op_result); + return static_cast(thrust::get<0>(*v_op_result)); + } else { + return std::numeric_limits::max(); + } + } +}; + +// FIXME: a temporary workaround for cudaErrorInvalidDeviceFunction error when device lambda is used +// after if constexpr else statement that involves device lambda (bug report submitted) +template +struct check_invalid_bucket_idx_t { + __device__ bool operator()(thrust::tuple pair) + { + return thrust::get<0>(pair) == std::numeric_limits::max(); + } +}; + +template +__device__ void push_if_buffer_element( + matrix_partition_device_t& matrix_partition, + typename std::iterator_traits::value_type key, + typename GraphViewType::vertex_type row_offset, + typename GraphViewType::vertex_type col, + typename GraphViewType::weight_type weight, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + BufferKeyOutputIterator buffer_key_output_first, + BufferPayloadOutputIterator buffer_payload_output_first, + size_t* buffer_idx_ptr, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using key_t = typename std::iterator_traits::value_type; + using payload_t = + typename optional_payload_buffer_value_type_t::value; + + auto col_offset = matrix_partition.get_minor_offset_from_minor_nocheck(col); + auto e_op_result = evaluate_edge_op() + .compute(key, + col, + weight, + *(adj_matrix_row_value_input_first + 
row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + if (e_op_result) { + static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + auto buffer_idx = atomicAdd(reinterpret_cast(buffer_idx_ptr), + static_cast(1)); + if constexpr (std::is_same_v && std::is_same_v) { + *(buffer_key_output_first + buffer_idx) = col; + } else if constexpr (std::is_same_v && !std::is_same_v) { + *(buffer_key_output_first + buffer_idx) = col; + *(buffer_payload_output_first + buffer_idx) = *e_op_result; + } else if constexpr (!std::is_same_v && std::is_same_v) { + *(buffer_key_output_first + buffer_idx) = thrust::make_tuple(col, *e_op_result); + } else { + *(buffer_key_output_first + buffer_idx) = + thrust::make_tuple(col, thrust::get<0>(*e_op_result)); + *(buffer_payload_output_first + buffer_idx) = thrust::get<1>(*e_op_result); + } + } +} + +template +__global__ void for_all_frontier_row_for_all_nbr_low_degree( + matrix_partition_device_t matrix_partition, + KeyIterator key_first, + KeyIterator key_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + BufferKeyOutputIterator buffer_key_output_first, + BufferPayloadOutputIterator buffer_payload_output_first, + size_t* buffer_idx_ptr, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using key_t = typename std::iterator_traits::value_type; + static_assert( + std::is_same_v::value_type>); + using payload_t = + typename optional_payload_buffer_value_type_t::value; + + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto idx = static_cast(tid); + + while (idx < static_cast(thrust::distance(key_first, key_last))) { + auto key = *(key_first + idx); + vertex_t row{}; + if 
constexpr (std::is_same_v) { + row = key; + } else { + row = thrust::get<0>(key); + } + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_out_degree{}; + thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); + for (edge_t i = 0; i < local_out_degree; ++i) { + push_if_buffer_element(matrix_partition, + key, + row_offset, + indices[i], + weights != nullptr ? weights[i] : weight_t{1.0}, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + buffer_key_output_first, + buffer_payload_output_first, + buffer_idx_ptr, + e_op); + } + idx += gridDim.x * blockDim.x; + } +} + +template +__global__ void for_all_frontier_row_for_all_nbr_mid_degree( + matrix_partition_device_t matrix_partition, + KeyIterator key_first, + KeyIterator key_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + BufferKeyOutputIterator buffer_key_output_first, + BufferPayloadOutputIterator buffer_payload_output_first, + size_t* buffer_idx_ptr, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using key_t = typename std::iterator_traits::value_type; + static_assert( + std::is_same_v::value_type>); + using payload_t = + typename optional_payload_buffer_value_type_t::value; + + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(update_frontier_v_push_if_out_nbr_for_all_block_size % raft::warp_size() == 0); + auto const lane_id = tid % raft::warp_size(); + auto idx = static_cast(tid / raft::warp_size()); + + while (idx < static_cast(thrust::distance(key_first, key_last))) { + auto key = 
*(key_first + idx); + vertex_t row{}; + if constexpr (std::is_same_v) { + row = key; + } else { + row = thrust::get<0>(key); + } + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_out_degree{}; + thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); + for (edge_t i = lane_id; i < local_out_degree; i += raft::warp_size()) { + push_if_buffer_element(matrix_partition, + key, + row_offset, + indices[i], + weights != nullptr ? weights[i] : weight_t{1.0}, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + buffer_key_output_first, + buffer_payload_output_first, + buffer_idx_ptr, + e_op); + } + + idx += gridDim.x * (blockDim.x / raft::warp_size()); + } +} + +template +__global__ void for_all_frontier_row_for_all_nbr_high_degree( + matrix_partition_device_t matrix_partition, + KeyIterator key_first, + KeyIterator key_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + BufferKeyOutputIterator buffer_key_output_first, + BufferPayloadOutputIterator buffer_payload_output_first, + size_t* buffer_idx_ptr, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using key_t = typename std::iterator_traits::value_type; + static_assert( + std::is_same_v::value_type>); + using payload_t = + typename optional_payload_buffer_value_type_t::value; + + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); + + auto idx = static_cast(blockIdx.x); + + while (idx < static_cast(thrust::distance(key_first, key_last))) { + auto key = *(key_first + idx); + vertex_t row{}; + if constexpr (std::is_same_v) { + row = key; + } else { + row = 
thrust::get<0>(key); + } + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_out_degree{}; + thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); + for (edge_t i = threadIdx.x; i < local_out_degree; i += blockDim.x) { + push_if_buffer_element(matrix_partition, + key, + row_offset, + indices[i], + weights != nullptr ? weights[i] : weight_t{1.0}, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + buffer_key_output_first, + buffer_payload_output_first, + buffer_idx_ptr, + e_op); + } + + idx += gridDim.x; + } +} + +template +size_t sort_and_reduce_buffer_elements(raft::handle_t const& handle, + BufferKeyOutputIterator buffer_key_output_first, + BufferPayloadOutputIterator buffer_payload_output_first, + size_t num_buffer_elements, + ReduceOp reduce_op) +{ + using key_t = typename std::iterator_traits::value_type; + using payload_t = + typename optional_payload_buffer_value_type_t::value; + + if constexpr (std::is_same_v) { + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + buffer_key_output_first, + buffer_key_output_first + num_buffer_elements); + } else { + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + buffer_key_output_first, + buffer_key_output_first + num_buffer_elements, + buffer_payload_output_first); + } + + size_t num_reduced_buffer_elements{}; + if constexpr (std::is_same_v) { + auto it = thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + buffer_key_output_first, + buffer_key_output_first + num_buffer_elements); + num_reduced_buffer_elements = + static_cast(thrust::distance(buffer_key_output_first, it)); + } else if constexpr (std::is_same>::value) { + // FIXME: if ReducOp is any, we may have a cheaper alternative than sort & uique (i.e. 
discard + // non-first elements) + auto it = thrust::unique_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + buffer_key_output_first, + buffer_key_output_first + num_buffer_elements, + buffer_payload_output_first); + num_reduced_buffer_elements = + static_cast(thrust::distance(buffer_key_output_first, thrust::get<0>(it))); + } else { + // FIXME: better avoid temporary buffer or at least limit the maximum buffer size (if we adopt + // CUDA cooperative group https://devblogs.nvidia.com/cooperative-groups and global sync(), we + // can use aggregate shared memory as a temporary buffer, or we can limit the buffer size, and + // split one thrust::reduce_by_key call to multiple thrust::reduce_by_key calls if the + // temporary buffer size exceeds the maximum buffer size (may be definied as percentage of the + // system HBM size or a function of the maximum number of threads in the system)) + // FIXME: actually, we can find how many unique keys are here by now. + // FIXME: if GraphViewType::is_multi_gpu is true, this should be executed on the GPU holding + // the vertex unless reduce_op is a pure function. 
+ rmm::device_uvector keys(num_buffer_elements, handle.get_stream()); + auto value_buffer = + allocate_dataframe_buffer(num_buffer_elements, handle.get_stream()); + auto it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + buffer_key_output_first, + buffer_key_output_first + num_buffer_elements, + buffer_payload_output_first, + keys.begin(), + get_dataframe_buffer_begin(value_buffer), + thrust::equal_to(), + reduce_op); + num_reduced_buffer_elements = + static_cast(thrust::distance(keys.begin(), thrust::get<0>(it))); + // FIXME: this copy can be replaced by move + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + keys.begin(), + keys.begin() + num_reduced_buffer_elements, + buffer_key_output_first); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + get_dataframe_buffer_begin(value_buffer), + get_dataframe_buffer_begin(value_buffer) + num_reduced_buffer_elements, + buffer_payload_output_first); + } + + return num_reduced_buffer_elements; +} + +} // namespace detail + +template +typename GraphViewType::edge_type compute_num_out_nbrs_from_frontier( + raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexFrontierType const& frontier, + size_t cur_frontier_bucket_idx) +{ + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); + + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using key_t = typename VertexFrontierType::key_type; + + edge_t ret{0}; + + if (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, 
handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + } + + auto const& cur_frontier_bucket = frontier.get_bucket(cur_frontier_bucket_idx); + vertex_t const* local_frontier_vertex_first{nullptr}; + vertex_t const* local_frontier_vertex_last{nullptr}; + if constexpr (std::is_same_v) { + local_frontier_vertex_first = cur_frontier_bucket.begin(); + local_frontier_vertex_last = cur_frontier_bucket.end(); + } else { + local_frontier_vertex_first = thrust::get<0>(cur_frontier_bucket.begin().get_iterator_tuple()); + local_frontier_vertex_last = thrust::get<0>(cur_frontier_bucket.end().get_iterator_tuple()); + } + + std::vector local_frontier_sizes{}; + if (GraphViewType::is_multi_gpu) { + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + local_frontier_sizes = + host_scalar_allgather(col_comm, cur_frontier_bucket.size(), handle.get_stream()); + } else { + local_frontier_sizes = std::vector{static_cast(cur_frontier_bucket.size())}; + } + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + matrix_partition_device_t matrix_partition(graph_view, i); + + if (GraphViewType::is_multi_gpu) { + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + + rmm::device_uvector frontier_vertices(local_frontier_sizes[i], + handle.get_stream_view()); + // FIXME: this copy is unnecessary, better fix RAFT comm's bcast to take const iterators for + // input + if (col_comm_rank == static_cast(i)) { + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + local_frontier_vertex_first, + local_frontier_vertex_last, + frontier_vertices.begin()); + } + device_bcast(col_comm, + frontier_vertices.data(), + frontier_vertices.data(), + frontier_vertices.size(), + static_cast(i), + handle.get_stream()); + + ret += thrust::transform_reduce( 
+ rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + frontier_vertices.begin(), + frontier_vertices.end(), + [matrix_partition] __device__(auto major) { + auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(major); + return matrix_partition.get_local_degree(major_offset); + }, + edge_t{0}, + thrust::plus()); + } else { + assert(i == 0); + ret += thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + local_frontier_vertex_first, + local_frontier_vertex_last, + [matrix_partition] __device__(auto major) { + auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(major); + return matrix_partition.get_local_degree(major_offset); + }, + edge_t{0}, + thrust::plus()); + } + } + + if (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (end of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + } + + return ret; +} + +// FIXME: this documentation needs to be updated due to (tagged-)vertex support +/** + * @brief Update (tagged-)vertex frontier and (tagged-)vertex property values iterating over the + * outgoing edges from the frontier. + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam VertexFrontierType Type of the vertex frontier class which abstracts vertex frontier + * managements. + * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row + * input properties. + * @tparam AdjMatrixColValueInputIterator Type of the iterator for graph adjacency matrix column + * input properties. 
+ * @tparam EdgeOp Type of the quaternary (or quinary) edge operator. + * @tparam ReduceOp Type of the binary reduction operator. + * @tparam VertexValueInputIterator Type of the iterator for vertex properties. + * @tparam VertexValueOutputIterator Type of the iterator for vertex property variables. + * @tparam VertexOp Type of the binary vertex operator. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param frontier VertexFrontier class object for vertex frontier management. This object includes + * multiple bucket objects. + * @param cur_frontier_bucket_idx Index of the VertexFrontier bucket holding vertices for the + * current iteration. + * @param next_frontier_bucket_indices Indices of the VertexFrontier buckets to store new frontier + * vertices for the next iteration. + * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input + * properties for the first (inclusive) row (assigned to this process in multi-GPU). + * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + + * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). + * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input + * properties for the first (inclusive) column (assigned to this process in multi-GPU). + * `adj_matrix_col_value_input_last` (exclusive) is deduced as @p adj_matrix_col_value_input_first + * + @p graph_view.get_number_of_local_adj_matrix_partition_cols().
+ * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge + * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + + * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, + * get_number_of_local_adj_matrix_partition_cols())) and returns a value to be reduced by the @p + * reduce_op. + * @param reduce_op Binary operator takes two input arguments and reduces the two variables to one. + * @param vertex_value_input_first Iterator pointing to the vertex properties for the first + * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive) + * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices(). + * @param vertex_value_output_first Iterator pointing to the vertex property variables for the first + * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_output_last` + * (exclusive) is deduced as @p vertex_value_output_first + @p + * graph_view.get_number_of_local_vertices(). + * @param v_op Ternary operator takes (tagged-)vertex ID, *(@p vertex_value_input_first + i) (where + * i is in [0, @p graph_view.get_number_of_local_vertices())) and reduced value of the @p e_op outputs + * for this vertex and returns the target bucket index (for frontier update) and new vertex + * property values (to update *(@p vertex_value_output_first + i)). The target bucket index should + * either be VertexFrontierType::kInvalidBucketIdx or an index in @p next_frontier_bucket_indices.
+ */ +template +void update_frontier_v_push_if_out_nbr( + raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexFrontierType& frontier, + size_t cur_frontier_bucket_idx, + std::vector const& next_frontier_bucket_indices, + // FIXME: if vertices in the frontier are tagged, we should have an option to access with (vertex, + // tag) pair (currently we can access only with vertex, we may use cuco::static_map for this + // purpose) + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + EdgeOp e_op, + ReduceOp reduce_op, + // FIXME: if vertices in the frontier are tagged, we should have an option to access with (vertex, + // tag) pair (currently we can access only with vertex, we may use cuco::static_map for this + // purpose) + VertexValueInputIterator vertex_value_input_first, + // FIXME: if vertices in the frontier are tagged, we should have an option to access with (vertex, + // tag) pair (currently we can access only with vertex, we may use cuco::static_map for this + // purpose) + // FIXME: currently, it is undefined behavior if vertices in the frontier are tagged and the same + // vertex property is updated by multiple v_op invocations with the same vertex but with different + // tags. + VertexValueOutputIterator vertex_value_output_first, + // FIXME: this takes (tagged-)vertex ID in addition, think about consistency with the other + // primitives. 
+ VertexOp v_op) +{ + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); + + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using key_t = typename VertexFrontierType::key_type; + using payload_t = typename ReduceOp::type; + + auto frontier_key_first = frontier.get_bucket(cur_frontier_bucket_idx).begin(); + auto frontier_key_last = frontier.get_bucket(cur_frontier_bucket_idx).end(); + + // 1. fill the buffer + + if (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + } + + auto key_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + auto payload_buffer = + detail::allocate_optional_payload_buffer(size_t{0}, handle.get_stream()); + rmm::device_scalar buffer_idx(size_t{0}, handle.get_stream()); + std::vector local_frontier_sizes{}; + if (GraphViewType::is_multi_gpu) { + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + local_frontier_sizes = host_scalar_allgather( + col_comm, + static_cast(thrust::distance(frontier_key_first, frontier_key_last)), + handle.get_stream()); + } else { + local_frontier_sizes = std::vector{static_cast( + static_cast(thrust::distance(frontier_key_first, frontier_key_last)))}; + } + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + matrix_partition_device_t matrix_partition(graph_view, i); + + auto 
matrix_partition_frontier_key_buffer = + allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + vertex_t matrix_partition_frontier_size = static_cast(local_frontier_sizes[i]); + if (GraphViewType::is_multi_gpu) { + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + + resize_dataframe_buffer( + matrix_partition_frontier_key_buffer, matrix_partition_frontier_size, handle.get_stream()); + + if (static_cast(col_comm_rank) == i) { + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + frontier_key_first, + frontier_key_last, + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer)); + } + + device_bcast(col_comm, + frontier_key_first, + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer), + matrix_partition_frontier_size, + i, + handle.get_stream()); + } else { + resize_dataframe_buffer( + matrix_partition_frontier_key_buffer, matrix_partition_frontier_size, handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + frontier_key_first, + frontier_key_last, + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer)); + } + + vertex_t const* matrix_partition_frontier_row_first{nullptr}; + vertex_t const* matrix_partition_frontier_row_last{nullptr}; + if constexpr (std::is_same_v) { + matrix_partition_frontier_row_first = + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer); + matrix_partition_frontier_row_last = + get_dataframe_buffer_end(matrix_partition_frontier_key_buffer); + } else { + matrix_partition_frontier_row_first = + thrust::get<0>(get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + .get_iterator_tuple()); + matrix_partition_frontier_row_last = thrust::get<0>( + get_dataframe_buffer_end(matrix_partition_frontier_key_buffer).get_iterator_tuple()); + } + auto max_pushes = + thrust::distance(matrix_partition_frontier_row_first, 
matrix_partition_frontier_row_last) > 0 + ? thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + matrix_partition_frontier_row_first, + matrix_partition_frontier_row_last, + [matrix_partition] __device__(auto row) { + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + return matrix_partition.get_local_degree(row_offset); + }, + edge_t{0}, + thrust::plus()) + : edge_t{0}; + + // FIXME: This is highly pessimistic for single GPU (and multi-GPU as well if we maintain + // additional per column data for filtering in e_op). If we can pause & resume execution if + // buffer needs to be increased (and if we reserve address space to avoid expensive + // reallocation; + // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management/), we can + // start with a smaller buffer size (especially when the frontier size is large). + // for special cases when we can assure that there is no more than one push per destination + // (e.g. if cugraph::experimental::reduce_op::any is used), we can limit the buffer size to + // std::min(max_pushes, matrix_partition.get_minor_size()). + // For Volta+, we can limit the buffer size to std::min(max_pushes, + // matrix_partition.get_minor_size()) if the reduction operation is a pure function if we use + // locking. + // FIXME: if i != 0, this will require costly reallocation if we don't use the new CUDA feature + // to reserve address space. + auto new_buffer_size = buffer_idx.value(handle.get_stream()) + max_pushes; + resize_dataframe_buffer(key_buffer, new_buffer_size, handle.get_stream()); + if constexpr (!std::is_same_v) { + resize_dataframe_buffer(payload_buffer, new_buffer_size, handle.get_stream()); + } + + auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? 
vertex_t{0} + : matrix_partition.get_major_value_start_offset(); + auto segment_offsets = graph_view.get_local_adj_matrix_partition_segment_offsets(i); + if (segment_offsets.size() > 0) { + static_assert(detail::num_segments_per_vertex_partition == 3); + std::vector h_thresholds(detail::num_segments_per_vertex_partition - 1); + h_thresholds[0] = matrix_partition.get_major_first() + segment_offsets[1]; + h_thresholds[1] = matrix_partition.get_major_first() + segment_offsets[2]; + rmm::device_uvector d_thresholds(h_thresholds.size(), handle.get_stream()); + raft::update_device( + d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), handle.get_stream()); + rmm::device_uvector d_offsets(d_thresholds.size(), handle.get_stream()); + thrust::lower_bound(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + matrix_partition_frontier_row_first, + matrix_partition_frontier_row_last, + d_thresholds.begin(), + d_thresholds.end(), + d_offsets.begin()); + std::vector h_offsets(d_offsets.size()); + raft::update_host(h_offsets.data(), d_offsets.data(), d_offsets.size(), handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + // FIXME: we may further improve performance by 1) concurrently running kernels on different + // segments; 2) individually tuning block sizes for different segments; and 3) adding one more + // segment for very high degree vertices and running segmented reduction + if (h_offsets[0] > 0) { + raft::grid_1d_block_t update_grid( + h_offsets[0], + detail::update_frontier_v_push_if_out_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_frontier_row_for_all_nbr_high_degree<<>>( + matrix_partition, + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer), + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[0], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first, + 
get_dataframe_buffer_begin(key_buffer), + detail::get_optional_payload_buffer_begin(payload_buffer), + buffer_idx.data(), + e_op); + } + if (h_offsets[1] - h_offsets[0] > 0) { + raft::grid_1d_warp_t update_grid( + h_offsets[1] - h_offsets[0], + detail::update_frontier_v_push_if_out_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_frontier_row_for_all_nbr_mid_degree<<>>( + matrix_partition, + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[0], + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[1], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first, + get_dataframe_buffer_begin(key_buffer), + detail::get_optional_payload_buffer_begin(payload_buffer), + buffer_idx.data(), + e_op); + } + if (matrix_partition_frontier_size - h_offsets[1] > 0) { + raft::grid_1d_thread_t update_grid( + matrix_partition_frontier_size - h_offsets[1], + detail::update_frontier_v_push_if_out_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_frontier_row_for_all_nbr_low_degree<<>>( + matrix_partition, + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer) + h_offsets[1], + get_dataframe_buffer_end(matrix_partition_frontier_key_buffer), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first, + get_dataframe_buffer_begin(key_buffer), + detail::get_optional_payload_buffer_begin(payload_buffer), + buffer_idx.data(), + e_op); + } + } else { + if (matrix_partition_frontier_size > 0) { + raft::grid_1d_thread_t update_grid( + matrix_partition_frontier_size, + detail::update_frontier_v_push_if_out_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + detail::for_all_frontier_row_for_all_nbr_low_degree<<>>( + matrix_partition, + get_dataframe_buffer_begin(matrix_partition_frontier_key_buffer), + 
get_dataframe_buffer_end(matrix_partition_frontier_key_buffer), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first, + get_dataframe_buffer_begin(key_buffer), + detail::get_optional_payload_buffer_begin(payload_buffer), + buffer_idx.data(), + e_op); + } + } + } + + if (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of col_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + } + + // 2. reduce the buffer + + auto num_buffer_elements = detail::sort_and_reduce_buffer_elements( + handle, + get_dataframe_buffer_begin(key_buffer), + detail::get_optional_payload_buffer_begin(payload_buffer), + buffer_idx.value(handle.get_stream()), + reduce_op); + if (GraphViewType::is_multi_gpu) { + // FIXME: this step is unnecessary if row_comm_size== 1 + auto& comm = handle.get_comms(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (beginning of row_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + + std::vector 
h_vertex_lasts(row_comm_size); + for (size_t i = 0; i < h_vertex_lasts.size(); ++i) { + h_vertex_lasts[i] = graph_view.get_vertex_partition_last(col_comm_rank * row_comm_size + i); + } + + rmm::device_uvector d_vertex_lasts(h_vertex_lasts.size(), handle.get_stream()); + raft::update_device( + d_vertex_lasts.data(), h_vertex_lasts.data(), h_vertex_lasts.size(), handle.get_stream()); + rmm::device_uvector d_tx_buffer_last_boundaries(d_vertex_lasts.size(), + handle.get_stream()); + vertex_t const* row_first{nullptr}; + if constexpr (std::is_same_v) { + row_first = get_dataframe_buffer_begin(key_buffer); + } else { + row_first = + thrust::get<0>(get_dataframe_buffer_begin(key_buffer).get_iterator_tuple()); + } + thrust::lower_bound(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + row_first, + row_first + num_buffer_elements, + d_vertex_lasts.begin(), + d_vertex_lasts.end(), + d_tx_buffer_last_boundaries.begin()); + std::vector h_tx_buffer_last_boundaries(d_tx_buffer_last_boundaries.size()); + raft::update_host(h_tx_buffer_last_boundaries.data(), + d_tx_buffer_last_boundaries.data(), + d_tx_buffer_last_boundaries.size(), + handle.get_stream()); + handle.get_stream_view().synchronize(); + std::vector tx_counts(h_tx_buffer_last_boundaries.size()); + std::adjacent_difference( + h_tx_buffer_last_boundaries.begin(), h_tx_buffer_last_boundaries.end(), tx_counts.begin()); + + auto rx_key_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + std::tie(rx_key_buffer, std::ignore) = shuffle_values( + row_comm, get_dataframe_buffer_begin(key_buffer), tx_counts, handle.get_stream()); + key_buffer = std::move(rx_key_buffer); + + if constexpr (!std::is_same_v) { + auto rx_payload_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); + std::tie(rx_payload_buffer, std::ignore) = + shuffle_values(row_comm, + get_dataframe_buffer_begin(payload_buffer), + tx_counts, + handle.get_stream()); + payload_buffer = std::move(rx_payload_buffer); + } + 
+ num_buffer_elements = detail::sort_and_reduce_buffer_elements( + handle, + get_dataframe_buffer_begin(key_buffer), + detail::get_optional_payload_buffer_begin(payload_buffer), + size_dataframe_buffer(key_buffer), + reduce_op); + + // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between + // two different communicators (end of row_comm) +#if 1 + // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK + // and MPI barrier with MPI) + host_barrier(comm, handle.get_stream_view()); +#else + handle.get_stream_view().synchronize(); + comm.barrier(); // currently, this is ncclAllReduce +#endif + } + + // 3. update vertex properties and frontier + + if (num_buffer_elements > 0) { + static_assert(VertexFrontierType::kNumBuckets <= std::numeric_limits::max()); + rmm::device_uvector bucket_indices(num_buffer_elements, handle.get_stream()); + + vertex_partition_device_t vertex_partition(graph_view); + + if constexpr (!std::is_same_v) { + auto key_payload_pair_first = thrust::make_zip_iterator( + thrust::make_tuple(get_dataframe_buffer_begin(key_buffer), + detail::get_optional_payload_buffer_begin(payload_buffer))); + thrust::transform( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + key_payload_pair_first, + key_payload_pair_first + num_buffer_elements, + bucket_indices.begin(), + [vertex_value_input_first, + vertex_value_output_first, + v_op, + vertex_partition, + invalid_bucket_idx = VertexFrontierType::kInvalidBucketIdx] __device__(auto pair) { + auto key = thrust::get<0>(pair); + auto payload = thrust::get<1>(pair); + vertex_t v_offset{}; + if constexpr (std::is_same_v) { + v_offset = vertex_partition.get_local_vertex_offset_from_vertex_nocheck(key); + } else { + v_offset = + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(thrust::get<0>(key)); + } + auto v_val = *(vertex_value_input_first + v_offset); + auto v_op_result = v_op(key, v_val, payload); + if 
(v_op_result) { + *(vertex_value_output_first + v_offset) = thrust::get<1>(*v_op_result); + return static_cast(thrust::get<0>(*v_op_result)); + } else { + return std::numeric_limits::max(); + } + }); + + resize_dataframe_buffer(payload_buffer, size_t{0}, handle.get_stream()); + shrink_to_fit_dataframe_buffer(payload_buffer, handle.get_stream()); + } else { + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + get_dataframe_buffer_begin(key_buffer), + get_dataframe_buffer_begin(key_buffer) + num_buffer_elements, + bucket_indices.begin(), + detail::call_v_op_t{vertex_value_input_first, + vertex_value_output_first, + v_op, + vertex_partition, + VertexFrontierType::kInvalidBucketIdx}); + } + + auto bucket_key_pair_first = thrust::make_zip_iterator( + thrust::make_tuple(bucket_indices.begin(), get_dataframe_buffer_begin(key_buffer))); + bucket_indices.resize( + thrust::distance( + bucket_key_pair_first, + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + bucket_key_pair_first, + bucket_key_pair_first + num_buffer_elements, + detail::check_invalid_bucket_idx_t())), + handle.get_stream()); + resize_dataframe_buffer(key_buffer, bucket_indices.size(), handle.get_stream()); + bucket_indices.shrink_to_fit(handle.get_stream()); + shrink_to_fit_dataframe_buffer(key_buffer, handle.get_stream()); + + frontier.insert_to_buckets(bucket_indices.begin(), + bucket_indices.end(), + get_dataframe_buffer_begin(key_buffer), + next_frontier_bucket_indices); + } +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/cugraph/patterns/vertex_frontier.cuh b/cpp/include/cugraph/patterns/vertex_frontier.cuh new file mode 100644 index 00000000000..bfe23882088 --- /dev/null +++ b/cpp/include/cugraph/patterns/vertex_frontier.cuh @@ -0,0 +1,468 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { + +// stores unique key objects in the sorted (non-descending) order; key type is either vertex_t +// (tag_t == void) or thrust::tuple (tag_t != void) +template +class SortedUniqueKeyBucket { + static_assert(std::is_same_v || std::is_arithmetic_v); + + using optional_buffer_type = std:: + conditional_t, std::byte /* dummy */, rmm::device_uvector>; + + public: + template >* = nullptr> + SortedUniqueKeyBucket(raft::handle_t const& handle) + : handle_ptr_(&handle), vertices_(0, handle.get_stream()), tags_(std::byte{0}) + { + } + + template >* = nullptr> + SortedUniqueKeyBucket(raft::handle_t const& handle) + : handle_ptr_(&handle), vertices_(0, handle.get_stream()), tags_(0, handle.get_stream()) + { + } + + /** + * @ brief insert a vertex to the bucket + * + * @param vertex vertex to insert + */ + template >* = nullptr> + void insert(vertex_t vertex) + { + if (vertices_.size() > 0) { + rmm::device_scalar tmp(vertex, handle_ptr_->get_stream()); + insert(tmp.data(), tmp.data() + 1); + } else { + vertices_.resize(1, handle_ptr_->get_stream()); + raft::update_device(vertices_.data(), &vertex, size_t{1}, handle_ptr_->get_stream()); + } + } + + /** + * @ brief insert a (vertex, tag) pair 
to the bucket + * + * @param vertex vertex of the (vertex, tag) pair to insert + * @param tag tag of the (vertex, tag) pair to insert + */ + template >* = nullptr> + void insert(thrust::tuple key) + { + if (vertices_.size() > 0) { + rmm::device_scalar tmp_vertex(thrust::get<0>(key), handle_ptr_->get_stream()); + rmm::device_scalar tmp_tag(thrust::get<1>(key), handle_ptr_->get_stream()); + auto pair_first = + thrust::make_zip_iterator(thrust::make_tuple(tmp_vertex.data(), tmp_tag.data())); + insert(pair_first, pair_first + 1); + } else { + vertices_.resize(1, handle_ptr_->get_stream()); + tags_.resize(1, handle_ptr_->get_stream()); + auto pair_first = + thrust::make_tuple(thrust::make_zip_iterator(vertices_.begin(), tags_.begin())); + thrust::fill(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + pair_first, + pair_first + 1, + key); + } + } + + /** + * @ brief insert a list of vertices to the bucket + * + * @param vertex_first Iterator pointing to the first (inclusive) element of the vertices stored + * in device memory. + * @param vertex_last Iterator pointing to the last (exclusive) element of the vertices stored in + * device memory. 
+ */ + template >* = nullptr> + void insert(VertexIterator vertex_first, VertexIterator vertex_last) + { + static_assert( + std::is_same_v::value_type, vertex_t>); + + if (vertices_.size() > 0) { + rmm::device_uvector merged_vertices( + vertices_.size() + thrust::distance(vertex_first, vertex_last), handle_ptr_->get_stream()); + thrust::merge(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + vertices_.begin(), + vertices_.end(), + vertex_first, + vertex_last, + merged_vertices.begin()); + merged_vertices.resize( + thrust::distance( + merged_vertices.begin(), + thrust::unique(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + merged_vertices.begin(), + merged_vertices.end())), + handle_ptr_->get_stream()); + merged_vertices.shrink_to_fit(handle_ptr_->get_stream()); + vertices_ = std::move(merged_vertices); + } else { + vertices_.resize(thrust::distance(vertex_first, vertex_last), handle_ptr_->get_stream()); + thrust::copy(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + vertex_first, + vertex_last, + vertices_.begin()); + } + } + + /** + * @ brief insert a list of (vertex, tag) pairs to the bucket + * + * @param key_first Iterator pointing to the first (inclusive) element of the (vertex,tag) pairs + * stored in device memory. + * @param key_last Iterator pointing to the last (exclusive) element of the (vertex,tag) pairs + * stored in device memory. 
+ */ + template >* = nullptr> + void insert(KeyIterator key_first, KeyIterator key_last) + { + static_assert(std::is_same_v::value_type, + thrust::tuple>); + + if (vertices_.size() > 0) { + rmm::device_uvector merged_vertices( + vertices_.size() + thrust::distance(key_first, key_last), handle_ptr_->get_stream()); + rmm::device_uvector merged_tags(merged_vertices.size(), handle_ptr_->get_stream()); + auto old_pair_first = + thrust::make_zip_iterator(thrust::make_tuple(vertices_.begin(), tags_.begin())); + auto merged_pair_first = + thrust::make_zip_iterator(thrust::make_tuple(merged_vertices.begin(), merged_tags.begin())); + thrust::merge(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + old_pair_first, + old_pair_first + vertices_.size(), + key_first, + key_last, + merged_pair_first); + merged_vertices.resize( + thrust::distance( + merged_pair_first, + thrust::unique(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + merged_pair_first, + merged_pair_first + merged_vertices.size())), + handle_ptr_->get_stream()); + merged_tags.resize(merged_vertices.size(), handle_ptr_->get_stream()); + merged_vertices.shrink_to_fit(handle_ptr_->get_stream()); + merged_tags.shrink_to_fit(handle_ptr_->get_stream()); + vertices_ = std::move(merged_vertices); + tags_ = std::move(merged_tags); + } else { + vertices_.resize(thrust::distance(key_first, key_last), handle_ptr_->get_stream()); + tags_.resize(thrust::distance(key_first, key_last), handle_ptr_->get_stream()); + thrust::copy(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + key_first, + key_last, + thrust::make_zip_iterator(thrust::make_tuple(vertices_.begin(), tags_.begin()))); + } + } + + size_t size() const { return vertices_.size(); } + + template + std::enable_if_t aggregate_size() const + { + return host_scalar_allreduce( + handle_ptr_->get_comms(), vertices_.size(), handle_ptr_->get_stream()); + } + + template + std::enable_if_t 
aggregate_size() const + { + return vertices_.size(); + } + + void resize(size_t size) + { + vertices_.resize(size, handle_ptr_->get_stream()); + if constexpr (!std::is_same_v) { tags_.resize(size, handle_ptr_->get_stream()); } + } + + void clear() { resize(0); } + + void shrink_to_fit() + { + vertices_.shrink_to_fit(handle_ptr_->get_stream()); + if constexpr (!std::is_same_v) { tags_.shrink_to_fit(handle_ptr_->get_stream()); } + } + +// FIXME: to silence the spurious warning (missing return statement ...) due to the nvcc bug +// (https://stackoverflow.com/questions/64523302/cuda-missing-return-statement-at-end-of-non-void- +// function-in-constexpr-if-fun) +#if 1 + template >* = nullptr> + auto const begin() const + { + return vertices_.begin(); + } + + template >* = nullptr> + auto begin() + { + return vertices_.begin(); + } + + template >* = nullptr> + auto const begin() const + { + return thrust::make_zip_iterator(thrust::make_tuple(vertices_.begin(), tags_.begin())); + } + + template >* = nullptr> + auto begin() + { + return thrust::make_zip_iterator(thrust::make_tuple(vertices_.begin(), tags_.begin())); + } +#else + auto const begin() const + { + if constexpr (std::is_same_v) { + return vertices_.begin(); + } else { + return thrust::make_zip_iterator(thrust::make_tuple(vertices_.begin(), tags_.begin())); + } + } + + auto begin() + { + if constexpr (std::is_same_v) { + return vertices_.begin(); + } else { + return thrust::make_zip_iterator(thrust::make_tuple(vertices_.begin(), tags_.begin())); + } + } +#endif + + auto const end() const { return begin() + vertices_.size(); } + + auto end() { return begin() + vertices_.size(); } + + private: + raft::handle_t const* handle_ptr_{nullptr}; + rmm::device_uvector vertices_; + optional_buffer_type tags_; +}; + +template +class VertexFrontier { + static_assert(std::is_same_v || std::is_arithmetic_v); + + public: + using key_type = + std::conditional_t, vertex_t, thrust::tuple>; + static size_t constexpr kNumBuckets = 
num_buckets; + static size_t constexpr kInvalidBucketIdx{std::numeric_limits::max()}; + + VertexFrontier(raft::handle_t const& handle) : handle_ptr_(&handle) + { + for (size_t i = 0; i < num_buckets; ++i) { buckets_.emplace_back(handle); } + } + + SortedUniqueKeyBucket& get_bucket(size_t bucket_idx) + { + return buckets_[bucket_idx]; + } + + SortedUniqueKeyBucket const& get_bucket(size_t bucket_idx) const + { + return buckets_[bucket_idx]; + } + + void swap_buckets(size_t bucket_idx0, size_t bucket_idx1) + { + std::swap(buckets_[bucket_idx0], buckets_[bucket_idx1]); + } + + template + void split_bucket(size_t this_bucket_idx, + std::vector const& move_to_bucket_indices, + SplitOp split_op) + { + auto& this_bucket = get_bucket(this_bucket_idx); + if (this_bucket.size() == 0) { return; } + + // 1. apply split_op to each bucket element + + static_assert(kNumBuckets <= std::numeric_limits::max()); + rmm::device_uvector bucket_indices(this_bucket.size(), handle_ptr_->get_stream()); + thrust::transform( + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + this_bucket.begin(), + this_bucket.end(), + bucket_indices.begin(), + [split_op] __device__(auto key) { + auto split_op_result = split_op(key); + return static_cast(split_op_result ? *split_op_result : kInvalidBucketIdx); + }); + + // 2. remove elements with the invalid bucket indices + + auto pair_first = + thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), this_bucket.begin())); + bucket_indices.resize( + thrust::distance(pair_first, + thrust::remove_if( + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + pair_first, + pair_first + bucket_indices.size(), + [] __device__(auto pair) { + return thrust::get<0>(pair) == static_cast(kInvalidBucketIdx); + })), + handle_ptr_->get_stream()); + this_bucket.resize(bucket_indices.size()); + bucket_indices.shrink_to_fit(handle_ptr_->get_stream()); + this_bucket.shrink_to_fit(); + + // 3. 
separate the elements to stay in this bucket from the elements to be moved to other buckets + + pair_first = + thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), this_bucket.begin())); + auto new_this_bucket_size = static_cast(thrust::distance( + pair_first, + thrust::stable_partition( // stable_partition to maintain sorted order within each bucket + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + pair_first, + pair_first + bucket_indices.size(), + [this_bucket_idx = static_cast(this_bucket_idx)] __device__(auto pair) { + return thrust::get<0>(pair) == this_bucket_idx; + }))); + + // 4. insert to target buckets and resize this bucket + + insert_to_buckets(bucket_indices.begin() + new_this_bucket_size, + bucket_indices.end(), + this_bucket.begin() + new_this_bucket_size, + move_to_bucket_indices); + + this_bucket.resize(new_this_bucket_size); + this_bucket.shrink_to_fit(); + } + + template + void insert_to_buckets(uint8_t* bucket_idx_first /* [INOUT] */, + uint8_t* bucket_idx_last /* [INOUT] */, + KeyIterator key_first /* [INOUT] */, + std::vector const& to_bucket_indices) + { + // 1. 
group the elements by their target bucket indices + + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(bucket_idx_first, key_first)); + auto pair_last = pair_first + thrust::distance(bucket_idx_first, bucket_idx_last); + + std::vector insert_bucket_indices{}; + std::vector insert_offsets{}; + std::vector insert_sizes{}; + if (to_bucket_indices.size() == 1) { /* single target: the whole range goes to that bucket */ + insert_bucket_indices = to_bucket_indices; + insert_offsets = {0}; + insert_sizes = {static_cast(thrust::distance(pair_first, pair_last))}; + } else if (to_bucket_indices.size() == 2) { /* two targets: one partition pass splits the range */ + auto next_bucket_size = static_cast(thrust::distance( + pair_first, + thrust::stable_partition( // stable_partition to maintain sorted order within each bucket + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + pair_first, + pair_last, + [next_bucket_idx = static_cast(to_bucket_indices[0])] __device__(auto pair) { + return thrust::get<0>(pair) == next_bucket_idx; + }))); + insert_bucket_indices = to_bucket_indices; + insert_offsets = {0, next_bucket_size}; + insert_sizes = { + next_bucket_size, + static_cast(thrust::distance(pair_first + next_bucket_size, pair_last))}; + } else { /* general case: sort by bucket index, then count the elements per distinct index */ + thrust::stable_sort( // stable_sort to maintain sorted order within each bucket + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + pair_first, + pair_last, + [] __device__(auto lhs, auto rhs) { return thrust::get<0>(lhs) < thrust::get<0>(rhs); }); + rmm::device_uvector d_indices(to_bucket_indices.size(), handle_ptr_->get_stream()); + rmm::device_uvector d_counts(d_indices.size(), handle_ptr_->get_stream()); + auto it = thrust::reduce_by_key( + rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + bucket_idx_first, + bucket_idx_last, + thrust::make_constant_iterator(size_t{1}), + d_indices.begin(), + d_counts.begin()); + d_indices.resize(thrust::distance(d_indices.begin(), thrust::get<0>(it)), + handle_ptr_->get_stream()); + 
d_counts.resize(d_indices.size(), handle_ptr_->get_stream()); + std::vector h_indices(d_indices.size()); + std::vector h_counts(h_indices.size()); + raft::update_host( + h_indices.data(), d_indices.data(), d_indices.size(), handle_ptr_->get_stream()); + raft::update_host( + h_counts.data(), d_counts.data(), d_counts.size(), handle_ptr_->get_stream()); + handle_ptr_->get_stream_view().synchronize(); + + size_t offset{0}; + for (size_t i = 0; i < h_indices.size(); ++i) { /* the insert_* vectors are still empty in this branch, so append; indexing them would write out of bounds */ + insert_bucket_indices.push_back(static_cast(h_indices[i])); + insert_offsets.push_back(offset); + insert_sizes.push_back(h_counts[i]); + offset += h_counts[i]; + } + } + + // 2. insert to the target buckets + + for (size_t i = 0; i < insert_offsets.size(); ++i) { + get_bucket(insert_bucket_indices[i]) + .insert(key_first + insert_offsets[i], key_first + (insert_offsets[i] + insert_sizes[i])); + } + } + + private: + raft::handle_t const* handle_ptr_{nullptr}; + std::vector> buckets_{}; +}; + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/cugraph/serialization/serializer.hpp b/cpp/include/cugraph/serialization/serializer.hpp new file mode 100644 index 00000000000..666ee81e98f --- /dev/null +++ b/cpp/include/cugraph/serialization/serializer.hpp @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Andrei Schaffer, aschaffer@nvidia.com +// +#pragma once + +#include + +#include + +#include + +#include +#include + +namespace cugraph { +namespace serializer { + +using namespace cugraph::experimental; + +class serializer_t { + public: + using byte_t = uint8_t; + + using device_byte_it = typename rmm::device_uvector::iterator; + using device_byte_cit = typename rmm::device_uvector::const_iterator; + + // cnstr. for serialize() path: + // + serializer_t(raft::handle_t const& handle, size_t total_sz_bytes) + : handle_(handle), + d_storage_(total_sz_bytes, handle.get_stream()), + begin_(d_storage_.begin()), + cbegin_(d_storage_.begin()) + { + } + + // cnstr. for unserialize() path: + // + serializer_t(raft::handle_t const& handle, byte_t const* ptr_d_storage) + : handle_(handle), d_storage_(0, handle.get_stream()), cbegin_(ptr_d_storage) + { + } + + template + struct graph_meta_t; + + template + struct graph_meta_t> { + // purposely empty, for now; + // FIXME: provide implementation for multi-gpu version + }; + + template + struct graph_meta_t> { + using vertex_t = typename graph_t::vertex_type; + using bool_ser_t = uint8_t; + + graph_meta_t(void) {} + + explicit graph_meta_t(graph_t const& graph) + : num_vertices_(graph.get_number_of_vertices()), + num_edges_(graph.get_number_of_edges()), + properties_(graph.get_graph_properties()), + segment_offsets_(graph.view().get_local_adj_matrix_partition_segment_offsets(0)) + { + } + + graph_meta_t(size_t num_vertices, + size_t num_edges, + graph_properties_t const& properties, + std::vector const& segment_offsets) + : num_vertices_(num_vertices), + num_edges_(num_edges), + properties_(properties), + segment_offsets_(segment_offsets) + { + } + + size_t num_vertices_{0}; /* zero-initialized so a default-constructed graph_meta_t holds no indeterminate value */ + size_t num_edges_{0}; /* same as num_vertices_ */ + graph_properties_t properties_{}; + std::vector segment_offsets_{}; + + size_t get_device_sz_bytes(void) const + { + return 2 * sizeof(size_t) + segment_offsets_.size() * sizeof(vertex_t) + + 3 * sizeof(bool_ser_t); + } + }; + + 
// POD-type serialization: + // + template + void serialize(value_t val); + + // POD-type unserialization: + // + template + value_t unserialize(void); + + // device array serialization: + // + template + void serialize(value_t const* p_d_src, size_t size); + + // device vector unserialization; + // extracts device_uvector of `size` bytes_to_value_t elements: + // + template + rmm::device_uvector unserialize( + size_t size); // size of device vector to be unserialized + + // graph serialization, + // with device storage and host metadata: + // (associated with target; e.g., num_vertices, etc.) + // + template + void serialize(graph_t const& graph, graph_meta_t& gmeta); // serialization target + + // graph unserialization, + // with device storage and host metadata: + // (associated with target; e.g., num_vertices, etc.) + // + template + graph_t unserialize(size_t device_sz_bytes, size_t host_sz_bytes); + + template + static std::pair get_device_graph_sz_bytes( + graph_meta_t const& graph_meta) + { + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + if constexpr (!graph_t::is_multi_gpu) { + size_t num_vertices = graph_meta.num_vertices_; + size_t num_edges = graph_meta.num_edges_; + + size_t weight_storage_sz = + graph_meta.properties_.is_weighted ? 
num_edges * sizeof(weight_t) : 0; + + size_t device_ser_sz = + (num_vertices + 1) * sizeof(edge_t) + num_edges * sizeof(vertex_t) + weight_storage_sz; + + size_t host_ser_sz = graph_meta.get_device_sz_bytes(); + + return std::make_pair( + device_ser_sz, + host_ser_sz); // FIXME: remove when host_bcast() becomes available for host vectors + + } else { + CUGRAPH_FAIL("Unsupported graph type for un/serialization."); + + return std::pair{}; + } + } + + template + static std::pair get_device_graph_sz_bytes(graph_t const& graph) + { + graph_meta_t gmeta{graph}; + return get_device_graph_sz_bytes(gmeta); + } + + byte_t const* get_storage(void) const { return d_storage_.begin(); } + byte_t* get_storage(void) { return d_storage_.begin(); } + + private: + // serialization of graph metadata, via device orchestration: + // + template + void serialize(graph_meta_t const& graph_meta); + + // unserialization of graph metadata, via device orchestration: + // + template + graph_meta_t unserialize( + size_t graph_meta_sz_bytes, + graph_meta_t const& empty_meta); // tag dispatching to avoid conflict with + // `unserialize(size_t)` for device vectors + + raft::handle_t const& handle_; + rmm::device_uvector d_storage_; + device_byte_it begin_{nullptr}; // advances on serialize() + device_byte_cit cbegin_{nullptr}; // advances on unserialize() +}; + +} // namespace serializer +} // namespace cugraph diff --git a/cpp/include/utilities/collect_comm.cuh b/cpp/include/cugraph/utilities/collect_comm.cuh similarity index 83% rename from cpp/include/utilities/collect_comm.cuh rename to cpp/include/cugraph/utilities/collect_comm.cuh index f5a904ad875..ddc5621e929 100644 --- a/cpp/include/utilities/collect_comm.cuh +++ b/cpp/include/cugraph/utilities/collect_comm.cuh @@ -15,14 +15,16 @@ */ #pragma once -#include -#include -#include -#include +#include +#include +#include +#include #include #include #include +#include +#include #include @@ -63,19 +65,19 @@ 
collect_values_for_keys(raft::comms::comms_t const &comm, // 1. build a cuco::static_map object for the map k, v pairs. - auto kv_map_ptr = std::make_unique>( + auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + auto kv_map_ptr = std::make_unique< + cuco::static_map>( // cuco::static_map requires at least one empty slot std::max(static_cast( static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), static_cast(thrust::distance(map_key_first, map_key_last)) + 1), invalid_vertex_id::value, - invalid_vertex_id::value); + invalid_vertex_id::value, + stream_adapter); { - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)); kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); } @@ -124,20 +126,17 @@ collect_values_for_keys(raft::comms::comms_t const &comm, kv_map_ptr.reset(); - kv_map_ptr = std::make_unique>( + kv_map_ptr = std::make_unique< + cuco::static_map>( // cuco::static_map requires at least one empty slot std::max(static_cast(static_cast(unique_keys.size()) / load_factor), unique_keys.size() + 1), invalid_vertex_id::value, - invalid_vertex_id::value); + invalid_vertex_id::value, + stream_adapter); { - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(unique_keys.begin(), values_for_unique_keys.begin())), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(unique_keys.begin(), values_for_unique_keys.begin())); 
kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); } @@ -181,19 +180,19 @@ collect_values_for_unique_keys(raft::comms::comms_t const &comm, // 1. build a cuco::static_map object for the map k, v pairs. - auto kv_map_ptr = std::make_unique>( + auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + auto kv_map_ptr = std::make_unique< + cuco::static_map>( // cuco::static_map requires at least one empty slot std::max(static_cast( static_cast(thrust::distance(map_key_first, map_key_last)) / load_factor), static_cast(thrust::distance(map_key_first, map_key_last)) + 1), invalid_vertex_id::value, - invalid_vertex_id::value); + invalid_vertex_id::value, + stream_adapter); { - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)); kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); } @@ -238,20 +237,17 @@ collect_values_for_unique_keys(raft::comms::comms_t const &comm, kv_map_ptr.reset(); - kv_map_ptr = std::make_unique>( + kv_map_ptr = std::make_unique< + cuco::static_map>( // cuco::static_map requires at least one empty slot std::max(static_cast(static_cast(unique_keys.size()) / load_factor), unique_keys.size() + 1), invalid_vertex_id::value, - invalid_vertex_id::value); + invalid_vertex_id::value, + stream_adapter); { - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(unique_keys.begin(), values_for_unique_keys.begin())), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - + auto pair_first = 
thrust::make_zip_iterator( + thrust::make_tuple(unique_keys.begin(), values_for_unique_keys.begin())); kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); } diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/cugraph/utilities/cython.hpp similarity index 89% rename from cpp/include/utilities/cython.hpp rename to cpp/include/cugraph/utilities/cython.hpp index 0d6cb2f63d0..273e55bae25 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/cugraph/utilities/cython.hpp @@ -15,11 +15,14 @@ */ #pragma once -#include -#include +#include +#include +#include +#include + #include + #include -#include namespace cugraph { namespace cython { @@ -207,6 +210,18 @@ struct random_walk_ret_t { std::unique_ptr d_sizes_; }; +struct random_walk_path_t { + std::unique_ptr d_v_offsets; + std::unique_ptr d_w_sizes; + std::unique_ptr d_w_offsets; +}; + +struct graph_generator_t { + std::unique_ptr d_source; + std::unique_ptr d_destination; +}; + +// enum class generator_distribution_t { POWER_LAW = 0, UNIFORM }; // aggregate for random_walks() COO return type // to be exposed to cython: // @@ -405,6 +420,7 @@ void populate_graph_container(graph_container_t& graph_container, size_t num_global_edges, bool sorted_by_degree, bool is_weighted, + bool is_symmetric, bool transposed, bool multi_gpu); @@ -488,6 +504,37 @@ std::unique_ptr call_egonet(raft::handle_t const& handle, vertex_t* source_vertex, vertex_t n_subgraphs, vertex_t radius); + +// Wrapper for calling WCC through a graph container +template +void call_wcc(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t* components); + +// Wrapper for calling graph generator +template +std::unique_ptr call_generate_rmat_edgelist(raft::handle_t const& handle, + size_t scale, + size_t num_edges, + double a, + double b, + double c, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); +template +std::vector, std::unique_ptr>> +call_generate_rmat_edgelists(raft::handle_t 
const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + cugraph::generator_distribution_t size_distribution, + cugraph::generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + // wrapper for random_walks. // template @@ -497,7 +544,13 @@ call_random_walks(raft::handle_t const& handle, graph_container_t const& graph_container, vertex_t const* ptr_start_set, edge_t num_paths, - edge_t max_depth); + edge_t max_depth, + bool use_padding); + +template +std::unique_ptr call_rw_paths(raft::handle_t const& handle, + index_t num_paths, + index_t const* vertex_path_sizes); // convertor from random_walks return type to COO: // diff --git a/cpp/include/utilities/dataframe_buffer.cuh b/cpp/include/cugraph/utilities/dataframe_buffer.cuh similarity index 78% rename from cpp/include/utilities/dataframe_buffer.cuh rename to cpp/include/cugraph/utilities/dataframe_buffer.cuh index b0e9c1ebfec..beaf4cabe00 100644 --- a/cpp/include/utilities/dataframe_buffer.cuh +++ b/cpp/include/cugraph/utilities/dataframe_buffer.cuh @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include @@ -91,6 +91,20 @@ auto get_dataframe_buffer_begin_tuple_impl(std::index_sequence, BufferTyp get_dataframe_buffer_begin_tuple_element_impl(buffer)...); } +template +auto get_dataframe_buffer_end_tuple_element_impl(BufferType& buffer) +{ + using element_t = typename thrust::tuple_element::type; + return std::get(buffer).end(); +} + +template +auto get_dataframe_buffer_end_tuple_impl(std::index_sequence, BufferType& buffer) +{ + // thrust::make_tuple instead of std::make_tuple as this is fed to thrust::make_zip_iterator. 
+ return thrust::make_tuple(get_dataframe_buffer_end_tuple_element_impl(buffer)...); +} + } // namespace detail template ::value>* = nullptr> @@ -147,6 +161,22 @@ void shrink_to_fit_dataframe_buffer(BufferType& buffer, cudaStream_t stream) .run(buffer, stream); } +template ::value>* = nullptr> +size_t size_dataframe_buffer(BufferType& buffer) +{ + return buffer.size(); +} + +template ::value>* = nullptr> +size_t size_dataframe_buffer(BufferType& buffer) +{ + return std::get<0>(buffer).size(); +} + template ::value>* = nullptr> @@ -165,5 +195,23 @@ auto get_dataframe_buffer_begin(BufferType& buffer) std::make_index_sequence(), buffer)); } +template ::value>* = nullptr> +auto get_dataframe_buffer_end(BufferType& buffer) +{ + return buffer.end(); +} + +template ::value>* = nullptr> +auto get_dataframe_buffer_end(BufferType& buffer) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + return thrust::make_zip_iterator( + detail::get_dataframe_buffer_end_tuple_impl(std::make_index_sequence(), buffer)); +} + } // namespace experimental } // namespace cugraph diff --git a/cpp/include/utilities/device_comm.cuh b/cpp/include/cugraph/utilities/device_comm.cuh similarity index 99% rename from cpp/include/utilities/device_comm.cuh rename to cpp/include/cugraph/utilities/device_comm.cuh index daf8524e25b..b13aa8e5401 100644 --- a/cpp/include/utilities/device_comm.cuh +++ b/cpp/include/cugraph/utilities/device_comm.cuh @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include @@ -398,7 +398,7 @@ struct device_bcast_tuple_iterator_element_impl { count, root, stream); - device_bcast_tuple_iterator_element_impl( + device_bcast_tuple_iterator_element_impl().run( comm, input_first, output_first, count, root, stream); } }; @@ -458,7 +458,7 @@ struct device_allreduce_tuple_iterator_element_impl { count, op, stream); - device_allreduce_tuple_iterator_element_impl( + device_allreduce_tuple_iterator_element_impl().run( comm, input_first, output_first, count, op, 
stream); } }; @@ -912,8 +912,8 @@ device_bcast(raft::comms::comms_t const& comm, thrust::tuple_size::value_type>::value; detail:: - device_bcast_tuple_iterator_element_impl( - comm, input_first, output_first, count, root, stream); + device_bcast_tuple_iterator_element_impl() + .run(comm, input_first, output_first, count, root, stream); } template @@ -952,8 +952,8 @@ device_allreduce(raft::comms::comms_t const& comm, detail::device_allreduce_tuple_iterator_element_impl( - comm, input_first, output_first, count, op, stream); + tuple_size>() + .run(comm, input_first, output_first, count, op, stream); } template diff --git a/cpp/include/utilities/error.hpp b/cpp/include/cugraph/utilities/error.hpp similarity index 98% rename from cpp/include/utilities/error.hpp rename to cpp/include/cugraph/utilities/error.hpp index e44e2c910ea..8cfb077cf7b 100644 --- a/cpp/include/utilities/error.hpp +++ b/cpp/include/cugraph/utilities/error.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
diff --git a/cpp/include/utilities/graph_traits.hpp b/cpp/include/cugraph/utilities/graph_traits.hpp similarity index 100% rename from cpp/include/utilities/graph_traits.hpp rename to cpp/include/cugraph/utilities/graph_traits.hpp diff --git a/cpp/include/utilities/host_barrier.hpp b/cpp/include/cugraph/utilities/host_barrier.hpp similarity index 100% rename from cpp/include/utilities/host_barrier.hpp rename to cpp/include/cugraph/utilities/host_barrier.hpp diff --git a/cpp/include/utilities/host_scalar_comm.cuh b/cpp/include/cugraph/utilities/host_scalar_comm.cuh similarity index 99% rename from cpp/include/utilities/host_scalar_comm.cuh rename to cpp/include/cugraph/utilities/host_scalar_comm.cuh index 2ecfd913813..85994ed22bf 100644 --- a/cpp/include/utilities/host_scalar_comm.cuh +++ b/cpp/include/cugraph/utilities/host_scalar_comm.cuh @@ -15,7 +15,8 @@ */ #pragma once -#include +#include +#include #include #include diff --git a/cpp/include/utilities/path_retrieval.hpp b/cpp/include/cugraph/utilities/path_retrieval.hpp similarity index 87% rename from cpp/include/utilities/path_retrieval.hpp rename to cpp/include/cugraph/utilities/path_retrieval.hpp index 4d1b6a1b4d2..3b2408d9037 100644 --- a/cpp/include/utilities/path_retrieval.hpp +++ b/cpp/include/cugraph/utilities/path_retrieval.hpp @@ -84,4 +84,19 @@ template std::tuple, rmm::device_uvector, rmm::device_uvector> query_rw_sizes_offsets(raft::handle_t const &handle, index_t num_paths, index_t const *ptr_d_sizes); } // namespace experimental + +namespace broadcast { +/** + * @brief broadcasts graph_t object (only the single GPU version). + * + * @tparam graph_t Type of graph (view). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_ptr pointer to graph object: not `nullptr` on send, `nullptr` (ignored) on receive. 
+ * @return graph_t object that was sent/received + */ +template +graph_t graph_broadcast(raft::handle_t const &handle, graph_t *graph_ptr); +}; // namespace broadcast + } // namespace cugraph diff --git a/cpp/include/utilities/shuffle_comm.cuh b/cpp/include/cugraph/utilities/shuffle_comm.cuh similarity index 97% rename from cpp/include/utilities/shuffle_comm.cuh rename to cpp/include/cugraph/utilities/shuffle_comm.cuh index b42b9ad06bb..009dde845b5 100644 --- a/cpp/include/utilities/shuffle_comm.cuh +++ b/cpp/include/cugraph/utilities/shuffle_comm.cuh @@ -15,8 +15,8 @@ */ #pragma once -#include -#include +#include +#include #include #include @@ -73,10 +73,6 @@ compute_tx_rx_counts_offsets_ranks(raft::comms::comms_t const &comm, rx_offsets, rx_src_ranks, stream); - // FIXME: temporary unverified work-around for a NCCL (2.9.6) bug that causes a hang on DGX1 (due - // to remote memory allocation), this synchronization is unnecessary otherwise but seems like - // suppress the hange issue. Need to be revisited once NCCL 2.10 is released. - CUDA_TRY(cudaDeviceSynchronize()); raft::update_host(tx_counts.data(), d_tx_value_counts.data(), comm_size, stream); raft::update_host(rx_counts.data(), d_rx_value_counts.data(), comm_size, stream); diff --git a/cpp/include/utilities/thrust_tuple_utils.cuh b/cpp/include/cugraph/utilities/thrust_tuple_utils.cuh similarity index 100% rename from cpp/include/utilities/thrust_tuple_utils.cuh rename to cpp/include/cugraph/utilities/thrust_tuple_utils.cuh diff --git a/cpp/include/vertex_partition_device.cuh b/cpp/include/cugraph/vertex_partition_device.cuh similarity index 96% rename from cpp/include/vertex_partition_device.cuh rename to cpp/include/cugraph/vertex_partition_device.cuh index a6a78ad3878..b57efd115eb 100644 --- a/cpp/include/vertex_partition_device.cuh +++ b/cpp/include/cugraph/vertex_partition_device.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,8 @@ */ #pragma once -#include -#include +#include +#include #include diff --git a/cpp/include/experimental/graph_generator.hpp b/cpp/include/experimental/graph_generator.hpp deleted file mode 100644 index bc7337944f3..00000000000 --- a/cpp/include/experimental/graph_generator.hpp +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include - -#include -#include - -namespace cugraph { -namespace experimental { - -/** - * @brief generate an edge list for an R-mat graph. - * - * This function allows multi-edges and self-loops similar to the Graph 500 reference - * implementation. - * - * @p scramble_vertex_ids needs to be set to `true` to generate a graph conforming to the Graph 500 - * specification (note that scrambling does not affect cuGraph's graph construction performance, so - * this is generally unnecessary). If `edge_factor` is given (e.g. Graph 500), set @p num_edges to - * (size_t{1} << @p scale) * `edge_factor`. To generate an undirected graph, set @p b == @p c and @p - * clip_and_flip = true. All the resulting edges will be placed in the lower triangular part - * (inculding the diagonal) of the graph adjacency matrix. 
- * - * For multi-GPU generation with `P` GPUs, @p seed should be set to different values in different - * GPUs to avoid every GPU generating the same set of edges. @p num_edges should be adjusted as - * well; e.g. assuming `edge_factor` is given, set @p num_edges = (size_t{1} << @p scale) * - * `edge_factor` / `P` + (rank < (((size_t{1} << @p scale) * `edge_factor`) % P) ? 1 : 0). - * - * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param scale Scale factor to set the number of verties in the graph. Vertex IDs have values in - * [0, V), where V = 1 << @p scale. - * @param num_edges Number of edges to generate. - * @param a a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (vist https://graph500.org - * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger - * than 1.0. - * @param b a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (vist https://graph500.org - * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger - * than 1.0. - * @param c a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (vist https://graph500.org - * for additional details). a, b, c, d should be non-negative and a + b + c should be no larger - * than 1.0. - * @param seed Seed value for the random number generator. - * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part - * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to - * `false`). - * @param scramble_vertex_ids Flag controlling whether to scramble vertex ID bits (if set to `true`) - * or not (if set to `false`); scrambling vertx ID bits breaks correlation between vertex ID values - * and vertex degrees. 
The scramble code here follows the algorithm in the Graph 500 reference - * implementation version 3.0.0. - * @return std::tuple, rmm::device_uvector> A tuple of - * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs. - */ -template -std::tuple, rmm::device_uvector> generate_rmat_edgelist( - raft::handle_t const& handle, - size_t scale, - size_t num_edges, - double a = 0.57, - double b = 0.19, - double c = 0.19, - uint64_t seed = 0, - bool clip_and_flip = false, - bool scramble_vertex_ids = false); - -enum class generator_distribution_t { POWER_LAW = 0, UNIFORM }; - -/** - * @brief generate multiple edge lists using the R-mat graph generator. - * - * This function allows multi-edges and self-loops similar to the Graph 500 reference - * implementation. - * - * @p scramble_vertex_ids needs to be set to `true` to generate a graph conforming to the Graph 500 - * specification (note that scrambling does not affect cuGraph's graph construction performance, so - * this is generally unnecessary). If `edge_factor` is given (e.g. Graph 500), set @p num_edges to - * (size_t{1} << @p scale) * `edge_factor`. To generate an undirected graph, set @p b == @p c and @p - * clip_and_flip = true. All the resulting edges will be placed in the lower triangular part - * (inculding the diagonal) of the graph adjacency matrix. - * - * - * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param n_edgelists Number of edge lists (graphs) to generate - * @param min_scale Scale factor to set the minimum number of verties in the graph. - * @param max_scale Scale factor to set the maximum number of verties in the graph. - * @param edge_factor Average number of edges per vertex to generate. 
- * @param size_distribution Distribution of the graph sizes, impacts the scale parameter of the - * R-MAT generator - * @param edge_distribution Edges distribution for each graph, impacts how R-MAT parameters a,b,c,d, - * are set. - * @param seed Seed value for the random number generator. - * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part - * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to - * `false`). - * @param scramble_vertex_ids Flag controlling whether to scramble vertex ID bits (if set to `true`) - * or not (if set to `false`); scrambling vertx ID bits breaks correlation between vertex ID values - * and vertex degrees. The scramble code here follows the algorithm in the Graph 500 reference - * implementation version 3.0.0. - * @return A vector of std::tuple, rmm::device_uvector> of - *size @p n_edgelists, each vector element being a tuple of rmm::device_uvector objects for edge - *source vertex IDs and edge destination vertex IDs. - */ -template -std::vector, rmm::device_uvector>> -generate_rmat_edgelists( - raft::handle_t const& handle, - size_t n_edgelists, - size_t min_scale, - size_t max_scale, - size_t edge_factor = 16, - generator_distribution_t size_distribution = generator_distribution_t::POWER_LAW, - generator_distribution_t edge_distribution = generator_distribution_t::POWER_LAW, - uint64_t seed = 0, - bool clip_and_flip = false, - bool scramble_vertex_ids = false); - -} // namespace experimental -} // namespace cugraph diff --git a/cpp/include/experimental/include_cuco_static_map.cuh b/cpp/include/experimental/include_cuco_static_map.cuh deleted file mode 100644 index 9e54acef72c..00000000000 --- a/cpp/include/experimental/include_cuco_static_map.cuh +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -// "FIXME": remove the guards below and references to CUCO_STATIC_MAP_DEFINED -// -// cuco/static_map.cuh depends on features not supported on or before Pascal. -// -// If we build for sm_60 or before, the inclusion of cuco/static_map.cuh wil -// result in compilation errors. -// -// If we're Pascal or before we do nothing here and will suppress including -// some code below. If we are later than Pascal we define CUCO_STATIC_MAP_DEFINED -// which will result in the full implementation being pulled in. -// -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700 -#else -#define CUCO_STATIC_MAP_DEFINED -#include -#endif diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh deleted file mode 100644 index 4f3925f7d4c..00000000000 --- a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh +++ /dev/null @@ -1,809 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -namespace cugraph { -namespace experimental { - -namespace detail { - -int32_t constexpr update_frontier_v_push_if_out_nbr_for_all_block_size = 512; - -template -__global__ void for_all_frontier_row_for_all_nbr_low_degree( - matrix_partition_device_t matrix_partition, - RowIterator row_first, - RowIterator row_last, - AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, - AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, - BufferKeyOutputIterator buffer_key_output_first, - BufferPayloadOutputIterator buffer_payload_output_first, - size_t* buffer_idx_ptr, - EdgeOp e_op) -{ - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using weight_t = typename GraphViewType::weight_type; - - static_assert(!GraphViewType::is_adj_matrix_transposed, - "GraphViewType should support the push model."); - - auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - auto idx = static_cast(tid); - - while (idx < static_cast(thrust::distance(row_first, row_last))) { - vertex_t row = *(row_first + idx); - auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); - vertex_t const* indices{nullptr}; - weight_t const* 
weights{nullptr}; - edge_t local_out_degree{}; - thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); - for (edge_t i = 0; i < local_out_degree; ++i) { - auto col = indices[i]; - auto weight = weights != nullptr ? weights[i] : 1.0; - auto col_offset = matrix_partition.get_minor_offset_from_minor_nocheck(col); - auto e_op_result = evaluate_edge_op() - .compute(row, - col, - weight, - *(adj_matrix_row_value_input_first + row_offset), - *(adj_matrix_col_value_input_first + col_offset), - e_op); - if (thrust::get<0>(e_op_result) == true) { - // FIXME: This atomicAdd serializes execution. If we renumber vertices to insure that rows - // within a partition are sorted by their out-degree in decreasing order, we can compute - // a tight uppper bound for the maximum number of pushes per warp/block and use shared - // memory buffer to reduce the number of atomicAdd operations. - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - auto buffer_idx = atomicAdd(reinterpret_cast(buffer_idx_ptr), - static_cast(1)); - *(buffer_key_output_first + buffer_idx) = col; - *(buffer_payload_output_first + buffer_idx) = thrust::get<1>(e_op_result); - } - } - idx += gridDim.x * blockDim.x; - } -} - -template -__global__ void for_all_frontier_row_for_all_nbr_mid_degree( - matrix_partition_device_t matrix_partition, - RowIterator row_first, - RowIterator row_last, - AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, - AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, - BufferKeyOutputIterator buffer_key_output_first, - BufferPayloadOutputIterator buffer_payload_output_first, - size_t* buffer_idx_ptr, - EdgeOp e_op) -{ - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using weight_t = typename GraphViewType::weight_type; - - static_assert(!GraphViewType::is_adj_matrix_transposed, - "GraphViewType should support the push model."); - - auto const tid = 
threadIdx.x + blockIdx.x * blockDim.x; - static_assert(update_frontier_v_push_if_out_nbr_for_all_block_size % raft::warp_size() == 0); - auto const lane_id = tid % raft::warp_size(); - auto idx = static_cast(tid / raft::warp_size()); - - while (idx < static_cast(thrust::distance(row_first, row_last))) { - vertex_t row = *(row_first + idx); - auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); - vertex_t const* indices{nullptr}; - weight_t const* weights{nullptr}; - edge_t local_out_degree{}; - thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); - for (edge_t i = lane_id; i < local_out_degree; i += raft::warp_size()) { - auto col = indices[i]; - auto weight = weights != nullptr ? weights[i] : 1.0; - auto col_offset = matrix_partition.get_minor_offset_from_minor_nocheck(col); - auto e_op_result = evaluate_edge_op() - .compute(row, - col, - weight, - *(adj_matrix_row_value_input_first + row_offset), - *(adj_matrix_col_value_input_first + col_offset), - e_op); - if (thrust::get<0>(e_op_result) == true) { - // FIXME: This atomicAdd serializes execution. If we renumber vertices to insure that rows - // within a partition are sorted by their out-degree in decreasing order, we can compute - // a tight uppper bound for the maximum number of pushes per warp/block and use shared - // memory buffer to reduce the number of atomicAdd operations. 
- static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - auto buffer_idx = atomicAdd(reinterpret_cast(buffer_idx_ptr), - static_cast(1)); - *(buffer_key_output_first + buffer_idx) = col; - *(buffer_payload_output_first + buffer_idx) = thrust::get<1>(e_op_result); - } - } - - idx += gridDim.x * (blockDim.x / raft::warp_size()); - } -} - -template -__global__ void for_all_frontier_row_for_all_nbr_high_degree( - matrix_partition_device_t matrix_partition, - RowIterator row_first, - RowIterator row_last, - AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, - AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, - BufferKeyOutputIterator buffer_key_output_first, - BufferPayloadOutputIterator buffer_payload_output_first, - size_t* buffer_idx_ptr, - EdgeOp e_op) -{ - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using weight_t = typename GraphViewType::weight_type; - - static_assert(!GraphViewType::is_adj_matrix_transposed, - "GraphViewType should support the push model."); - - auto idx = static_cast(blockIdx.x); - - while (idx < static_cast(thrust::distance(row_first, row_last))) { - vertex_t row = *(row_first + idx); - auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); - vertex_t const* indices{nullptr}; - weight_t const* weights{nullptr}; - edge_t local_out_degree{}; - thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); - for (edge_t i = threadIdx.x; i < local_out_degree; i += blockDim.x) { - auto col = indices[i]; - auto weight = weights != nullptr ? 
weights[i] : 1.0; - auto col_offset = matrix_partition.get_minor_offset_from_minor_nocheck(col); - auto e_op_result = evaluate_edge_op() - .compute(row, - col, - weight, - *(adj_matrix_row_value_input_first + row_offset), - *(adj_matrix_col_value_input_first + col_offset), - e_op); - if (thrust::get<0>(e_op_result) == true) { - // FIXME: This atomicAdd serializes execution. If we renumber vertices to insure that rows - // within a partition are sorted by their out-degree in decreasing order, we can compute - // a tight uppper bound for the maximum number of pushes per warp/block and use shared - // memory buffer to reduce the number of atomicAdd operations. - static_assert(sizeof(unsigned long long int) == sizeof(size_t)); - auto buffer_idx = atomicAdd(reinterpret_cast(buffer_idx_ptr), - static_cast(1)); - *(buffer_key_output_first + buffer_idx) = col; - *(buffer_payload_output_first + buffer_idx) = thrust::get<1>(e_op_result); - } - } - - idx += gridDim.x; - } -} - -template -size_t sort_and_reduce_buffer_elements(raft::handle_t const& handle, - BufferKeyOutputIterator buffer_key_output_first, - BufferPayloadOutputIterator buffer_payload_output_first, - size_t num_buffer_elements, - ReduceOp reduce_op) -{ - thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - buffer_key_output_first, - buffer_key_output_first + num_buffer_elements, - buffer_payload_output_first); - - if (std::is_same>::value) { - // FIXME: if ReducOp is any, we may have a cheaper alternative than sort & uique (i.e. 
discard - // non-first elements) - auto it = thrust::unique_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - buffer_key_output_first, - buffer_key_output_first + num_buffer_elements, - buffer_payload_output_first); - return static_cast(thrust::distance(buffer_key_output_first, thrust::get<0>(it))); - } else { - using key_t = typename std::iterator_traits::value_type; - using payload_t = typename std::iterator_traits::value_type; - // FIXME: better avoid temporary buffer or at least limit the maximum buffer size (if we adopt - // CUDA cooperative group https://devblogs.nvidia.com/cooperative-groups and global sync(), we - // can use aggregate shared memory as a temporary buffer, or we can limit the buffer size, and - // split one thrust::reduce_by_key call to multiple thrust::reduce_by_key calls if the - // temporary buffer size exceeds the maximum buffer size (may be definied as percentage of the - // system HBM size or a function of the maximum number of threads in the system)) - // FIXME: actually, we can find how many unique keys are here by now. - // FIXME: if GraphViewType::is_multi_gpu is true, this should be executed on the GPU holding - // the vertex unless reduce_op is a pure function. 
- rmm::device_uvector keys(num_buffer_elements, handle.get_stream()); - auto value_buffer = - allocate_dataframe_buffer(num_buffer_elements, handle.get_stream()); - auto it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - buffer_key_output_first, - buffer_key_output_first + num_buffer_elements, - buffer_payload_output_first, - keys.begin(), - get_dataframe_buffer_begin(value_buffer), - thrust::equal_to(), - reduce_op); - auto num_reduced_buffer_elements = - static_cast(thrust::distance(keys.begin(), thrust::get<0>(it))); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - keys.begin(), - keys.begin() + num_reduced_buffer_elements, - buffer_key_output_first); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - get_dataframe_buffer_begin(value_buffer), - get_dataframe_buffer_begin(value_buffer) + num_reduced_buffer_elements, - buffer_payload_output_first); - return num_reduced_buffer_elements; - } -} - -} // namespace detail - -/** - * @brief Update vertex frontier and vertex property values iterating over the outgoing edges. - * - * @tparam GraphViewType Type of the passed non-owning graph object. - * @tparam VertexIterator Type of the iterator for vertex identifiers. - * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row - * input properties. - * @tparam AdjMatrixColValueInputIterator Type of the iterator for graph adjacency matrix column - * input properties. - * @tparam EdgeOp Type of the quaternary (or quinary) edge operator. - * @tparam ReduceOp Type of the binary reduction operator. - * @tparam VertexValueInputIterator Type of the iterator for vertex properties. - * @tparam VertexValueOutputIterator Type of the iterator for vertex property variables. - * @tparam VertexFrontierType Type of the vertex frontier class which abstracts vertex frontier - * managements. - * @tparam VertexOp Type of the binary vertex operator. 
- * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Non-owning graph object. - * @param vertex_frontier VertexFrontier class object for vertex frontier managements. This object - * includes multiple bucket objects. - * @param cur_fontier_bucket_idx Index of the VertexFrontier bucket holding vertices for the current - * iteration. - * @param next_frontier_bucket_indices Indices of the VertexFrontier buckets to store new frontier - * vertices for the next iteration. - * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input - * properties for the first (inclusive) row (assigned to this process in multi-GPU). - * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + - * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). - * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input - * properties for the first (inclusive) column (assigned to this process in multi-GPU). - * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first - * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). - * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge - * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + - * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, - * get_number_of_local_adj_matrix_partition_cols())) and returns a value to reduced by the @p - * reduce_op. - * @param reduce_op Binary operator takes two input arguments and reduce the two variables to one. - * @param vertex_value_input_first Iterator pointing to the vertex properties for the first - * (inclusive) vertex (assigned to this process in multi-GPU). 
`vertex_value_input_last` (exclusive) - * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices(). - * @param vertex_value_output_first Iterator pointing to the vertex property variables for the first - * (inclusive) vertex (assigned to tihs process in multi-GPU). `vertex_value_output_last` - * (exclusive) is deduced as @p vertex_value_output_first + @p - * graph_view.get_number_of_local_vertices(). - * @param v_op Binary operator takes *(@p vertex_value_input_first + i) (where i is [0, @p - * graph_view.get_number_of_local_vertices())) and reduced value of the @p e_op outputs for - * this vertex and returns the target bucket index (for frontier update) and new verrtex property - * values (to update *(@p vertex_value_output_first + i)). The target bucket index should either be - * VertexFrontier::kInvalidBucketIdx or an index in @p next_frontier_bucket_indices. - */ -template -void update_frontier_v_push_if_out_nbr( - raft::handle_t const& handle, - GraphViewType const& graph_view, - VertexFrontierType& vertex_frontier, - size_t cur_frontier_bucket_idx, - std::vector const& next_frontier_bucket_indices, - AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, - AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, - EdgeOp e_op, - ReduceOp reduce_op, - VertexValueInputIterator vertex_value_input_first, - VertexValueOutputIterator vertex_value_output_first, - VertexOp v_op) -{ - static_assert(!GraphViewType::is_adj_matrix_transposed, - "GraphViewType should support the push model."); - - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using weight_t = typename GraphViewType::weight_type; - using payload_t = typename ReduceOp::type; - - auto cur_frontier_vertex_first = vertex_frontier.get_bucket(cur_frontier_bucket_idx).begin(); - auto cur_frontier_vertex_last = vertex_frontier.get_bucket(cur_frontier_bucket_idx).end(); - - // 1. 
fill the buffer - - if (GraphViewType::is_multi_gpu) { - auto& comm = handle.get_comms(); - - // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between - // two different communicators (beginning of col_comm) -#if 1 - // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK - // and MPI barrier with MPI) - host_barrier(comm, handle.get_stream_view()); -#else - handle.get_stream_view().synchronize(); - comm.barrier(); // currently, this is ncclAllReduce -#endif - } - - rmm::device_uvector keys(size_t{0}, handle.get_stream()); - auto payload_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); - rmm::device_scalar buffer_idx(size_t{0}, handle.get_stream()); - for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { - matrix_partition_device_t matrix_partition(graph_view, i); - - rmm::device_uvector frontier_rows(0, handle.get_stream()); - if (GraphViewType::is_multi_gpu) { - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - - auto frontier_size = - host_scalar_bcast(col_comm, - (static_cast(col_comm_rank) == i) - ? 
thrust::distance(cur_frontier_vertex_first, cur_frontier_vertex_last) - : size_t{0} /* dummy */, - i, - handle.get_stream()); - frontier_rows.resize(frontier_size, handle.get_stream()); - - if (static_cast(col_comm_rank) == i) { - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - cur_frontier_vertex_first, - cur_frontier_vertex_last, - frontier_rows.begin()); - } - - device_bcast(col_comm, - cur_frontier_vertex_first, - frontier_rows.begin(), - frontier_size, - i, - handle.get_stream()); - } else { - frontier_rows.resize(thrust::distance(cur_frontier_vertex_first, cur_frontier_vertex_last), - handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - cur_frontier_vertex_first, - cur_frontier_vertex_last, - frontier_rows.begin()); - } - - auto max_pushes = frontier_rows.size() > 0 - ? thrust::transform_reduce( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - frontier_rows.begin(), - frontier_rows.end(), - [matrix_partition] __device__(auto row) { - auto row_offset = - matrix_partition.get_major_offset_from_major_nocheck(row); - return matrix_partition.get_local_degree(row_offset); - }, - edge_t{0}, - thrust::plus()) - : edge_t{0}; - - // FIXME: This is highly pessimistic for single GPU (and multi-GPU as well if we maintain - // additional per column data for filtering in e_op). If we can pause & resume execution if - // buffer needs to be increased (and if we reserve address space to avoid expensive - // reallocation; - // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management/), we can - // start with a smaller buffer size (especially when the frontier size is large). - // for special cases when we can assure that there is no more than one push per destination - // (e.g. if cugraph::experimental::reduce_op::any is used), we can limit the buffer size to - // std::min(max_pushes, matrix_partition.get_minor_size()). 
- // For Volta+, we can limit the buffer size to std::min(max_pushes, - // matrix_partition.get_minor_size()) if the reduction operation is a pure function if we use - // locking. - // FIXME: if i != 0, this will require costly reallocation if we don't use the new CUDA feature - // to reserve address space. - keys.resize(buffer_idx.value(handle.get_stream()) + max_pushes, handle.get_stream()); - resize_dataframe_buffer(payload_buffer, keys.size(), handle.get_stream()); - - auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed - ? vertex_t{0} - : matrix_partition.get_major_value_start_offset(); - auto segment_offsets = graph_view.get_local_adj_matrix_partition_segment_offsets(i); - if (segment_offsets.size() > 0) { - static_assert(detail::num_segments_per_vertex_partition == 3); - std::vector h_thresholds(detail::num_segments_per_vertex_partition - 1); - h_thresholds[0] = matrix_partition.get_major_first() + segment_offsets[1]; - h_thresholds[1] = matrix_partition.get_major_first() + segment_offsets[2]; - rmm::device_uvector d_thresholds(h_thresholds.size(), handle.get_stream()); - raft::update_device( - d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), handle.get_stream()); - rmm::device_uvector d_offsets(d_thresholds.size(), handle.get_stream()); - thrust::lower_bound(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - frontier_rows.begin(), - frontier_rows.end(), - d_thresholds.begin(), - d_thresholds.end(), - d_offsets.begin()); - std::vector h_offsets(d_offsets.size()); - raft::update_host(h_offsets.data(), d_offsets.data(), d_offsets.size(), handle.get_stream()); - CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); - // FIXME: we may further improve performance by 1) concurrently running kernels on different - // segments; 2) individually tuning block sizes for different segments; and 3) adding one more - // segment for very high degree vertices and running segmented reduction - if (h_offsets[0] > 0) { - 
raft::grid_1d_block_t update_grid( - h_offsets[0], - detail::update_frontier_v_push_if_out_nbr_for_all_block_size, - handle.get_device_properties().maxGridSize[0]); - - detail::for_all_frontier_row_for_all_nbr_high_degree<<>>( - matrix_partition, - frontier_rows.begin(), - frontier_rows.begin() + h_offsets[0], - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first, - keys.begin(), - get_dataframe_buffer_begin(payload_buffer), - buffer_idx.data(), - e_op); - } - if (h_offsets[1] - h_offsets[0] > 0) { - raft::grid_1d_warp_t update_grid( - h_offsets[1] - h_offsets[0], - detail::update_frontier_v_push_if_out_nbr_for_all_block_size, - handle.get_device_properties().maxGridSize[0]); - - detail::for_all_frontier_row_for_all_nbr_mid_degree<<>>( - matrix_partition, - frontier_rows.begin() + h_offsets[0], - frontier_rows.begin() + h_offsets[1], - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first, - keys.begin(), - get_dataframe_buffer_begin(payload_buffer), - buffer_idx.data(), - e_op); - } - if (frontier_rows.size() - h_offsets[1] > 0) { - raft::grid_1d_thread_t update_grid( - frontier_rows.size() - h_offsets[1], - detail::update_frontier_v_push_if_out_nbr_for_all_block_size, - handle.get_device_properties().maxGridSize[0]); - - detail::for_all_frontier_row_for_all_nbr_low_degree<<>>( - matrix_partition, - frontier_rows.begin() + h_offsets[1], - frontier_rows.end(), - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first, - keys.begin(), - get_dataframe_buffer_begin(payload_buffer), - buffer_idx.data(), - e_op); - } - } else { - if (frontier_rows.size() > 0) { - raft::grid_1d_thread_t update_grid( - frontier_rows.size(), - detail::update_frontier_v_push_if_out_nbr_for_all_block_size, - handle.get_device_properties().maxGridSize[0]); - - detail::for_all_frontier_row_for_all_nbr_low_degree<<>>( - matrix_partition, - frontier_rows.begin(), - 
frontier_rows.end(), - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first, - keys.begin(), - get_dataframe_buffer_begin(payload_buffer), - buffer_idx.data(), - e_op); - } - } - } - - if (GraphViewType::is_multi_gpu) { - auto& comm = handle.get_comms(); - - // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between - // two different communicators (beginning of col_comm) -#if 1 - // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK - // and MPI barrier with MPI) - host_barrier(comm, handle.get_stream_view()); -#else - handle.get_stream_view().synchronize(); - comm.barrier(); // currently, this is ncclAllReduce -#endif - } - - // 2. reduce the buffer - - auto num_buffer_elements = - detail::sort_and_reduce_buffer_elements(handle, - keys.begin(), - get_dataframe_buffer_begin(payload_buffer), - buffer_idx.value(handle.get_stream()), - reduce_op); - if (GraphViewType::is_multi_gpu) { - // FIXME: this step is unnecessary if row_comm_size== 1 - auto& comm = handle.get_comms(); - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - - // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between - // two different communicators (beginning of row_comm) -#if 1 - // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK - // and MPI barrier with MPI) - host_barrier(comm, handle.get_stream_view()); -#else - handle.get_stream_view().synchronize(); - comm.barrier(); // currently, this is ncclAllReduce -#endif - - std::vector h_vertex_lasts(row_comm_size); - for (size_t i = 0; i < h_vertex_lasts.size(); ++i) { - h_vertex_lasts[i] = 
graph_view.get_vertex_partition_last(col_comm_rank * row_comm_size + i); - } - - rmm::device_uvector d_vertex_lasts(h_vertex_lasts.size(), handle.get_stream()); - raft::update_device( - d_vertex_lasts.data(), h_vertex_lasts.data(), h_vertex_lasts.size(), handle.get_stream()); - rmm::device_uvector d_tx_buffer_last_boundaries(d_vertex_lasts.size(), - handle.get_stream()); - thrust::lower_bound(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - keys.begin(), - keys.begin() + num_buffer_elements, - d_vertex_lasts.begin(), - d_vertex_lasts.end(), - d_tx_buffer_last_boundaries.begin()); - std::vector h_tx_buffer_last_boundaries(d_tx_buffer_last_boundaries.size()); - raft::update_host(h_tx_buffer_last_boundaries.data(), - d_tx_buffer_last_boundaries.data(), - d_tx_buffer_last_boundaries.size(), - handle.get_stream()); - handle.get_stream_view().synchronize(); - std::vector tx_counts(h_tx_buffer_last_boundaries.size()); - std::adjacent_difference( - h_tx_buffer_last_boundaries.begin(), h_tx_buffer_last_boundaries.end(), tx_counts.begin()); - - rmm::device_uvector rx_keys(size_t{0}, handle.get_stream()); - std::tie(rx_keys, std::ignore) = - shuffle_values(row_comm, keys.begin(), tx_counts, handle.get_stream()); - keys = std::move(rx_keys); - - auto rx_payload_buffer = allocate_dataframe_buffer(size_t{0}, handle.get_stream()); - std::tie(rx_payload_buffer, std::ignore) = - shuffle_values(row_comm, - get_dataframe_buffer_begin(payload_buffer), - tx_counts, - handle.get_stream()); - payload_buffer = std::move(rx_payload_buffer); - - num_buffer_elements = - detail::sort_and_reduce_buffer_elements(handle, - keys.begin(), - get_dataframe_buffer_begin(payload_buffer), - keys.size(), - reduce_op); - - // barrier is necessary here to avoid potential overlap (which can leads to deadlock) between - // two different communicators (end of row_comm) -#if 1 - // FIXME: temporary hack till UCC is integrated into RAFT (so we can use UCC barrier with DASK - // and MPI barrier 
with MPI) - host_barrier(comm, handle.get_stream_view()); -#else - handle.get_stream_view().synchronize(); - comm.barrier(); // currently, this is ncclAllReduce -#endif - } - - // 3. update vertex properties - - if (num_buffer_elements > 0) { - static_assert(VertexFrontierType::kNumBuckets <= std::numeric_limits::max()); - rmm::device_uvector bucket_indices(num_buffer_elements, handle.get_stream()); - - vertex_partition_device_t vertex_partition(graph_view); - - auto key_payload_pair_first = thrust::make_zip_iterator( - thrust::make_tuple(keys.begin(), get_dataframe_buffer_begin(payload_buffer))); - thrust::transform( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - key_payload_pair_first, - key_payload_pair_first + num_buffer_elements, - bucket_indices.begin(), - [vertex_value_input_first, - vertex_value_output_first, - v_op, - vertex_partition, - invalid_bucket_idx = VertexFrontierType::kInvalidBucketIdx] __device__(auto pair) { - auto key = thrust::get<0>(pair); - auto payload = thrust::get<1>(pair); - auto key_offset = vertex_partition.get_local_vertex_offset_from_vertex_nocheck(key); - auto v_val = *(vertex_value_input_first + key_offset); - auto v_op_result = v_op(v_val, payload); - auto bucket_idx = thrust::get<0>(v_op_result); - if (bucket_idx != invalid_bucket_idx) { - *(vertex_value_output_first + key_offset) = thrust::get<1>(v_op_result); - return static_cast(bucket_idx); - } else { - return std::numeric_limits::max(); - } - }); - - resize_dataframe_buffer(payload_buffer, size_t{0}, handle.get_stream()); - shrink_to_fit_dataframe_buffer(payload_buffer, handle.get_stream()); - - auto bucket_key_pair_first = - thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), keys.begin())); - keys.resize(thrust::distance( - bucket_key_pair_first, - thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - bucket_key_pair_first, - bucket_key_pair_first + num_buffer_elements, - [] __device__(auto pair) { - 
return thrust::get<0>(pair) == - std::numeric_limits::max(); - })), - handle.get_stream()); - bucket_indices.resize(keys.size(), handle.get_stream()); - keys.shrink_to_fit(handle.get_stream()); - bucket_indices.shrink_to_fit(handle.get_stream()); - - bucket_key_pair_first = - thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), keys.begin())); - if (next_frontier_bucket_indices.size() == 1) { - vertex_frontier.get_bucket(next_frontier_bucket_indices[0]).insert(keys.begin(), keys.size()); - } else if (next_frontier_bucket_indices.size() == 2) { - auto first_bucket_size = thrust::distance( - bucket_key_pair_first, - thrust::stable_partition( // stalbe_partition to maintain sorted order within each bucket - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - bucket_key_pair_first, - bucket_key_pair_first + bucket_indices.size(), - [first_bucket_idx = static_cast(next_frontier_bucket_indices[0])] __device__( - auto pair) { return thrust::get<0>(pair) == first_bucket_idx; })); - vertex_frontier.get_bucket(next_frontier_bucket_indices[0]) - .insert(keys.begin(), first_bucket_size); - vertex_frontier.get_bucket(next_frontier_bucket_indices[1]) - .insert(keys.begin() + first_bucket_size, - thrust::distance(keys.begin() + first_bucket_size, keys.end())); - } else { - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - bucket_key_pair_first, - bucket_key_pair_first + bucket_indices.size()); - rmm::device_uvector d_indices(next_frontier_bucket_indices.size(), - handle.get_stream()); - rmm::device_uvector d_counts(d_indices.size(), handle.get_stream()); - auto it = - thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - bucket_indices.begin(), - bucket_indices.end(), - thrust::make_constant_iterator(size_t{1}), - d_indices.begin(), - d_counts.begin()); - d_indices.resize(thrust::distance(d_indices.begin(), thrust::get<0>(it)), - handle.get_stream()); - d_counts.resize(d_indices.size(), 
handle.get_stream()); - std::vector h_indices(d_indices.size()); - std::vector h_counts(h_indices.size()); - raft::update_host(h_indices.data(), d_indices.data(), d_indices.size(), handle.get_stream()); - raft::update_host(h_counts.data(), d_counts.data(), d_counts.size(), handle.get_stream()); - handle.get_stream_view().synchronize(); - std::vector h_offsets(h_indices.size(), 0); - std::partial_sum(h_counts.begin(), h_counts.end() - 1, h_offsets.begin() + 1); - for (size_t i = 0; i < h_indices.size(); ++i) { - if (h_counts[i] > 0) { - vertex_frontier.get_bucket(h_indices[i]).insert(keys.begin() + h_offsets[i], h_counts[i]); - } - } - } - } -} // namespace experimental - -} // namespace experimental -} // namespace cugraph diff --git a/cpp/include/patterns/vertex_frontier.cuh b/cpp/include/patterns/vertex_frontier.cuh deleted file mode 100644 index 4758334e9fc..00000000000 --- a/cpp/include/patterns/vertex_frontier.cuh +++ /dev/null @@ -1,266 +0,0 @@ -/* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#pragma once - -#include -#include -#include - -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include - -namespace cugraph { -namespace experimental { - -template -class SortedUniqueElementBucket { - public: - SortedUniqueElementBucket(raft::handle_t const& handle) - : handle_ptr_(&handle), elements_(0, handle.get_stream()) - { - } - - void insert(vertex_t v) - { - if (elements_.size() > 0) { - rmm::device_scalar vertex(v, handle_ptr_->get_stream()); - insert(vertex.data(), vertex_t{1}); - } else { - elements_.resize(1, handle_ptr_->get_stream()); - raft::update_device(elements_.data(), &v, size_t{1}, handle_ptr_->get_stream()); - } - } - - /** - * @ brief insert a list of vertices to the bucket - * - * @param sorted_unique_vertices Device pointer to the array storing the vertex list. - * @param num_sorted_unique_vertices Size of the vertex list to insert. - */ - void insert(vertex_t const* sorted_unique_vertices, vertex_t num_sorted_unique_vertices) - { - if (elements_.size() > 0) { - rmm::device_uvector merged_vertices(elements_.size() + num_sorted_unique_vertices, - handle_ptr_->get_stream()); - thrust::merge(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - elements_.begin(), - elements_.end(), - sorted_unique_vertices, - sorted_unique_vertices + num_sorted_unique_vertices, - merged_vertices.begin()); - merged_vertices.resize( - thrust::distance( - merged_vertices.begin(), - thrust::unique(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - merged_vertices.begin(), - merged_vertices.end())), - handle_ptr_->get_stream()); - merged_vertices.shrink_to_fit(handle_ptr_->get_stream()); - elements_ = std::move(merged_vertices); - } else { - elements_.resize(num_sorted_unique_vertices, handle_ptr_->get_stream()); - thrust::copy(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - sorted_unique_vertices, - sorted_unique_vertices 
+ num_sorted_unique_vertices, - elements_.begin()); - } - } - - size_t size() const { return elements_.size(); } - - template - std::enable_if_t aggregate_size() const - { - return host_scalar_allreduce( - handle_ptr_->get_comms(), elements_.size(), handle_ptr_->get_stream()); - } - - template - std::enable_if_t aggregate_size() const - { - return elements_.size(); - } - - void resize(size_t size) { elements_.resize(size, handle_ptr_->get_stream()); } - - void clear() { elements_.resize(0, handle_ptr_->get_stream()); } - - void shrink_to_fit() { elements_.shrink_to_fit(handle_ptr_->get_stream()); } - - auto const data() const { return elements_.data(); } - - auto data() { return elements_.data(); } - - auto const begin() const { return elements_.begin(); } - - auto begin() { return elements_.begin(); } - - auto const end() const { return elements_.end(); } - - auto end() { return elements_.end(); } - - private: - raft::handle_t const* handle_ptr_{nullptr}; - rmm::device_uvector elements_; -}; - -template -class VertexFrontier { - public: - static size_t constexpr kNumBuckets = num_buckets; - static size_t constexpr kInvalidBucketIdx{std::numeric_limits::max()}; - - VertexFrontier(raft::handle_t const& handle) : handle_ptr_(&handle) - { - for (size_t i = 0; i < num_buckets; ++i) { buckets_.emplace_back(handle); } - } - - SortedUniqueElementBucket& get_bucket(size_t bucket_idx) - { - return buckets_[bucket_idx]; - } - - SortedUniqueElementBucket const& get_bucket(size_t bucket_idx) const - { - return buckets_[bucket_idx]; - } - - void swap_buckets(size_t bucket_idx0, size_t bucket_idx1) - { - std::swap(buckets_[bucket_idx0], buckets_[bucket_idx1]); - } - - template - void split_bucket(size_t this_bucket_idx, - std::vector const& move_to_bucket_indices, - SplitOp split_op) - { - auto& this_bucket = get_bucket(this_bucket_idx); - if (this_bucket.size() > 0) { - static_assert(kNumBuckets <= std::numeric_limits::max()); - rmm::device_uvector 
bucket_indices(this_bucket.size(), handle_ptr_->get_stream()); - thrust::transform( - rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - this_bucket.begin(), - this_bucket.end(), - bucket_indices.begin(), - [split_op] __device__(auto v) { return static_cast(split_op(v)); }); - - auto pair_first = - thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), this_bucket.begin())); - this_bucket.resize(thrust::distance( - pair_first, - thrust::remove_if( - rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - pair_first, - pair_first + bucket_indices.size(), - [invalid_bucket_idx = static_cast(kInvalidBucketIdx)] __device__(auto pair) { - return thrust::get<0>(pair) == invalid_bucket_idx; - }))); - bucket_indices.resize(this_bucket.size(), handle_ptr_->get_stream()); - this_bucket.shrink_to_fit(); - bucket_indices.shrink_to_fit(handle_ptr_->get_stream()); - - pair_first = - thrust::make_zip_iterator(thrust::make_tuple(bucket_indices.begin(), this_bucket.begin())); - auto new_this_bucket_size = thrust::distance( - pair_first, - thrust::stable_partition( // stalbe_partition to maintain sorted order within each bucket - rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - pair_first, - pair_first + bucket_indices.size(), - [this_bucket_idx = static_cast(this_bucket_idx)] __device__(auto pair) { - return thrust::get<0>(pair) == this_bucket_idx; - })); - - if (move_to_bucket_indices.size() == 1) { - get_bucket(move_to_bucket_indices[0]) - .insert(this_bucket.begin() + new_this_bucket_size, - thrust::distance(this_bucket.begin() + new_this_bucket_size, this_bucket.end())); - } else if (move_to_bucket_indices.size() == 2) { - auto next_bucket_size = thrust::distance( - pair_first + new_this_bucket_size, - thrust::stable_partition( // stalbe_partition to maintain sorted order within each bucket - rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - pair_first + 
new_this_bucket_size, - pair_first + bucket_indices.size(), - [next_bucket_idx = static_cast(move_to_bucket_indices[0])] __device__( - auto pair) { return thrust::get<0>(pair) == next_bucket_idx; })); - get_bucket(move_to_bucket_indices[0]) - .insert(this_bucket.begin() + new_this_bucket_size, next_bucket_size); - get_bucket(move_to_bucket_indices[1]) - .insert(this_bucket.begin() + new_this_bucket_size + next_bucket_size, - thrust::distance(this_bucket.begin() + new_this_bucket_size + next_bucket_size, - this_bucket.end())); - } else { - thrust::sort(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - pair_first + new_this_bucket_size, - pair_first + bucket_indices.size()); - rmm::device_uvector d_indices(move_to_bucket_indices.size(), - handle_ptr_->get_stream()); - rmm::device_uvector d_counts(d_indices.size(), handle_ptr_->get_stream()); - auto it = thrust::reduce_by_key( - rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), - bucket_indices.begin() + new_this_bucket_size, - bucket_indices.end(), - thrust::make_constant_iterator(size_t{1}), - d_indices.begin(), - d_counts.begin()); - d_indices.resize(thrust::distance(d_indices.begin(), thrust::get<0>(it)), - handle_ptr_->get_stream()); - d_counts.resize(d_indices.size(), handle_ptr_->get_stream()); - std::vector h_indices(d_indices.size()); - std::vector h_counts(h_indices.size()); - raft::update_host( - h_indices.data(), d_indices.data(), d_indices.size(), handle_ptr_->get_stream()); - raft::update_host( - h_counts.data(), d_counts.data(), d_counts.size(), handle_ptr_->get_stream()); - handle_ptr_->get_stream_view().synchronize(); - std::vector h_offsets(h_indices.size(), 0); - std::partial_sum(h_counts.begin(), h_counts.end() - 1, h_offsets.begin() + 1); - for (size_t i = 0; i < h_indices.size(); ++i) { - if (h_counts[i] > 0) { - get_bucket(h_indices[i]) - .insert(this_bucket.begin() + new_this_bucket_size + h_offsets[i], h_counts[i]); - } - } - } - - 
this_bucket.resize(new_this_bucket_size); - this_bucket.shrink_to_fit(); - } - - return; - } - - private: - raft::handle_t const* handle_ptr_{nullptr}; - std::vector> buckets_{}; -}; - -} // namespace experimental -} // namespace cugraph diff --git a/cpp/src/centrality/README.md b/cpp/src/centrality/README.md index db7838fb0cc..31b5ed6720e 100644 --- a/cpp/src/centrality/README.md +++ b/cpp/src/centrality/README.md @@ -13,7 +13,7 @@ The unit test code is the best place to search for examples on calling pagerank. The example assumes that you create an SG or MG graph somehow. The caller must create the pageranks vector in device memory and pass in the raw pointer to that vector into the pagerank function. ```cpp -#include +#include ... using vertex_t = int32_t; // or int64_t, whichever is appropriate using weight_t = float; // or double, whichever is appropriate @@ -46,7 +46,7 @@ cugraph::experimental::pagerank(handle, graph_view, nullptr, nullptr, nullptr, v The example assumes that you create an SG or MG graph somehow. The caller must create the pageranks vector in device memory and pass in the raw pointer to that vector into the pagerank function. Additionally, the caller must create personalization_vertices and personalized_values vectors in device memory, populate them and pass in the raw pointers to those vectors. ```cpp -#include +#include ... 
using vertex_t = int32_t; // or int64_t, whichever is appropriate using weight_t = float; // or double, whichever is appropriate diff --git a/cpp/src/centrality/betweenness_centrality.cu b/cpp/src/centrality/betweenness_centrality.cu index c0a34de5f70..cdee2140382 100644 --- a/cpp/src/centrality/betweenness_centrality.cu +++ b/cpp/src/centrality/betweenness_centrality.cu @@ -20,10 +20,10 @@ #include -#include -#include +#include +#include +#include #include -#include #include #include "betweenness_centrality.cuh" diff --git a/cpp/src/centrality/katz_centrality.cu b/cpp/src/centrality/katz_centrality.cu index 0119a388680..0622193670e 100644 --- a/cpp/src/centrality/katz_centrality.cu +++ b/cpp/src/centrality/katz_centrality.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,8 +23,8 @@ #include #include -#include -#include "utilities/error.hpp" +#include +#include namespace cugraph { diff --git a/cpp/src/community/README.md b/cpp/src/community/README.md index 4bff0a6e77e..9d635a6167f 100644 --- a/cpp/src/community/README.md +++ b/cpp/src/community/README.md @@ -31,7 +31,7 @@ The API itself is very simple. There are two variations: The example assumes that you create an SG or MG graph somehow. The caller must create the clustering vector in device memory and pass in the raw pointer to that vector into the louvain function. ```cpp -#include +#include ... using vertex_t = int32_t; // or int64_t, whichever is appropriate using weight_t = float; // or double, whichever is appropriate @@ -54,7 +54,7 @@ std::tie(level, modularity) = cugraph::louvain(handle, graph_view, clustering_v. The Dendrogram represents the levels of hierarchical clustering that the Louvain algorithm computes. 
There is a separate function that will flatten the clustering into the same result as above. Returning the Dendrogram, however, provides a finer level of detail on the intermediate results which can be helpful in more fully understanding the data. ```cpp -#include +#include ... using vertex_t = int32_t; // or int64_t, whichever is appropriate using weight_t = float; // or double, whichever is appropriate diff --git a/cpp/src/community/ecg.cu b/cpp/src/community/ecg.cu index a176dfbd1c8..ef171d127fe 100644 --- a/cpp/src/community/ecg.cu +++ b/cpp/src/community/ecg.cu @@ -14,10 +14,10 @@ * limitations under the License. */ -#include #include #include -#include +#include +#include #include #include diff --git a/cpp/src/community/egonet.cu b/cpp/src/community/egonet.cu index 85ee327edb2..6b93f561a45 100644 --- a/cpp/src/community/egonet.cu +++ b/cpp/src/community/egonet.cu @@ -15,8 +15,8 @@ */ // Alex Fender afender@nvida.com -#include #include +#include #include #include #include @@ -28,14 +28,14 @@ #include #include -#include +#include -#include -#include "experimental/graph.hpp" -#include "utilities/graph_utils.cuh" +#include +#include +#include -#include -#include +#include +#include #include diff --git a/cpp/src/community/extract_subgraph_by_vertex.cu b/cpp/src/community/extract_subgraph_by_vertex.cu index eb7b1d494a0..4bfe57c2c50 100644 --- a/cpp/src/community/extract_subgraph_by_vertex.cu +++ b/cpp/src/community/extract_subgraph_by_vertex.cu @@ -14,9 +14,9 @@ * limitations under the License. 
*/ -#include -#include -#include +#include +#include +#include #include #include diff --git a/cpp/src/community/flatten_dendrogram.cuh b/cpp/src/community/flatten_dendrogram.cuh index 6d455a68192..ff6446b0e5f 100644 --- a/cpp/src/community/flatten_dendrogram.cuh +++ b/cpp/src/community/flatten_dendrogram.cuh @@ -15,8 +15,8 @@ */ #pragma once -#include -#include +#include +#include #include #include @@ -51,7 +51,8 @@ void partition_at_level(raft::handle_t const &handle, dendrogram.get_level_ptr_nocheck(l)), dendrogram.get_level_size_nocheck(l), d_partition, - local_num_verts); + local_num_verts, + false); }); } diff --git a/cpp/src/community/ktruss.cu b/cpp/src/community/ktruss.cu index 11a8ed6fbae..224f84f6718 100644 --- a/cpp/src/community/ktruss.cu +++ b/cpp/src/community/ktruss.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -21,11 +21,11 @@ * @file ktruss.cu * --------------------------------------------------------------------------*/ -#include +#include #include #include -#include +#include #include "Static/KTruss/KTruss.cuh" using namespace hornets_nest; diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index 2affcf29805..842a7f39750 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -16,11 +16,14 @@ #include #include -#include +#include #include #include +CUCO_DECLARE_BITWISE_COMPARABLE(float) +CUCO_DECLARE_BITWISE_COMPARABLE(double) + namespace cugraph { namespace detail { @@ -48,25 +51,12 @@ std::pair>, weight_t> louvain( size_t max_level, weight_t resolution) { - // "FIXME": remove this check and the guards below - // - // Disable louvain(experimental::graph_view_t,...) 
- // versions for GPU architectures < 700 - // (cuco/static_map.cuh depends on features not supported on or before Pascal) - // - cudaDeviceProp device_prop; - CUDA_CHECK(cudaGetDeviceProperties(&device_prop, 0)); - - if (device_prop.major < 7) { - CUGRAPH_FAIL("Louvain not supported on Pascal and older architectures"); - } else { - experimental::Louvain> - runner(handle, graph_view); - - weight_t wt = runner(max_level, resolution); - - return std::make_pair(runner.move_dendrogram(), wt); - } + experimental::Louvain> + runner(handle, graph_view); + + weight_t wt = runner(max_level, resolution); + + return std::make_pair(runner.move_dendrogram(), wt); } template @@ -297,4 +287,4 @@ template std::pair louvain( } // namespace cugraph -#include +#include diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index e3569d4c850..8fa2b81783a 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -15,12 +15,12 @@ */ #pragma once -#include +#include #include #include -#include +#include #include diff --git a/cpp/src/community/spectral_clustering.cu b/cpp/src/community/spectral_clustering.cu index f32739ddf29..06b62c5019d 100644 --- a/cpp/src/community/spectral_clustering.cu +++ b/cpp/src/community/spectral_clustering.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,14 +20,14 @@ * @file spectral_clustering.cu * ---------------------------------------------------------------------------**/ -#include +#include #include #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/src/community/triangles_counting.cu b/cpp/src/community/triangles_counting.cu index f6670365652..cd5b8bc6614 100644 --- a/cpp/src/community/triangles_counting.cu +++ b/cpp/src/community/triangles_counting.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,10 +17,10 @@ #include #include -#include -#include +#include +#include -#include +#include #include #include @@ -759,8 +759,9 @@ void TrianglesCount::tcount_b2b() cudaMemGetInfo(&free_bytes, &total_bytes); CHECK_CUDA(m_stream); - int nblock = (free_bytes * 95 / 100) / (sizeof(uint32_t) * bmldL1); //@TODO: what? 
- nblock = MIN(nblock, m_mat.nrows); + size_t nblock_available = (free_bytes * 95 / 100) / (sizeof(uint32_t) * bmldL1); + + int nblock = static_cast(MIN(nblock_available, static_cast(m_mat.nrows))); // allocate level 1 bitmap rmm::device_vector bmapL1_d(bmldL1 * nblock, uint32_t{0}); @@ -793,8 +794,10 @@ void TrianglesCount::tcount_wrp() cudaMemGetInfo(&free_bytes, &total_bytes); CHECK_CUDA(m_stream); - int nblock = (free_bytes * 95 / 100) / (sizeof(uint32_t) * bmld * (THREADS / 32)); - nblock = MIN(nblock, DIV_UP(m_mat.nrows, (THREADS / 32))); + size_t nblock_available = (free_bytes * 95 / 100) / (sizeof(uint32_t) * bmld * (THREADS / 32)); + + int nblock = static_cast( + MIN(nblock_available, static_cast(DIV_UP(m_mat.nrows, (THREADS / 32))))); size_t bmap_sz = bmld * nblock * (THREADS / 32); @@ -827,7 +830,8 @@ void TrianglesCount::count() tcount_wrp(); else { const int shMinBlkXSM = 6; - if (size_t{m_shared_mem_per_block * 8 / shMinBlkXSM} < (size_t)m_mat.N) + if (static_cast(m_shared_mem_per_block * 8 / shMinBlkXSM) < + static_cast(m_mat.N)) tcount_b2b(); else tcount_bsh(); diff --git a/cpp/src/components/connectivity.cu b/cpp/src/components/connectivity.cu index 09412160b37..d5768c7f09f 100644 --- a/cpp/src/components/connectivity.cu +++ b/cpp/src/components/connectivity.cu @@ -19,13 +19,13 @@ #include -#include #include -#include +#include +#include +#include #include #include -#include "utilities/error.hpp" -#include "utilities/graph_utils.cuh" +#include #include "topology/topology.cuh" diff --git a/cpp/src/components/utils.h b/cpp/src/components/utils.h index c9ebb6ac4d1..7b0e3042a97 100644 --- a/cpp/src/components/utils.h +++ b/cpp/src/components/utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,7 +27,7 @@ #include -#include +#include namespace MLCommon { diff --git a/cpp/src/components/weakly_connected_components.cu b/cpp/src/components/weakly_connected_components.cu new file mode 100644 index 00000000000..0c552ad24fc --- /dev/null +++ b/cpp/src/components/weakly_connected_components.cu @@ -0,0 +1,849 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { + +namespace { + +// FIXME: this function (after modification) may be useful for SSSP with the near-far method to +// determine the near-far threshold. 
+// add new roots till the sum of the degrees first becomes no smaller than degree_sum_threshold and +// returns a triplet of (new roots, number of scanned candidates, sum of the degrees of the new +// roots) +template +std::tuple, + typename GraphViewType::vertex_type, + typename GraphViewType::edge_type> +accumulate_new_roots(raft::handle_t const &handle, + vertex_partition_device_t vertex_partition, + typename GraphViewType::vertex_type const *components, + typename GraphViewType::edge_type const *degrees, + typename GraphViewType::vertex_type const *candidate_first, + typename GraphViewType::vertex_type const *candidate_last, + typename GraphViewType::vertex_type max_new_roots, + typename GraphViewType::edge_type degree_sum_threshold) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + + // tuning parameter (time to scan max_scan_size elements should not take significantly longer than + // scanning a single element) + vertex_t max_scan_size = + static_cast(handle.get_device_properties().multiProcessorCount) * vertex_t{16384}; + + rmm::device_uvector new_roots(max_new_roots, handle.get_stream_view()); + vertex_t num_new_roots{0}; + vertex_t num_scanned{0}; + edge_t degree_sum{0}; + while ((candidate_first + num_scanned < candidate_last) && (degree_sum < degree_sum_threshold) && + (num_new_roots < max_new_roots)) { + auto scan_size = std::min( + max_scan_size, + static_cast(thrust::distance(candidate_first + num_scanned, candidate_last))); + + rmm::device_uvector tmp_new_roots(scan_size, handle.get_stream_view()); + rmm::device_uvector tmp_indices(tmp_new_roots.size(), handle.get_stream_view()); + auto input_pair_first = thrust::make_zip_iterator(thrust::make_tuple( + candidate_first + num_scanned, thrust::make_counting_iterator(vertex_t{0}))); + auto output_pair_first = + thrust::make_zip_iterator(thrust::make_tuple(tmp_new_roots.begin(), tmp_indices.begin())); + tmp_new_roots.resize( + 
static_cast(thrust::distance( + output_pair_first, + thrust::copy_if( + rmm::exec_policy(handle.get_stream_view()), + input_pair_first, + input_pair_first + scan_size, + output_pair_first, + [vertex_partition, components] __device__(auto pair) { + auto v = thrust::get<0>(pair); + return (components[vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v)] == + invalid_component_id::value); + }))), + handle.get_stream_view()); + tmp_indices.resize(tmp_new_roots.size(), handle.get_stream_view()); + + if (tmp_new_roots.size() > 0) { + rmm::device_uvector tmp_cumulative_degrees(tmp_new_roots.size(), + handle.get_stream_view()); + thrust::transform( + rmm::exec_policy(handle.get_stream_view()), + tmp_new_roots.begin(), + tmp_new_roots.end(), + tmp_cumulative_degrees.begin(), + [vertex_partition, degrees] __device__(auto v) { + return degrees[vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v)]; + }); + thrust::inclusive_scan(rmm::exec_policy(handle.get_stream_view()), + tmp_cumulative_degrees.begin(), + tmp_cumulative_degrees.end(), + tmp_cumulative_degrees.begin()); + auto last = thrust::lower_bound(rmm::exec_policy(handle.get_stream_view()), + tmp_cumulative_degrees.begin(), + tmp_cumulative_degrees.end(), + degree_sum_threshold - degree_sum); + if (last != tmp_cumulative_degrees.end()) { ++last; } + auto tmp_num_new_roots = + std::min(static_cast(thrust::distance(tmp_cumulative_degrees.begin(), last)), + max_new_roots - num_new_roots); + + thrust::copy(rmm::exec_policy(handle.get_stream_view()), + tmp_new_roots.begin(), + tmp_new_roots.begin() + tmp_num_new_roots, + new_roots.begin() + num_new_roots); + num_new_roots += tmp_num_new_roots; + vertex_t tmp_num_scanned{0}; + edge_t tmp_degree_sum{0}; + if (tmp_num_new_roots == static_cast(tmp_new_roots.size())) { + tmp_num_scanned = scan_size; + } else { + raft::update_host( + &tmp_num_scanned, tmp_indices.data() + tmp_num_new_roots, size_t{1}, handle.get_stream()); + } + 
raft::update_host(&tmp_degree_sum, + tmp_cumulative_degrees.data() + (tmp_num_new_roots - 1), + size_t{1}, + handle.get_stream()); + handle.get_stream_view().synchronize(); + num_scanned += tmp_num_scanned; + degree_sum += tmp_degree_sum; + } else { + num_scanned += scan_size; + } + } + + new_roots.resize(num_new_roots, handle.get_stream_view()); + new_roots.shrink_to_fit(handle.get_stream_view()); + + return std::make_tuple(std::move(new_roots), num_scanned, degree_sum); +} + +// FIXME: to silence the spurious warning (missing return statement ...) due to the nvcc bug +// (https://stackoverflow.com/questions/64523302/cuda-missing-return-statement-at-end-of-non-void- +// function-in-constexpr-if-fun) +template +struct v_op_t { + using vertex_type = typename GraphViewType::vertex_type; + + vertex_partition_device_t vertex_partition{}; + vertex_type *level_components{}; + decltype(thrust::make_zip_iterator(thrust::make_tuple( + static_cast(nullptr), static_cast(nullptr)))) edge_buffer_first{}; + // FIXME: we can use cuda::atomic instead but currently on a system with x86 + GPU, this requires + // placing the atomic barrier on managed memory and this adds additional complication. 
+ size_t *num_edge_inserts{}; + size_t next_bucket_idx{}; + size_t conflict_bucket_idx{}; // relevant only if GraphViewType::is_multi_gpu is true + + template + __device__ std::enable_if_t>> + operator()(thrust::tuple tagged_v, int v_val /* dummy */) const + { + auto tag = thrust::get<1>(tagged_v); + auto v_offset = + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(thrust::get<0>(tagged_v)); + // FIXME: better switch to atomic_ref after + // https://github.com/nvidia/libcudacxx/milestone/2 + auto old = + atomicCAS(level_components + v_offset, invalid_component_id::value, tag); + if (old != invalid_component_id::value && old != tag) { // conflict + return thrust::optional>{ + thrust::make_tuple(conflict_bucket_idx, std::byte{0} /* dummy */)}; + } else { + return (old == invalid_component_id::value) + ? thrust::optional>{thrust::make_tuple( + next_bucket_idx, std::byte{0} /* dummy */)} + : thrust::nullopt; + } + } + + template + __device__ std::enable_if_t>> + operator()(thrust::tuple tagged_v, int v_val /* dummy */) const + { + return thrust::optional>{ + thrust::make_tuple(next_bucket_idx, std::byte{0} /* dummy */)}; + } +}; + +template +void weakly_connected_components_impl(raft::handle_t const &handle, + GraphViewType const &push_graph_view, + typename GraphViewType::vertex_type *components, + bool do_expensive_check) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + static_assert(std::is_integral::value, + "GraphViewType::vertex_type should be integral."); + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); + + auto const num_vertices = push_graph_view.get_number_of_vertices(); + if (num_vertices == 0) { return; } + + // 1. 
check input arguments + + CUGRAPH_EXPECTS( + push_graph_view.is_symmetric(), + "Invalid input argument: input graph should be symmetric for weakly connected components."); + + if (do_expensive_check) { + // nothing to do + } + + // 2. recursively run multi-root frontier expansion + + enum class Bucket { + cur, + next, + conflict /* relevant only if GraphViewType::is_multi_gpu is true */, + num_buckets + }; + // tuning parameter to balance work per iteration (should be large enough to be throughput + // bounded) vs # conflicts between frontiers with different roots (# conflicts == # edges for the + // next level) + auto degree_sum_threshold = + static_cast(handle.get_device_properties().multiProcessorCount) * edge_t{1024}; + + size_t num_levels{0}; + graph_t + level_graph(handle); + rmm::device_uvector level_renumber_map(0, handle.get_stream_view()); + std::vector> level_component_vectors{}; + // vertex ID in this level to the component ID in the previous level + std::vector> level_renumber_map_vectors{}; + std::vector level_local_vertex_first_vectors{}; + while (true) { + auto level_graph_view = num_levels == 0 ? push_graph_view : level_graph.view(); + vertex_partition_device_t vertex_partition(level_graph_view); + level_component_vectors.push_back(rmm::device_uvector( + num_levels == 0 ? vertex_t{0} : level_graph_view.get_number_of_local_vertices(), + handle.get_stream_view())); + level_renumber_map_vectors.push_back(std::move(level_renumber_map)); + level_local_vertex_first_vectors.push_back(level_graph_view.get_local_vertex_first()); + auto level_components = + num_levels == 0 ? components : level_component_vectors[num_levels].data(); + ++num_levels; + auto degrees = level_graph_view.compute_out_degrees(handle); + + // 2-1. 
filter out isolated vertices + + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple( + thrust::make_counting_iterator(level_graph_view.get_local_vertex_first()), degrees.begin())); + thrust::transform(rmm::exec_policy(handle.get_stream_view()), + pair_first, + pair_first + level_graph_view.get_number_of_local_vertices(), + level_components, + [] __device__(auto pair) { + auto v = thrust::get<0>(pair); + auto degree = thrust::get<1>(pair); + return degree > 0 ? invalid_component_id::value : v; + }); + + // 2-2. initialize new root candidates + + // Vertices are first partitioned to high-degree vertices and low-degree vertices, we can reach + // degree_sum_threshold with fewer high-degree vertices leading to a higher compression ratio. + // The degree threshold is set to ceil(sqrt(degree_sum_threshold * 2)); this guarantees the + // compression ratio of at least 50% (ignoring rounding errors) even if all the selected roots + // fall into a single connected component as there will be at least as many non-root vertices in + // the connected component (assuming there are no multi-edges, if there are multi-edges, we may + // not get 50% compression in # vertices but still get compression in # edges). the remaining + // low-degree vertices will be randomly shuffled so comparable ratios of vertices will be + // selected as roots in the remaining connected components. 
+ + rmm::device_uvector new_root_candidates( + level_graph_view.get_number_of_local_vertices(), handle.get_stream_view()); + new_root_candidates.resize( + thrust::distance( + new_root_candidates.begin(), + thrust::copy_if( + rmm::exec_policy(handle.get_stream_view()), + thrust::make_counting_iterator(level_graph_view.get_local_vertex_first()), + thrust::make_counting_iterator(level_graph_view.get_local_vertex_last()), + new_root_candidates.begin(), + [vertex_partition, level_components] __device__(auto v) { + return level_components[vertex_partition.get_local_vertex_offset_from_vertex_nocheck( + v)] == invalid_component_id::value; + })), + handle.get_stream_view()); + auto high_degree_partition_last = thrust::stable_partition( + rmm::exec_policy(handle.get_stream_view()), + new_root_candidates.begin(), + new_root_candidates.end(), + [vertex_partition, + degrees = degrees.data(), + threshold = static_cast( + ceil(sqrt(static_cast(degree_sum_threshold) * 2.0)))] __device__(auto v) { + return degrees[vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v)] >= + threshold; + }); + thrust::shuffle(rmm::exec_policy(handle.get_stream_view()), + high_degree_partition_last, + new_root_candidates.end(), + thrust::default_random_engine()); + + double constexpr max_new_roots_ratio = + 0.05; // to avoid selecting all the vertices as roots leading to zero compression + static_assert(max_new_roots_ratio > 0.0); + auto max_new_roots = std::max( + static_cast(new_root_candidates.size() * max_new_roots_ratio), vertex_t{1}); + + auto init_max_new_roots = max_new_roots; + if (GraphViewType::is_multi_gpu) { + auto &comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto const comm_size = comm.get_size(); + + auto first_candidate_degree = thrust::transform_reduce( + rmm::exec_policy(handle.get_stream_view()), + new_root_candidates.begin(), + new_root_candidates.begin() + (new_root_candidates.size() > 0 ? 
1 : 0), + [vertex_partition, degrees = degrees.data()] __device__(auto v) { + return degrees[vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v)]; + }, + edge_t{0}, + thrust::plus{}); + + auto first_candidate_degrees = + host_scalar_gather(comm, first_candidate_degree, int{0}, handle.get_stream()); + auto new_root_candidate_counts = + host_scalar_gather(comm, new_root_candidates.size(), int{0}, handle.get_stream()); + + if (comm_rank == 0) { + std::vector init_max_new_root_counts(comm_size, vertex_t{0}); + + // if there exists very high degree vertices, we can exceed degree_sum_threshold * comm_size + // with fewer than one root per GPU + if (std::reduce(first_candidate_degrees.begin(), first_candidate_degrees.end()) > + degree_sum_threshold * comm_size) { + std::vector> degree_gpuid_pairs(comm_size); + for (int i = 0; i < comm_size; ++i) { + degree_gpuid_pairs[i] = std::make_tuple(first_candidate_degrees[i], i); + } + std::sort(degree_gpuid_pairs.begin(), degree_gpuid_pairs.end(), [](auto lhs, auto rhs) { + return std::get<0>(lhs) > std::get<0>(rhs); + }); + edge_t sum{0}; + for (size_t i = 0; i < degree_gpuid_pairs.size(); ++i) { + sum += std::get<0>(degree_gpuid_pairs[i]); + init_max_new_root_counts[std::get<1>(degree_gpuid_pairs[i])] = 1; + if (sum > degree_sum_threshold * comm_size) { break; } + } + } + // to avoid selecting too many (possibly all) vertices as initial roots leading to no + // compression in the worst case. 
+ else if (level_graph_view.get_number_of_vertices() <= + static_cast(handle.get_comms().get_size() * + ceil(1.0 / max_new_roots_ratio))) { + std::vector gpuids{}; + gpuids.reserve( + std::reduce(new_root_candidate_counts.begin(), new_root_candidate_counts.end())); + for (size_t i = 0; i < new_root_candidate_counts.size(); ++i) { + gpuids.insert(gpuids.end(), new_root_candidate_counts[i], static_cast(i)); + } + std::random_device rd{}; + std::shuffle(gpuids.begin(), gpuids.end(), std::mt19937(rd())); + gpuids.resize( + std::max(static_cast(gpuids.size() * max_new_roots_ratio), vertex_t{1})); + for (size_t i = 0; i < gpuids.size(); ++i) { ++init_max_new_root_counts[gpuids[i]]; } + } else { + std::fill(init_max_new_root_counts.begin(), + init_max_new_root_counts.end(), + std::numeric_limits::max()); + } + + // FIXME: we need to add host_scalar_scatter +#if 1 + rmm::device_uvector d_counts(comm_size, handle.get_stream_view()); + raft::update_device(d_counts.data(), + init_max_new_root_counts.data(), + init_max_new_root_counts.size(), + handle.get_stream()); + device_bcast( + comm, d_counts.data(), d_counts.data(), d_counts.size(), int{0}, handle.get_stream()); + raft::update_host( + &init_max_new_roots, d_counts.data() + comm_rank, size_t{1}, handle.get_stream()); +#else + iinit_max_new_roots = + host_scalar_scatter(comm, init_max_new_root_counts.data(), int{0}, handle.get_stream()); +#endif + } else { + // FIXME: we need to add host_scalar_scatter +#if 1 + rmm::device_uvector d_counts(comm_size, handle.get_stream_view()); + device_bcast( + comm, d_counts.data(), d_counts.data(), d_counts.size(), int{0}, handle.get_stream()); + raft::update_host( + &init_max_new_roots, d_counts.data() + comm_rank, size_t{1}, handle.get_stream()); +#else + iinit_max_new_roots = + host_scalar_scatter(comm, init_max_new_root_counts.data(), int{0}, handle.get_stream()); +#endif + } + + handle.get_stream_view().synchronize(); + init_max_new_roots = std::min(init_max_new_roots, 
max_new_roots); + } + + // 2-3. initialize vertex frontier, edge_buffer, and col_components (if multi-gpu) + + VertexFrontier(Bucket::num_buckets)> + vertex_frontier(handle); + vertex_t next_candidate_offset{0}; + edge_t edge_count{0}; + + auto edge_buffer = + allocate_dataframe_buffer>(0, handle.get_stream()); + // FIXME: we can use cuda::atomic instead but currently on a system with x86 + GPU, this + // requires placing the atomic variable on managed memory and this make it less attractive. + rmm::device_scalar num_edge_inserts(size_t{0}, handle.get_stream_view()); + + rmm::device_uvector col_components( + GraphViewType::is_multi_gpu ? level_graph_view.get_number_of_local_adj_matrix_partition_cols() + : vertex_t{0}, + handle.get_stream_view()); + if (GraphViewType::is_multi_gpu) { + thrust::fill(rmm::exec_policy(handle.get_stream_view()), + col_components.begin(), + col_components.end(), + invalid_component_id::value); + } + + // 2.4 iterate till every vertex gets visited + + size_t iter{0}; + while (true) { + if ((edge_count < degree_sum_threshold) && + (next_candidate_offset < static_cast(new_root_candidates.size()))) { + auto [new_roots, num_scanned, degree_sum] = + accumulate_new_roots(handle, + vertex_partition, + level_components, + degrees.data(), + new_root_candidates.data() + next_candidate_offset, + new_root_candidates.data() + new_root_candidates.size(), + iter == 0 ? 
init_max_new_roots : max_new_roots, + degree_sum_threshold - edge_count); + next_candidate_offset += num_scanned; + edge_count += degree_sum; + + thrust::sort( + rmm::exec_policy(handle.get_stream_view()), new_roots.begin(), new_roots.end()); + + thrust::for_each( + rmm::exec_policy(handle.get_stream_view()), + new_roots.begin(), + new_roots.end(), + [vertex_partition, components = level_components] __device__(auto c) { + components[vertex_partition.get_local_vertex_offset_from_vertex_nocheck(c)] = c; + }); + + auto pair_first = + thrust::make_zip_iterator(thrust::make_tuple(new_roots.begin(), new_roots.begin())); + vertex_frontier.get_bucket(static_cast(Bucket::cur)) + .insert(pair_first, pair_first + new_roots.size()); + } + + if (vertex_frontier.get_bucket(static_cast(Bucket::cur)).aggregate_size() == 0) { + break; + } + + if (GraphViewType::is_multi_gpu) { + copy_to_adj_matrix_col( + handle, + level_graph_view, + thrust::get<0>(vertex_frontier.get_bucket(static_cast(Bucket::cur)) + .begin() + .get_iterator_tuple()), + thrust::get<0>(vertex_frontier.get_bucket(static_cast(Bucket::cur)) + .end() + .get_iterator_tuple()), + level_components, + col_components.begin()); + } + + auto max_pushes = + GraphViewType::is_multi_gpu + ? compute_num_out_nbrs_from_frontier( + handle, level_graph_view, vertex_frontier, static_cast(Bucket::cur)) + : edge_count; + + // FIXME: if we use cuco::static_map (no duplicates, ideally we need static_set), edge_buffer + // size cannot exceed (# roots)^2 and we can avoid additional sort & unique (but resizing the + // buffer may be more expensive). + auto old_num_edge_inserts = num_edge_inserts.value(handle.get_stream_view()); + resize_dataframe_buffer>( + edge_buffer, old_num_edge_inserts + max_pushes, handle.get_stream()); + + update_frontier_v_push_if_out_nbr( + handle, + level_graph_view, + vertex_frontier, + static_cast(Bucket::cur), + GraphViewType::is_multi_gpu ? 
std::vector{static_cast(Bucket::next), + static_cast(Bucket::conflict)} + : std::vector{static_cast(Bucket::next)}, + thrust::make_counting_iterator(0) /* dummy */, + thrust::make_counting_iterator(0) /* dummy */, + [col_components = GraphViewType::is_multi_gpu ? col_components.data() : level_components, + col_first = level_graph_view.get_local_adj_matrix_partition_col_first(), + edge_buffer_first = + get_dataframe_buffer_begin>(edge_buffer), + num_edge_inserts = num_edge_inserts.data()] __device__(auto tagged_src, + vertex_t dst, + auto src_val, + auto dst_val) { + auto tag = thrust::get<1>(tagged_src); + auto col_offset = dst - col_first; + // FIXME: better switch to atomic_ref after + // https://github.com/nvidia/libcudacxx/milestone/2 + auto old = + atomicCAS(col_components + col_offset, invalid_component_id::value, tag); + if (old != invalid_component_id::value && old != tag) { // conflict + static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + auto edge_idx = atomicAdd(reinterpret_cast(num_edge_inserts), + static_cast(1)); + // keep only the edges in the lower triangular part + *(edge_buffer_first + edge_idx) = + tag >= old ? thrust::make_tuple(tag, old) : thrust::make_tuple(old, tag); + } + return (old == invalid_component_id::value) ? 
thrust::optional{tag} + : thrust::nullopt; + }, + reduce_op::null(), + thrust::make_constant_iterator(0) /* dummy */, + thrust::make_discard_iterator() /* dummy */, + v_op_t{ + vertex_partition, + level_components, + get_dataframe_buffer_begin>(edge_buffer), + num_edge_inserts.data(), + static_cast(Bucket::next), + static_cast(Bucket::conflict)}); + + if (GraphViewType::is_multi_gpu) { + auto cur_num_edge_inserts = num_edge_inserts.value(handle.get_stream_view()); + auto &conflict_bucket = vertex_frontier.get_bucket(static_cast(Bucket::conflict)); + resize_dataframe_buffer>( + edge_buffer, cur_num_edge_inserts + conflict_bucket.size(), handle.get_stream()); + thrust::for_each( + rmm::exec_policy(handle.get_stream_view()), + conflict_bucket.begin(), + conflict_bucket.end(), + [vertex_partition, + level_components, + edge_buffer_first = + get_dataframe_buffer_begin>(edge_buffer), + num_edge_inserts = num_edge_inserts.data()] __device__(auto tagged_v) { + auto v_offset = vertex_partition.get_local_vertex_offset_from_vertex_nocheck( + thrust::get<0>(tagged_v)); + auto old = *(level_components + v_offset); + auto tag = thrust::get<1>(tagged_v); + static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + auto edge_idx = atomicAdd(reinterpret_cast(num_edge_inserts), + static_cast(1)); + // keep only the edges in the lower triangular part + *(edge_buffer_first + edge_idx) = + tag >= old ? thrust::make_tuple(tag, old) : thrust::make_tuple(old, tag); + }); + conflict_bucket.clear(); + } + + // maintain the list of sorted unique edges (we can avoid this if we use cuco::static_map(no + // duplicates, ideally we need static_set)). 
+ auto new_num_edge_inserts = num_edge_inserts.value(handle.get_stream_view()); + if (new_num_edge_inserts > old_num_edge_inserts) { + auto edge_first = + get_dataframe_buffer_begin>(edge_buffer); + thrust::sort(rmm::exec_policy(handle.get_stream_view()), + edge_first + old_num_edge_inserts, + edge_first + new_num_edge_inserts); + if (old_num_edge_inserts > 0) { + auto tmp_edge_buffer = allocate_dataframe_buffer>( + new_num_edge_inserts, handle.get_stream()); + auto tmp_edge_first = + get_dataframe_buffer_begin>(tmp_edge_buffer); + thrust::merge(rmm::exec_policy(handle.get_stream_view()), + edge_first, + edge_first + old_num_edge_inserts, + edge_first + old_num_edge_inserts, + edge_first + new_num_edge_inserts, + tmp_edge_first); + edge_buffer = std::move(tmp_edge_buffer); + } + edge_first = get_dataframe_buffer_begin>(edge_buffer); + auto unique_edge_last = thrust::unique(rmm::exec_policy(handle.get_stream_view()), + edge_first, + edge_first + new_num_edge_inserts); + auto num_unique_edges = static_cast(thrust::distance(edge_first, unique_edge_last)); + num_edge_inserts.set_value(num_unique_edges, handle.get_stream_view()); + } + + vertex_frontier.get_bucket(static_cast(Bucket::cur)).clear(); + vertex_frontier.get_bucket(static_cast(Bucket::cur)).shrink_to_fit(); + vertex_frontier.swap_buckets(static_cast(Bucket::cur), + static_cast(Bucket::next)); + edge_count = thrust::transform_reduce( + rmm::exec_policy(handle.get_stream_view()), + thrust::get<0>(vertex_frontier.get_bucket(static_cast(Bucket::cur)) + .begin() + .get_iterator_tuple()), + thrust::get<0>( + vertex_frontier.get_bucket(static_cast(Bucket::cur)).end().get_iterator_tuple()), + [vertex_partition, degrees = degrees.data()] __device__(auto v) { + return degrees[vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v)]; + }, + edge_t{0}, + thrust::plus()); + + ++iter; + } + + // 2-5. 
construct the next level graph from the edges emitted on conflicts + + auto num_inserts = num_edge_inserts.value(handle.get_stream_view()); + auto aggregate_num_inserts = num_inserts; + if (GraphViewType::is_multi_gpu) { + auto &comm = handle.get_comms(); + aggregate_num_inserts = host_scalar_allreduce(comm, num_inserts, handle.get_stream()); + } + + if (aggregate_num_inserts > 0) { + resize_dataframe_buffer>( + edge_buffer, static_cast(num_inserts * 2), handle.get_stream()); + auto input_first = get_dataframe_buffer_begin>(edge_buffer); + auto output_first = thrust::make_zip_iterator( + thrust::make_tuple(thrust::get<1>(input_first.get_iterator_tuple()), + thrust::get<0>(input_first.get_iterator_tuple()))) + + num_inserts; + thrust::copy(rmm::exec_policy(handle.get_stream_view()), + input_first, + input_first + num_inserts, + output_first); + + if (GraphViewType::is_multi_gpu) { + auto &comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto &row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto &col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + + std::tie(edge_buffer, std::ignore) = + cugraph::experimental::groupby_gpuid_and_shuffle_values( + comm, + get_dataframe_buffer_begin>(edge_buffer), + get_dataframe_buffer_end>(edge_buffer), + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + comm_size, row_comm_size, col_comm_size}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + auto edge_first = + get_dataframe_buffer_begin>(edge_buffer); + auto edge_last = get_dataframe_buffer_end>(edge_buffer); + thrust::sort(rmm::exec_policy(handle.get_stream_view()), edge_first, edge_last); + auto unique_edge_last = + thrust::unique(rmm::exec_policy(handle.get_stream_view()), edge_first, edge_last); + 
resize_dataframe_buffer>( + edge_buffer, + static_cast(thrust::distance(edge_first, unique_edge_last)), + handle.get_stream()); + shrink_to_fit_dataframe_buffer>(edge_buffer, + handle.get_stream()); + } + + std::tie(level_graph, level_renumber_map) = + create_graph_from_edgelist( + handle, + std::nullopt, + std::move(std::get<0>(edge_buffer)), + std::move(std::get<1>(edge_buffer)), + rmm::device_uvector(size_t{0}, handle.get_stream_view()), + graph_properties_t{true, false, false}, + true); + } else { + break; + } + } + + // 3. recursive update the current level component IDs from the next level component IDs + + for (size_t i = 0; i < num_levels - 1; ++i) { + size_t next_level = num_levels - 1 - i; + size_t current_level = next_level - 1; + + rmm::device_uvector next_local_vertices(level_renumber_map_vectors[next_level].size(), + handle.get_stream_view()); + thrust::sequence(rmm::exec_policy(handle.get_stream_view()), + next_local_vertices.begin(), + next_local_vertices.end(), + level_local_vertex_first_vectors[next_level]); + relabel( + handle, + std::make_tuple(next_local_vertices.data(), level_renumber_map_vectors[next_level].data()), + next_local_vertices.size(), + level_component_vectors[next_level].data(), + level_component_vectors[next_level].size(), + false); + relabel( + handle, + std::make_tuple(level_renumber_map_vectors[next_level].data(), + level_component_vectors[next_level].data()), + level_renumber_map_vectors[next_level].size(), + current_level == 0 ? components : level_component_vectors[current_level].data(), + current_level == 0 ? 
push_graph_view.get_number_of_local_vertices() + : level_component_vectors[current_level].size(), + true); + } +} + +} // namespace + +template +void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + vertex_t *components, + bool do_expensive_check) +{ + weakly_connected_components_impl(handle, graph_view, components, do_expensive_check); +} + +// explicit instantiation + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + 
int64_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t *components, + bool do_expensive_check); + +template void weakly_connected_components( + raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t *components, + bool do_expensive_check); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/converters/COOtoCSR.cu b/cpp/src/converters/COOtoCSR.cu index 787872742e9..9164d7b9562 100644 --- a/cpp/src/converters/COOtoCSR.cu +++ b/cpp/src/converters/COOtoCSR.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include "COOtoCSR.cuh" namespace cugraph { diff --git a/cpp/src/converters/COOtoCSR.cuh b/cpp/src/converters/COOtoCSR.cuh index b110e02a513..2876f1ccf52 100644 --- a/cpp/src/converters/COOtoCSR.cuh +++ b/cpp/src/converters/COOtoCSR.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -31,14 +31,14 @@ #include #include -#include +#include #include #include -#include +#include -#include +#include namespace cugraph { namespace detail { diff --git a/cpp/src/converters/permute_graph.cuh b/cpp/src/converters/permute_graph.cuh index b5b2de83e9b..aa64cf5ae11 100644 --- a/cpp/src/converters/permute_graph.cuh +++ b/cpp/src/converters/permute_graph.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,10 +14,10 @@ * limitations under the License. */ #include -#include -#include +#include +#include +#include #include "converters/COOtoCSR.cuh" -#include "utilities/graph_utils.cuh" namespace cugraph { namespace detail { diff --git a/cpp/src/converters/renumber.cuh b/cpp/src/converters/renumber.cuh index 263d7199c10..ccf4e6f62c2 100644 --- a/cpp/src/converters/renumber.cuh +++ b/cpp/src/converters/renumber.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,9 +30,9 @@ #include #include -#include +#include +#include #include "sort/bitonic.cuh" -#include "utilities/graph_utils.cuh" namespace cugraph { namespace detail { diff --git a/cpp/src/cores/core_number.cu b/cpp/src/cores/core_number.cu index 091ba07ccc6..419232e8deb 100644 --- a/cpp/src/cores/core_number.cu +++ b/cpp/src/cores/core_number.cu @@ -17,8 +17,8 @@ #include #include #include -#include -#include +#include +#include //#include namespace cugraph { diff --git a/cpp/src/experimental/bfs.cu b/cpp/src/experimental/bfs.cu index 2a703c1c85e..817e9cbd225 100644 --- a/cpp/src/experimental/bfs.cu +++ b/cpp/src/experimental/bfs.cu @@ -14,13 +14,13 @@ * limitations under the License. */ -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -91,7 +92,10 @@ void bfs(raft::handle_t const &handle, // 3. 
initialize BFS frontier enum class Bucket { cur, next, num_buckets }; - VertexFrontier(Bucket::num_buckets)> + VertexFrontier(Bucket::num_buckets)> vertex_frontier(handle); if (push_graph_view.is_local_vertex_nocheck(source_vertex)) { @@ -123,15 +127,18 @@ void bfs(raft::handle_t const &handle, *(distances + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(dst)); if (distance != invalid_distance) { push = false; } } - return thrust::make_tuple(push, src); + return push ? thrust::optional{src} : thrust::nullopt; }, reduce_op::any(), distances, thrust::make_zip_iterator(thrust::make_tuple(distances, predecessor_first)), - [depth] __device__(auto v_val, auto pushed_val) { - auto idx = (v_val == invalid_distance) ? static_cast(Bucket::next) - : VertexFrontier::kInvalidBucketIdx; - return thrust::make_tuple(idx, thrust::make_tuple(depth + 1, pushed_val)); + [depth] __device__(auto v, auto v_val, auto pushed_val) { + return (v_val == invalid_distance) + ? thrust::optional< + thrust::tuple>>{thrust::make_tuple( + static_cast(Bucket::next), + thrust::make_tuple(depth + 1, pushed_val))} + : thrust::nullopt; }); vertex_frontier.get_bucket(static_cast(Bucket::cur)).clear(); diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu index 6397f92e336..e648691f8b1 100644 --- a/cpp/src/experimental/coarsen_graph.cu +++ b/cpp/src/experimental/coarsen_graph.cu @@ -14,14 +14,14 @@ * limitations under the License. */ -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -284,12 +284,14 @@ coarsen_graph( store_transposed ? 
graph_view.get_number_of_local_adj_matrix_partition_cols(i) : graph_view.get_number_of_local_adj_matrix_partition_rows(i), handle.get_stream()); - // FIXME: this copy is unnecessary, beter fix RAFT comm's bcast to take const iterators for - // input - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - labels, - labels + major_labels.size(), - major_labels.begin()); + if (col_comm_rank == static_cast(i)) { + // FIXME: this copy is unnecessary, beter fix RAFT comm's bcast to take const iterators for + // input + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels, + labels + major_labels.size(), + major_labels.begin()); + } device_bcast(col_comm, major_labels.data(), major_labels.data(), @@ -455,7 +457,7 @@ coarsen_graph( cur_size; thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), src_edge_first, - src_edge_first + edgelist_major_vertices.size(), + src_edge_first + number_of_partition_edges, dst_edge_first); } } @@ -539,13 +541,14 @@ coarsen_graph( counts[i] = static_cast(coarsened_edgelist_major_vertices[i].size()); } std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = - renumber_edgelist(handle, - unique_labels.data(), - static_cast(unique_labels.size()), - major_ptrs, - minor_ptrs, - counts, - do_expensive_check); + renumber_edgelist( + handle, + std::optional>{ + std::make_tuple(unique_labels.data(), static_cast(unique_labels.size()))}, + major_ptrs, + minor_ptrs, + counts, + do_expensive_check); } // 5. 
build a graph @@ -631,8 +634,8 @@ coarsen_graph( auto renumber_map_labels = renumber_edgelist( handle, - unique_labels.data(), - static_cast(unique_labels.size()), + std::optional>{ + std::make_tuple(unique_labels.data(), static_cast(unique_labels.size()))}, coarsened_edgelist_major_vertices.data(), coarsened_edgelist_minor_vertices.data(), static_cast(coarsened_edgelist_major_vertices.size()), diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 18db57a737f..ad6f51d75fe 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -14,11 +14,11 @@ * limitations under the License. */ -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include @@ -295,8 +295,9 @@ graph_t::max())); rmm::device_uvector d_thresholds(detail::num_segments_per_vertex_partition - 1, default_stream); - std::vector h_thresholds = {static_cast(detail::mid_degree_threshold), - static_cast(detail::low_degree_threshold)}; + std::vector h_thresholds = { + static_cast(detail::mid_degree_threshold * col_comm_size), + static_cast(detail::low_degree_threshold * col_comm_size)}; raft::update_device( d_thresholds.data(), h_thresholds.data(), h_thresholds.size(), default_stream); @@ -516,4 +517,4 @@ template class graph_t; } // namespace experimental } // namespace cugraph -#include +#include diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index 67603ae260b..3dc5dee4756 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -14,12 +14,12 @@ * limitations under the License. 
*/ -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include #include @@ -534,9 +534,13 @@ graph_view_ton(handle.get_stream()), in_degrees.begin(), in_degrees.end()); - rmm::device_scalar ret(handle.get_stream()); - device_allreduce( - handle.get_comms(), it, ret.data(), 1, raft::comms::op_t::MAX, handle.get_stream()); + rmm::device_scalar ret(edge_t{0}, handle.get_stream()); + device_allreduce(handle.get_comms(), + it != in_degrees.end() ? it : ret.data(), + ret.data(), + 1, + raft::comms::op_t::MAX, + handle.get_stream()); return ret.value(handle.get_stream()); } @@ -557,8 +561,8 @@ edge_t graph_view_ton(handle.get_stream()), in_degrees.begin(), in_degrees.end()); - edge_t ret{}; - raft::update_host(&ret, it, 1, handle.get_stream()); + edge_t ret{0}; + if (it != in_degrees.end()) { raft::update_host(&ret, it, 1, handle.get_stream()); } handle.get_stream_view().synchronize(); return ret; } @@ -576,9 +580,13 @@ graph_view_ton(handle.get_stream()), out_degrees.begin(), out_degrees.end()); - rmm::device_scalar ret(handle.get_stream()); - device_allreduce( - handle.get_comms(), it, ret.data(), 1, raft::comms::op_t::MAX, handle.get_stream()); + rmm::device_scalar ret(edge_t{0}, handle.get_stream()); + device_allreduce(handle.get_comms(), + it != out_degrees.end() ? 
it : ret.data(), + ret.data(), + 1, + raft::comms::op_t::MAX, + handle.get_stream()); return ret.value(handle.get_stream()); } @@ -599,8 +607,8 @@ edge_t graph_view_ton(handle.get_stream()), out_degrees.begin(), out_degrees.end()); - edge_t ret{}; - raft::update_host(&ret, it, 1, handle.get_stream()); + edge_t ret{0}; + if (it != out_degrees.end()) { raft::update_host(&ret, it, 1, handle.get_stream()); } handle.get_stream_view().synchronize(); return ret; } @@ -618,9 +626,13 @@ graph_view_ton(handle.get_stream()), in_weight_sums.begin(), in_weight_sums.end()); - rmm::device_scalar ret(handle.get_stream()); - device_allreduce( - handle.get_comms(), it, ret.data(), 1, raft::comms::op_t::MAX, handle.get_stream()); + rmm::device_scalar ret(weight_t{0.0}, handle.get_stream()); + device_allreduce(handle.get_comms(), + it != in_weight_sums.end() ? it : ret.data(), + ret.data(), + 1, + raft::comms::op_t::MAX, + handle.get_stream()); return ret.value(handle.get_stream()); } @@ -641,8 +653,8 @@ weight_t graph_view_ton(handle.get_stream()), in_weight_sums.begin(), in_weight_sums.end()); - weight_t ret{}; - raft::update_host(&ret, it, 1, handle.get_stream()); + weight_t ret{0.0}; + if (it != in_weight_sums.end()) { raft::update_host(&ret, it, 1, handle.get_stream()); } handle.get_stream_view().synchronize(); return ret; } @@ -660,9 +672,13 @@ graph_view_ton(handle.get_stream()), out_weight_sums.begin(), out_weight_sums.end()); - rmm::device_scalar ret(handle.get_stream()); - device_allreduce( - handle.get_comms(), it, ret.data(), 1, raft::comms::op_t::MAX, handle.get_stream()); + rmm::device_scalar ret(weight_t{0.0}, handle.get_stream()); + device_allreduce(handle.get_comms(), + it != out_weight_sums.end() ? 
it : ret.data(), + ret.data(), + 1, + raft::comms::op_t::MAX, + handle.get_stream()); return ret.value(handle.get_stream()); } @@ -683,8 +699,8 @@ weight_t graph_view_t< auto it = thrust::max_element(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), out_weight_sums.begin(), out_weight_sums.end()); - weight_t ret{}; - raft::update_host(&ret, it, 1, handle.get_stream()); + weight_t ret{0.0}; + if (it != out_weight_sums.end()) { raft::update_host(&ret, it, 1, handle.get_stream()); } handle.get_stream_view().synchronize(); return ret; } diff --git a/cpp/src/experimental/induced_subgraph.cu b/cpp/src/experimental/induced_subgraph.cu index 5cda36ad7e2..062bf18cd95 100644 --- a/cpp/src/experimental/induced_subgraph.cu +++ b/cpp/src/experimental/induced_subgraph.cu @@ -14,11 +14,11 @@ * limitations under the License. */ -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/src/experimental/katz_centrality.cu b/cpp/src/experimental/katz_centrality.cu index 7ffef5053af..ad62f5e9d68 100644 --- a/cpp/src/experimental/katz_centrality.cu +++ b/cpp/src/experimental/katz_centrality.cu @@ -14,13 +14,13 @@ * limitations under the License. 
*/ -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index 24914fb028b..6205f13e94d 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -15,18 +15,18 @@ */ #pragma once -#include +#include -#include -#include +#include +#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -201,6 +201,8 @@ class Louvain { timer_start("compute_vertex_and_cluster_weights"); vertex_weights_v_ = current_graph_view_.compute_out_weight_sums(handle_); + cluster_keys_v_.resize(vertex_weights_v_.size(), handle_.get_stream()); + cluster_weights_v_.resize(vertex_weights_v_.size(), handle_.get_stream()); thrust::sequence(rmm::exec_policy(handle_.get_stream())->on(handle_.get_stream()), cluster_keys_v_.begin(), @@ -363,7 +365,6 @@ class Louvain { rmm::device_uvector &next_cluster_v, bool up_down) { -#ifdef CUCO_STATIC_MAP_DEFINED rmm::device_uvector old_cluster_sum_v( current_graph_view_.get_number_of_local_vertices(), handle_.get_stream()); rmm::device_uvector cluster_subtract_v( @@ -499,7 +500,6 @@ class Louvain { d_src_cluster_cache_, [] __device__(auto src, auto dst, auto wt, auto x, auto y) { return wt; }, weight_t{0}); -#endif } void shrink_graph() @@ -525,7 +525,8 @@ class Louvain { static_cast(numbering_indices.begin())), current_graph_view_.get_number_of_local_vertices(), dendrogram_->current_level_begin(), - dendrogram_->current_level_size()); + dendrogram_->current_level_size(), + false); timer_stop(handle_.get_stream()); } diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index e5874acb04f..db54783453e 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -14,17 +14,17 @@ * 
limitations under the License. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu index 918feeb7a10..7e7a4d64b3e 100644 --- a/cpp/src/experimental/relabel.cu +++ b/cpp/src/experimental/relabel.cu @@ -14,19 +14,20 @@ * limitations under the License. */ -#include - -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include #include +#include #include #include +#include +#include #include #include @@ -49,15 +50,11 @@ void relabel(raft::handle_t const& handle, vertex_t num_label_pairs, vertex_t* labels /* [INOUT] */, vertex_t num_labels, + bool skip_missing_labels, bool do_expensive_check) { double constexpr load_factor = 0.7; - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "Relabel not supported on Pascal and older architectures."); - -#ifdef CUCO_STATIC_MAP_DEFINED if (multi_gpu) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); @@ -120,20 +117,21 @@ void relabel(raft::handle_t const& handle, CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // cuco::static_map currently does not take stream - cuco::static_map relabel_map{ - // cuco::static_map requires at least one empty slot - std::max( - static_cast(static_cast(rx_label_pair_old_labels.size()) / load_factor), - rx_label_pair_old_labels.size() + 1), - invalid_vertex_id::value, - invalid_vertex_id::value}; - - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(rx_label_pair_old_labels.begin(), rx_label_pair_new_labels.begin())), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), 
thrust::get<1>(val)); - }); + auto poly_alloc = + rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = + rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + cuco::static_map + relabel_map{// cuco::static_map requires at least one empty slot + std::max(static_cast( + static_cast(rx_label_pair_old_labels.size()) / load_factor), + rx_label_pair_old_labels.size() + 1), + invalid_vertex_id::value, + invalid_vertex_id::value, + stream_adapter}; + + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(rx_label_pair_old_labels.begin(), rx_label_pair_new_labels.begin())); relabel_map.insert(pair_first, pair_first + rx_label_pair_old_labels.size()); rx_label_pair_old_labels.resize(0, handle.get_stream()); @@ -156,11 +154,24 @@ void relabel(raft::handle_t const& handle, CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // cuco::static_map currently does not take stream - relabel_map.find( - rx_unique_old_labels.begin(), - rx_unique_old_labels.end(), - rx_unique_old_labels - .begin()); // now rx_unique_old_lables hold new labels for the corresponding old labels + if (skip_missing_labels) { + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_unique_old_labels.begin(), + rx_unique_old_labels.end(), + rx_unique_old_labels.begin(), + [view = relabel_map.get_device_view()] __device__(auto old_label) { + auto found = view.find(old_label); + return found != view.end() ? 
view.find(old_label)->second.load( + cuda::std::memory_order_relaxed) + : old_label; + }); + } else { + relabel_map.find( + rx_unique_old_labels.begin(), + rx_unique_old_labels.end(), + rx_unique_old_labels.begin()); // now rx_unique_old_lables hold new labels for the + // corresponding old labels + } std::tie(new_labels_for_unique_old_labels, std::ignore) = shuffle_values( handle.get_comms(), rx_unique_old_labels.begin(), rx_value_counts, handle.get_stream()); @@ -169,22 +180,25 @@ void relabel(raft::handle_t const& handle, handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream - cuco::static_map relabel_map( - // cuco::static_map requires at least one empty slot - std::max(static_cast(static_cast(unique_old_labels.size()) / load_factor), - unique_old_labels.size() + 1), - invalid_vertex_id::value, - invalid_vertex_id::value); - - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(unique_old_labels.begin(), new_labels_for_unique_old_labels.begin())), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - - relabel_map.insert(pair_first, pair_first + unique_old_labels.size()); - relabel_map.find(labels, labels + num_labels, labels); + { + auto poly_alloc = + rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = + rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + cuco::static_map + relabel_map{ + // cuco::static_map requires at least one empty slot + std::max(static_cast(static_cast(unique_old_labels.size()) / load_factor), + unique_old_labels.size() + 1), + invalid_vertex_id::value, + invalid_vertex_id::value, + stream_adapter}; + + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(unique_old_labels.begin(), new_labels_for_unique_old_labels.begin())); + relabel_map.insert(pair_first, pair_first + unique_old_labels.size()); + 
relabel_map.find(labels, labels + num_labels, labels); + } } else { cuco::static_map relabel_map( // cuco::static_map requires at least one empty slot @@ -193,18 +207,26 @@ void relabel(raft::handle_t const& handle, invalid_vertex_id::value, invalid_vertex_id::value); - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(std::get<0>(old_new_label_pairs), std::get<1>(old_new_label_pairs))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); - + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(std::get<0>(old_new_label_pairs), std::get<1>(old_new_label_pairs))); relabel_map.insert(pair_first, pair_first + num_label_pairs); - relabel_map.find(labels, labels + num_labels, labels); + if (skip_missing_labels) { + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels, + labels + num_labels, + labels, + [view = relabel_map.get_device_view()] __device__(auto old_label) { + auto found = view.find(old_label); + return found != view.end() ? 
view.find(old_label)->second.load( + cuda::std::memory_order_relaxed) + : old_label; + }); + } else { + relabel_map.find(labels, labels + num_labels, labels); + } } - if (do_expensive_check) { + if (do_expensive_check && !skip_missing_labels) { CUGRAPH_EXPECTS( thrust::count(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), labels, @@ -212,7 +234,6 @@ void relabel(raft::handle_t const& handle, invalid_vertex_id::value) == 0, "Invalid input argument: labels include old label values missing in old_new_label_pairs."); } -#endif return; } @@ -224,6 +245,7 @@ template void relabel(raft::handle_t const& handle, int32_t num_label_pairs, int32_t* labels, int32_t num_labels, + bool skip_missing_labels, bool do_expensive_check); template void relabel( @@ -232,6 +254,7 @@ template void relabel( int32_t num_label_pairs, int32_t* labels, int32_t num_labels, + bool skip_missing_labels, bool do_expensive_check); template void relabel(raft::handle_t const& handle, @@ -239,6 +262,7 @@ template void relabel(raft::handle_t const& handle, int64_t num_label_pairs, int64_t* labels, int64_t num_labels, + bool skip_missing_labels, bool do_expensive_check); template void relabel( @@ -247,6 +271,7 @@ template void relabel( int64_t num_label_pairs, int64_t* labels, int64_t num_labels, + bool skip_missing_labels, bool do_expensive_check); } // namespace experimental diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu index 01022e8fa6d..d6e3f8c93f6 100644 --- a/cpp/src/experimental/renumber_edgelist.cu +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -14,20 +14,21 @@ * limitations under the License. 
*/ -#include - -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include #include +#include #include #include +#include +#include #include #include @@ -45,12 +46,10 @@ namespace cugraph { namespace experimental { namespace detail { -#ifdef CUCO_STATIC_MAP_DEFINED template rmm::device_uvector compute_renumber_map( raft::handle_t const& handle, - vertex_t const* vertices, - vertex_t num_local_vertices /* relevant only if vertices != nullptr */, + std::optional> optional_vertex_span, std::vector const& edgelist_major_vertices, std::vector const& edgelist_minor_vertices, std::vector const& edgelist_edge_counts) @@ -116,9 +115,6 @@ rmm::device_uvector compute_renumber_map( rmm::device_uvector rx_major_labels(0, handle.get_stream()); rmm::device_uvector rx_major_counts(0, handle.get_stream()); - // FIXME: a temporary workaround for a NCCL (2.9.6) bug that causes a hang on DGX1 (due to - // remote memory allocation), this barrier is unnecessary otherwise. - col_comm.barrier(); auto rx_sizes = host_scalar_gather( col_comm, tmp_major_labels.size(), static_cast(i), handle.get_stream()); std::vector rx_displs{}; @@ -290,18 +286,19 @@ rmm::device_uvector compute_renumber_map( // 4. 
if vertices != nullptr, add isolated vertices rmm::device_uvector isolated_vertices(0, handle.get_stream()); - if (vertices != nullptr) { - auto num_isolated_vertices = thrust::count_if( + if (optional_vertex_span) { + auto [vertices, num_vertices] = *optional_vertex_span; + auto num_isolated_vertices = thrust::count_if( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), vertices, - vertices + num_local_vertices, + vertices + num_vertices, [label_first = labels.begin(), label_last = labels.end()] __device__(auto v) { return !thrust::binary_search(thrust::seq, label_first, label_last, v); }); isolated_vertices.resize(num_isolated_vertices, handle.get_stream()); thrust::copy_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), vertices, - vertices + num_local_vertices, + vertices + num_vertices, isolated_vertices.begin(), [label_first = labels.begin(), label_last = labels.end()] __device__(auto v) { return !thrust::binary_search(thrust::seq, label_first, label_last, v); @@ -335,27 +332,29 @@ rmm::device_uvector compute_renumber_map( template void expensive_check_edgelist( raft::handle_t const& handle, - vertex_t const* local_vertices, - vertex_t num_local_vertices /* relevant only if local_vertices != nullptr */, + std::optional> optional_vertex_span, std::vector const& edgelist_major_vertices, std::vector const& edgelist_minor_vertices, std::vector const& edgelist_edge_counts) { - rmm::device_uvector sorted_local_vertices( - local_vertices != nullptr ? 
num_local_vertices : vertex_t{0}, handle.get_stream()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - local_vertices, - local_vertices + num_local_vertices, - sorted_local_vertices.begin()); - thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - sorted_local_vertices.begin(), - sorted_local_vertices.end()); - CUGRAPH_EXPECTS(static_cast(thrust::distance( - sorted_local_vertices.begin(), - thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - sorted_local_vertices.begin(), - sorted_local_vertices.end()))) == sorted_local_vertices.size(), - "Invalid input argument: local_vertices should not have duplicates."); + rmm::device_uvector sorted_local_vertices(size_t{0}, handle.get_stream()); + if (optional_vertex_span) { + auto [vertices, num_vertices] = *optional_vertex_span; + sorted_local_vertices.resize(num_vertices, handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_vertices, + sorted_local_vertices.begin()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_local_vertices.begin(), + sorted_local_vertices.end()); + CUGRAPH_EXPECTS(static_cast(thrust::distance( + sorted_local_vertices.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_local_vertices.begin(), + sorted_local_vertices.end()))) == sorted_local_vertices.size(), + "Invalid input argument: local_vertices should not have duplicates."); + } if (multi_gpu) { auto& comm = handle.get_comms(); @@ -373,6 +372,7 @@ void expensive_check_edgelist( "Invalid input argument: both edgelist_major_vertices.size() & " "edgelist_minor_vertices.size() should coincide with col_comm_size."); + auto [local_vertices, num_local_vertices] = *optional_vertex_span; CUGRAPH_EXPECTS( thrust::count_if( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), @@ -411,12 +411,7 @@ void 
expensive_check_edgelist( "Invalid input argument: edgelist_major_vertices & edgelist_minor_vertices should be " "pre-shuffled."); - auto aggregate_vertexlist_size = host_scalar_allreduce( - comm, - local_vertices != nullptr ? num_local_vertices : vertex_t{0}, - handle.get_stream()); // local_vertices != nullptr is insufficient in multi-GPU as only a - // subset of GPUs may have a non-zero vertices - if (aggregate_vertexlist_size > 0) { + if (optional_vertex_span) { auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); @@ -521,48 +516,42 @@ void expensive_check_edgelist( assert(edgelist_major_vertices.size() == 1); assert(edgelist_minor_vertices.size() == 1); - if (local_vertices != nullptr) { + if (optional_vertex_span) { auto edge_first = thrust::make_zip_iterator( thrust::make_tuple(edgelist_major_vertices[0], edgelist_minor_vertices[0])); CUGRAPH_EXPECTS( - thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - edge_first, - edge_first + edgelist_edge_counts[0], - [num_local_vertices, - sorted_local_vertices = sorted_local_vertices.data()] __device__(auto e) { - return !thrust::binary_search(thrust::seq, - sorted_local_vertices, - sorted_local_vertices + num_local_vertices, - thrust::get<0>(e)) || - !thrust::binary_search(thrust::seq, - sorted_local_vertices, - sorted_local_vertices + num_local_vertices, - thrust::get<1>(e)); - }) == 0, + thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + edgelist_edge_counts[0], + [sorted_local_vertices = sorted_local_vertices.data(), + num_sorted_local_vertices = + static_cast(sorted_local_vertices.size())] __device__(auto e) { + return !thrust::binary_search(thrust::seq, + sorted_local_vertices, + sorted_local_vertices + num_sorted_local_vertices, + thrust::get<0>(e)) || + !thrust::binary_search(thrust::seq, + 
sorted_local_vertices, + sorted_local_vertices + num_sorted_local_vertices, + thrust::get<1>(e)); + }) == 0, "Invalid input argument: edgelist_major_vertices and/or edgelist_minor_vertices have " "invalid vertex ID(s)."); } } } -#endif template std::enable_if_t, partition_t, vertex_t, edge_t>> renumber_edgelist(raft::handle_t const& handle, - vertex_t const* local_vertices, - vertex_t num_local_vertices /* relevant only if local_vertices != nullptr */, + std::optional> optional_local_vertex_span, std::vector const& edgelist_major_vertices /* [INOUT] */, std::vector const& edgelist_minor_vertices /* [INOUT] */, std::vector const& edgelist_edge_counts, bool do_expensive_check) { - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS( - handle.get_device_properties().major >= 7, - "This version of enumber_edgelist not supported on Pascal and older architectures."); - -#ifdef CUCO_STATIC_MAP_DEFINED auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); auto const comm_rank = comm.get_rank(); @@ -582,8 +571,7 @@ renumber_edgelist(raft::handle_t const& handle, if (do_expensive_check) { expensive_check_edgelist(handle, - local_vertices, - num_local_vertices, + optional_local_vertex_span, edgelist_const_major_vertices, edgelist_const_minor_vertices, edgelist_edge_counts); @@ -593,8 +581,7 @@ renumber_edgelist(raft::handle_t const& handle, auto renumber_map_labels = detail::compute_renumber_map(handle, - local_vertices, - num_local_vertices, + optional_local_vertex_span, edgelist_const_major_vertices, edgelist_const_minor_vertices, edgelist_edge_counts); @@ -650,21 +637,21 @@ renumber_edgelist(raft::handle_t const& handle, CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // cuco::static_map currently does not take stream - cuco::static_map renumber_map{ - // cuco::static_map requires at least one empty slot - std::max(static_cast( - static_cast(partition.get_matrix_partition_major_size(i)) / load_factor), - 
static_cast(partition.get_matrix_partition_major_size(i)) + 1), - invalid_vertex_id::value, - invalid_vertex_id::value}; - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple( - col_comm_rank == static_cast(i) ? renumber_map_labels.begin() - : renumber_map_major_labels.begin(), - thrust::make_counting_iterator(partition.get_matrix_partition_major_first(i)))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + cuco::static_map + renumber_map{ + // cuco::static_map requires at least one empty slot + std::max(static_cast( + static_cast(partition.get_matrix_partition_major_size(i)) / load_factor), + static_cast(partition.get_matrix_partition_major_size(i)) + 1), + invalid_vertex_id::value, + invalid_vertex_id::value, + stream_adapter}; + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple( + col_comm_rank == static_cast(i) ? 
renumber_map_labels.begin() + : renumber_map_major_labels.begin(), + thrust::make_counting_iterator(partition.get_matrix_partition_major_first(i)))); renumber_map.insert(pair_first, pair_first + partition.get_matrix_partition_major_size(i)); renumber_map.find(edgelist_major_vertices[i], edgelist_major_vertices[i] + edgelist_edge_counts[i], @@ -700,20 +687,19 @@ renumber_edgelist(raft::handle_t const& handle, CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // cuco::static_map currently does not take stream - cuco::static_map renumber_map{ - // cuco::static_map requires at least one empty slot - std::max( - static_cast(static_cast(renumber_map_minor_labels.size()) / load_factor), - renumber_map_minor_labels.size() + 1), - invalid_vertex_id::value, - invalid_vertex_id::value}; - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple( - renumber_map_minor_labels.begin(), - thrust::make_counting_iterator(partition.get_matrix_partition_minor_first()))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + cuco::static_map + renumber_map{// cuco::static_map requires at least one empty slot + std::max(static_cast( + static_cast(renumber_map_minor_labels.size()) / load_factor), + renumber_map_minor_labels.size() + 1), + invalid_vertex_id::value, + invalid_vertex_id::value, + stream_adapter}; + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple( + renumber_map_minor_labels.begin(), + thrust::make_counting_iterator(partition.get_matrix_partition_minor_first()))); renumber_map.insert(pair_first, pair_first + renumber_map_minor_labels.size()); for (size_t i = 0; i < edgelist_major_vertices.size(); ++i) { renumber_map.find(edgelist_minor_vertices[i], @@ -734,35 +720,21 @@ 
renumber_edgelist(raft::handle_t const& handle, return std::make_tuple( std::move(renumber_map_labels), partition, number_of_vertices, number_of_edges); -#else - return std::make_tuple(rmm::device_uvector(0, handle.get_stream()), - partition_t{}, - vertex_t{0}, - edge_t{0}); -#endif } template std::enable_if_t> renumber_edgelist( raft::handle_t const& handle, - vertex_t const* vertices, - vertex_t num_vertices /* relevant only if vertices != nullptr */, + std::optional> optional_vertex_span, vertex_t* edgelist_major_vertices /* [INOUT] */, vertex_t* edgelist_minor_vertices /* [INOUT] */, edge_t num_edgelist_edges, bool do_expensive_check) { - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS( - handle.get_device_properties().major >= 7, - "This version of renumber_edgelist not supported on Pascal and older architectures."); - -#ifdef CUCO_STATIC_MAP_DEFINED if (do_expensive_check) { expensive_check_edgelist( handle, - vertices, - num_vertices, + optional_vertex_span, std::vector{edgelist_major_vertices}, std::vector{edgelist_minor_vertices}, std::vector{num_edgelist_edges}); @@ -770,8 +742,7 @@ std::enable_if_t> renumber_edgelist( auto renumber_map_labels = detail::compute_renumber_map( handle, - vertices, - num_vertices, + optional_vertex_span, std::vector{edgelist_major_vertices}, std::vector{edgelist_minor_vertices}, std::vector{num_edgelist_edges}); @@ -781,18 +752,18 @@ std::enable_if_t> renumber_edgelist( // FIXME: compare this hash based approach with a binary search based approach in both memory // footprint and execution time - cuco::static_map renumber_map{ - // cuco::static_map requires at least one empty slot - std::max(static_cast(static_cast(renumber_map_labels.size()) / load_factor), - renumber_map_labels.size() + 1), - invalid_vertex_id::value, - invalid_vertex_id::value}; - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(renumber_map_labels.begin(), 
thrust::make_counting_iterator(vertex_t{0}))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + cuco::static_map + renumber_map{ + // cuco::static_map requires at least one empty slot + std::max(static_cast(static_cast(renumber_map_labels.size()) / load_factor), + renumber_map_labels.size() + 1), + invalid_vertex_id::value, + invalid_vertex_id::value, + stream_adapter}; + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(renumber_map_labels.begin(), thrust::make_counting_iterator(vertex_t{0}))); renumber_map.insert(pair_first, pair_first + renumber_map_labels.size()); renumber_map.find( edgelist_major_vertices, edgelist_major_vertices + num_edgelist_edges, edgelist_major_vertices); @@ -800,9 +771,6 @@ std::enable_if_t> renumber_edgelist( edgelist_minor_vertices, edgelist_minor_vertices + num_edgelist_edges, edgelist_minor_vertices); return renumber_map_labels; -#else - return rmm::device_uvector(0, handle.get_stream()); -#endif } } // namespace detail @@ -811,18 +779,14 @@ template std::enable_if_t, partition_t, vertex_t, edge_t>> renumber_edgelist(raft::handle_t const& handle, + std::optional> optional_local_vertex_span, std::vector const& edgelist_major_vertices /* [INOUT] */, std::vector const& edgelist_minor_vertices /* [INOUT] */, std::vector const& edgelist_edge_counts, bool do_expensive_check) { - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS( - handle.get_device_properties().major >= 7, - "This version of renumber_edgelist not supported on Pascal and older architectures."); return detail::renumber_edgelist(handle, - static_cast(nullptr), - vertex_t{0}, + optional_local_vertex_span, edgelist_major_vertices, edgelist_minor_vertices, edgelist_edge_counts, @@ -832,65 +796,14 
@@ renumber_edgelist(raft::handle_t const& handle, template std::enable_if_t> renumber_edgelist( raft::handle_t const& handle, + std::optional> optional_vertex_span, vertex_t* edgelist_major_vertices /* [INOUT] */, vertex_t* edgelist_minor_vertices /* [INOUT] */, edge_t num_edgelist_edges, bool do_expensive_check) { - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS( - handle.get_device_properties().major >= 7, - "This version of renumber_edgelist not supported on Pascal and older architectures."); return detail::renumber_edgelist(handle, - static_cast(nullptr), - vertex_t{0} /* dummy */, - edgelist_major_vertices, - edgelist_minor_vertices, - num_edgelist_edges, - do_expensive_check); -} - -template -std::enable_if_t, partition_t, vertex_t, edge_t>> -renumber_edgelist(raft::handle_t const& handle, - vertex_t const* local_vertices, - vertex_t num_local_vertices, - std::vector const& edgelist_major_vertices /* [INOUT] */, - std::vector const& edgelist_minor_vertices /* [INOUT] */, - std::vector const& edgelist_edge_counts, - bool do_expensive_check) -{ - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS( - handle.get_device_properties().major >= 7, - "This version of renumber_edgelist not supported on Pascal and older architectures."); - return detail::renumber_edgelist(handle, - local_vertices, - num_local_vertices, - edgelist_major_vertices, - edgelist_minor_vertices, - edgelist_edge_counts, - do_expensive_check); -} - -template -std::enable_if_t> renumber_edgelist( - raft::handle_t const& handle, - vertex_t const* vertices, - vertex_t num_vertices, - vertex_t* edgelist_major_vertices /* [INOUT] */, - vertex_t* edgelist_minor_vertices /* [INOUT] */, - edge_t num_edgelist_edges, - bool do_expensive_check) -{ - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS( - handle.get_device_properties().major >= 7, - "This version of renumber_edgelist not supported on Pascal and older architectures."); - 
return detail::renumber_edgelist(handle, - vertices, - num_vertices, + optional_vertex_span, edgelist_major_vertices, edgelist_minor_vertices, num_edgelist_edges, @@ -899,11 +812,13 @@ std::enable_if_t> renumber_edgelist( // explicit instantiation directives (EIDir's): // + // instantiations for // template std::tuple, partition_t, int32_t, int32_t> renumber_edgelist( raft::handle_t const& handle, + std::optional> optional_local_vertex_span, std::vector const& edgelist_major_vertices /* [INOUT] */, std::vector const& edgelist_minor_vertices /* [INOUT] */, std::vector const& edgelist_edge_counts, @@ -911,25 +826,7 @@ renumber_edgelist( template rmm::device_uvector renumber_edgelist( raft::handle_t const& handle, - int32_t* edgelist_major_vertices /* [INOUT] */, - int32_t* edgelist_minor_vertices /* [INOUT] */, - int32_t num_edgelist_edges, - bool do_expensive_check); - -template std::tuple, partition_t, int32_t, int32_t> -renumber_edgelist( - raft::handle_t const& handle, - int32_t const* local_vertices, - int32_t num_local_vertices, - std::vector const& edgelist_major_vertices /* [INOUT] */, - std::vector const& edgelist_minor_vertices /* [INOUT] */, - std::vector const& edgelist_edge_counts, - bool do_expensive_check); - -template rmm::device_uvector renumber_edgelist( - raft::handle_t const& handle, - int32_t const* vertices, - int32_t num_vertices, + std::optional> optional_vertex_span, int32_t* edgelist_major_vertices /* [INOUT] */, int32_t* edgelist_minor_vertices /* [INOUT] */, int32_t num_edgelist_edges, @@ -940,6 +837,7 @@ template rmm::device_uvector renumber_edgelist template std::tuple, partition_t, int32_t, int64_t> renumber_edgelist( raft::handle_t const& handle, + std::optional> optional_local_vertex_span, std::vector const& edgelist_major_vertices /* [INOUT] */, std::vector const& edgelist_minor_vertices /* [INOUT] */, std::vector const& edgelist_edge_counts, @@ -947,25 +845,7 @@ renumber_edgelist( template rmm::device_uvector renumber_edgelist( 
raft::handle_t const& handle, - int32_t* edgelist_major_vertices /* [INOUT] */, - int32_t* edgelist_minor_vertices /* [INOUT] */, - int64_t num_edgelist_edges, - bool do_expensive_check); - -template std::tuple, partition_t, int32_t, int64_t> -renumber_edgelist( - raft::handle_t const& handle, - int32_t const* local_vertices, - int32_t num_local_vertices, - std::vector const& edgelist_major_vertices /* [INOUT] */, - std::vector const& edgelist_minor_vertices /* [INOUT] */, - std::vector const& edgelist_edge_counts, - bool do_expensive_check); - -template rmm::device_uvector renumber_edgelist( - raft::handle_t const& handle, - int32_t const* vertices, - int32_t num_vertices, + std::optional> optional_vertex_span, int32_t* edgelist_major_vertices /* [INOUT] */, int32_t* edgelist_minor_vertices /* [INOUT] */, int64_t num_edgelist_edges, @@ -976,6 +856,7 @@ template rmm::device_uvector renumber_edgelist template std::tuple, partition_t, int64_t, int64_t> renumber_edgelist( raft::handle_t const& handle, + std::optional> optional_local_vertex_span, std::vector const& edgelist_major_vertices /* [INOUT] */, std::vector const& edgelist_minor_vertices /* [INOUT] */, std::vector const& edgelist_edge_counts, @@ -983,25 +864,7 @@ renumber_edgelist( template rmm::device_uvector renumber_edgelist( raft::handle_t const& handle, - int64_t* edgelist_major_vertices /* [INOUT] */, - int64_t* edgelist_minor_vertices /* [INOUT] */, - int64_t num_edgelist_edges, - bool do_expensive_check); - -template std::tuple, partition_t, int64_t, int64_t> -renumber_edgelist( - raft::handle_t const& handle, - int64_t const* local_vertices, - int64_t num_local_vertices, - std::vector const& edgelist_major_vertices /* [INOUT] */, - std::vector const& edgelist_minor_vertices /* [INOUT] */, - std::vector const& edgelist_edge_counts, - bool do_expensive_check); - -template rmm::device_uvector renumber_edgelist( - raft::handle_t const& handle, - int64_t const* vertices, - int64_t num_vertices, + 
std::optional> optional_vertex_span, int64_t* edgelist_major_vertices /* [INOUT] */, int64_t* edgelist_minor_vertices /* [INOUT] */, int64_t num_edgelist_edges, diff --git a/cpp/src/experimental/renumber_utils.cu b/cpp/src/experimental/renumber_utils.cu index eef6ca88b3c..9cd2b9a1408 100644 --- a/cpp/src/experimental/renumber_utils.cu +++ b/cpp/src/experimental/renumber_utils.cu @@ -14,13 +14,15 @@ * limitations under the License. */ -#include +#include +#include +#include +#include +#include -#include -#include -#include -#include -#include +#include +#include +#include #include #include @@ -46,11 +48,6 @@ void renumber_ext_vertices(raft::handle_t const& handle, { double constexpr load_factor = 0.7; - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "renumber_vertices() not supported on Pascal and older architectures."); - -#ifdef CUCO_STATIC_MAP_DEFINED if (do_expensive_check) { rmm::device_uvector labels(local_int_vertex_last - local_int_vertex_first, handle.get_stream()); @@ -66,8 +63,14 @@ void renumber_ext_vertices(raft::handle_t const& handle, "Invalid input arguments: renumber_map_labels have duplicate elements."); } - auto renumber_map_ptr = std::make_unique>( - size_t{0}, invalid_vertex_id::value, invalid_vertex_id::value); + auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + auto renumber_map_ptr = std::make_unique< + cuco::static_map>( + size_t{0}, + invalid_vertex_id::value, + invalid_vertex_id::value, + stream_adapter); if (multi_gpu) { auto& comm = handle.get_comms(); auto const comm_size = comm.get_size(); @@ -107,40 +110,36 @@ void renumber_ext_vertices(raft::handle_t const& handle, renumber_map_ptr.reset(); - renumber_map_ptr = std::make_unique>( + renumber_map_ptr = std::make_unique< + cuco::static_map>( // cuco::static_map requires at 
least one empty slot std::max( static_cast(static_cast(sorted_unique_ext_vertices.size()) / load_factor), sorted_unique_ext_vertices.size() + 1), invalid_vertex_id::value, - invalid_vertex_id::value); - - auto kv_pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator(thrust::make_tuple( - sorted_unique_ext_vertices.begin(), int_vertices_for_sorted_unique_ext_vertices.begin())), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + invalid_vertex_id::value, + stream_adapter); + + auto kv_pair_first = thrust::make_zip_iterator(thrust::make_tuple( + sorted_unique_ext_vertices.begin(), int_vertices_for_sorted_unique_ext_vertices.begin())); renumber_map_ptr->insert(kv_pair_first, kv_pair_first + sorted_unique_ext_vertices.size()); } else { handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream renumber_map_ptr.reset(); - renumber_map_ptr = std::make_unique>( + renumber_map_ptr = std::make_unique< + cuco::static_map>( // cuco::static_map requires at least one empty slot std::max(static_cast( static_cast(local_int_vertex_last - local_int_vertex_first) / load_factor), static_cast(local_int_vertex_last - local_int_vertex_first) + 1), invalid_vertex_id::value, - invalid_vertex_id::value); - - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(renumber_map_labels, thrust::make_counting_iterator(vertex_t{0}))), - [] __device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + invalid_vertex_id::value, + stream_adapter); + + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(renumber_map_labels, thrust::make_counting_iterator(vertex_t{0}))); renumber_map_ptr->insert(pair_first, pair_first + (local_int_vertex_last - local_int_vertex_first)); } @@ -164,7 +163,6 @@ void renumber_ext_vertices(raft::handle_t const& handle, } renumber_map_ptr->find(vertices, vertices + 
num_vertices, vertices); -#endif } template @@ -177,11 +175,6 @@ void unrenumber_local_int_vertices( vertex_t local_int_vertex_last, bool do_expensive_check) { - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "unrenumber_local_vertices() not supported on Pascal and older architectures."); - -#ifdef CUCO_STATIC_MAP_DEFINED if (do_expensive_check) { CUGRAPH_EXPECTS( thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), @@ -204,7 +197,6 @@ void unrenumber_local_int_vertices( ? v : renumber_map_labels[v - local_int_vertex_first]; }); -#endif } template @@ -214,16 +206,11 @@ void unrenumber_int_vertices(raft::handle_t const& handle, vertex_t const* renumber_map_labels, vertex_t local_int_vertex_first, vertex_t local_int_vertex_last, - std::vector& vertex_partition_lasts, + std::vector const& vertex_partition_lasts, bool do_expensive_check) { double constexpr load_factor = 0.7; - // FIXME: remove this check once we drop Pascal support - CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, - "unrenumber_vertices() not supported on Pascal and older architectures."); - -#ifdef CUCO_STATIC_MAP_DEFINED if (do_expensive_check) { CUGRAPH_EXPECTS( thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), @@ -306,21 +293,20 @@ void unrenumber_int_vertices(raft::handle_t const& handle, handle.get_stream_view().synchronize(); // cuco::static_map currently does not take stream - cuco::static_map unrenumber_map( - // cuco::static_map requires at least one empty slot - std::max( - static_cast(static_cast(sorted_unique_int_vertices.size()) / load_factor), - sorted_unique_int_vertices.size() + 1), - invalid_vertex_id::value, - invalid_vertex_id::value); - - auto pair_first = thrust::make_transform_iterator( - thrust::make_zip_iterator( - thrust::make_tuple(sorted_unique_int_vertices.begin(), - rx_ext_vertices_for_sorted_unique_int_vertices.begin())), - [] 
__device__(auto val) { - return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); - }); + auto poly_alloc = rmm::mr::polymorphic_allocator(rmm::mr::get_current_device_resource()); + auto stream_adapter = rmm::mr::make_stream_allocator_adaptor(poly_alloc, cudaStream_t{nullptr}); + cuco::static_map + unrenumber_map{ + // cuco::static_map requires at least one empty slot + std::max( + static_cast(static_cast(sorted_unique_int_vertices.size()) / load_factor), + sorted_unique_int_vertices.size() + 1), + invalid_vertex_id::value, + invalid_vertex_id::value, + stream_adapter}; + + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple( + sorted_unique_int_vertices.begin(), rx_ext_vertices_for_sorted_unique_int_vertices.begin())); unrenumber_map.insert(pair_first, pair_first + sorted_unique_int_vertices.size()); unrenumber_map.find(vertices, vertices + num_vertices, vertices); } else { @@ -332,7 +318,6 @@ void unrenumber_int_vertices(raft::handle_t const& handle, local_int_vertex_last, do_expensive_check); } -#endif } // explicit instantiation @@ -385,41 +370,45 @@ template void unrenumber_local_int_vertices(raft::handle_t const& handl int64_t local_int_vertex_last, bool do_expensive_check); -template void unrenumber_int_vertices(raft::handle_t const& handle, - int32_t* vertices, - size_t num_vertices, - int32_t const* renumber_map_labels, - int32_t local_int_vertex_first, - int32_t local_int_vertex_last, - std::vector& vertex_partition_lasts, - bool do_expensive_check); +template void unrenumber_int_vertices( + raft::handle_t const& handle, + int32_t* vertices, + size_t num_vertices, + int32_t const* renumber_map_labels, + int32_t local_int_vertex_first, + int32_t local_int_vertex_last, + std::vector const& vertex_partition_lasts, + bool do_expensive_check); -template void unrenumber_int_vertices(raft::handle_t const& handle, - int32_t* vertices, - size_t num_vertices, - int32_t const* renumber_map_labels, - int32_t local_int_vertex_first, - int32_t 
local_int_vertex_last, - std::vector& vertex_partition_lasts, - bool do_expensive_check); +template void unrenumber_int_vertices( + raft::handle_t const& handle, + int32_t* vertices, + size_t num_vertices, + int32_t const* renumber_map_labels, + int32_t local_int_vertex_first, + int32_t local_int_vertex_last, + std::vector const& vertex_partition_lasts, + bool do_expensive_check); -template void unrenumber_int_vertices(raft::handle_t const& handle, - int64_t* vertices, - size_t num_vertices, - int64_t const* renumber_map_labels, - int64_t local_int_vertex_first, - int64_t local_int_vertex_last, - std::vector& vertex_partition_lasts, - bool do_expensive_check); +template void unrenumber_int_vertices( + raft::handle_t const& handle, + int64_t* vertices, + size_t num_vertices, + int64_t const* renumber_map_labels, + int64_t local_int_vertex_first, + int64_t local_int_vertex_last, + std::vector const& vertex_partition_lasts, + bool do_expensive_check); -template void unrenumber_int_vertices(raft::handle_t const& handle, - int64_t* vertices, - size_t num_vertices, - int64_t const* renumber_map_labels, - int64_t local_int_vertex_first, - int64_t local_int_vertex_last, - std::vector& vertex_partition_lasts, - bool do_expensive_check); +template void unrenumber_int_vertices( + raft::handle_t const& handle, + int64_t* vertices, + size_t num_vertices, + int64_t const* renumber_map_labels, + int64_t local_int_vertex_first, + int64_t local_int_vertex_last, + std::vector const& vertex_partition_lasts, + bool do_expensive_check); } // namespace experimental } // namespace cugraph diff --git a/cpp/src/experimental/sssp.cu b/cpp/src/experimental/sssp.cu index fc488794795..c8e7f1eb7a0 100644 --- a/cpp/src/experimental/sssp.cu +++ b/cpp/src/experimental/sssp.cu @@ -14,16 +14,16 @@ * limitations under the License. 
*/ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include @@ -32,6 +32,7 @@ #include #include #include +#include #include #include @@ -126,7 +127,10 @@ void sssp(raft::handle_t const &handle, // 4. initialize SSSP frontier enum class Bucket { cur_near, next_near, far, num_buckets }; - VertexFrontier(Bucket::num_buckets)> + VertexFrontier(Bucket::num_buckets)> vertex_frontier(handle); // 5. SSSP iteration @@ -186,18 +190,25 @@ void sssp(raft::handle_t const &handle, threshold = old_distance < threshold ? old_distance : threshold; } if (new_distance >= threshold) { push = false; } - return thrust::make_tuple(push, thrust::make_tuple(new_distance, src)); + return push ? thrust::optional>{thrust::make_tuple( + new_distance, src)} + : thrust::nullopt; }, reduce_op::min>(), distances, thrust::make_zip_iterator(thrust::make_tuple(distances, predecessor_first)), - [near_far_threshold] __device__(auto v_val, auto pushed_val) { + [near_far_threshold] __device__(auto v, auto v_val, auto pushed_val) { auto new_dist = thrust::get<0>(pushed_val); auto idx = new_dist < v_val ? (new_dist < near_far_threshold ? static_cast(Bucket::next_near) : static_cast(Bucket::far)) : VertexFrontier::kInvalidBucketIdx; - return thrust::make_tuple(idx, pushed_val); + return new_dist < v_val + ? thrust::optional>{thrust::make_tuple( + static_cast(new_dist < near_far_threshold ? 
Bucket::next_near + : Bucket::far), + pushed_val)} + : thrust::nullopt; }); vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).clear(); @@ -220,13 +231,10 @@ void sssp(raft::handle_t const &handle, auto v) { auto dist = *(distances + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v)); - if (dist < old_near_far_threshold) { - return VertexFrontier::kInvalidBucketIdx; - } else if (dist < near_far_threshold) { - return static_cast(Bucket::cur_near); - } else { - return static_cast(Bucket::far); - } + return dist >= old_near_far_threshold + ? thrust::optional{static_cast( + dist < near_far_threshold ? Bucket::cur_near : Bucket::far)} + : thrust::nullopt; }); near_size = vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).aggregate_size(); diff --git a/cpp/src/generators/erdos_renyi_generator.cu b/cpp/src/generators/erdos_renyi_generator.cu new file mode 100644 index 00000000000..8452a613174 --- /dev/null +++ b/cpp/src/generators/erdos_renyi_generator.cu @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+
+namespace cugraph {
+// G(n, p) edge list: one uniform draw per cell of the num_vertices x num_vertices grid; cells whose draw is < p become edges.
+template
+std::tuple, rmm::device_uvector>
+generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const& handle,
+                                        vertex_t num_vertices,
+                                        float p,
+                                        vertex_t base_vertex_id,  // NOTE(review): not applied to the ids generated below -- confirm intent
+                                        uint64_t seed)
+{
+  CUGRAPH_EXPECTS(num_vertices < std::numeric_limits::max(),
+                  "Implementation cannot support specified value");
+  // Draw for cell `index`: construct the engine from `seed`, discard `index` values, return the next one.
+  auto random_iterator = thrust::make_transform_iterator(
+    thrust::make_counting_iterator(0), [seed] __device__(size_t index) {
+      thrust::default_random_engine rng(seed);
+      thrust::uniform_real_distribution dist(0.0, 1.0);
+      rng.discard(index);
+      return dist(rng);
+    });
+  // The cell count is widened to size_t before multiplying so it is not computed in vertex_t.
+  size_t count = thrust::count_if(rmm::exec_policy(handle.get_stream()),
+                                  random_iterator,
+                                  random_iterator + static_cast<size_t>(num_vertices) * num_vertices,
+                                  [p] __device__(float prob) { return prob < p; });
+
+  rmm::device_uvector indices_v(count, handle.get_stream());
+  // Keep the *index* of each selected cell (the draws are only the stencil); the transform below decodes indices.
+  thrust::copy_if(rmm::exec_policy(handle.get_stream()),
+                  thrust::make_counting_iterator<size_t>(0),
+                  thrust::make_counting_iterator<size_t>(static_cast<size_t>(num_vertices) * num_vertices),
+                  random_iterator,
+                  indices_v.begin(), [p] __device__(float prob) { return prob < p; });
+
+  rmm::device_uvector src_v(count, handle.get_stream());
+  rmm::device_uvector dst_v(count, handle.get_stream());
+  // Decode each cell index into (row, column) = (src, dst); the output zips src_v with dst_v so both are filled.
+  thrust::transform(rmm::exec_policy(handle.get_stream()),
+                    indices_v.begin(),
+                    indices_v.end(),
+                    thrust::make_zip_iterator(thrust::make_tuple(src_v.begin(), dst_v.begin())),
+                    [num_vertices] __device__(size_t index) {
+                      size_t src = index / num_vertices;
+                      size_t dst = index % num_vertices;
+
+                      return thrust::make_tuple(static_cast(src),
+                                                static_cast(dst));
+                    });
+
+  handle.get_stream_view().synchronize();
+
+  return std::make_tuple(std::move(src_v), std::move(dst_v));
+}
+// G(n, m) variant: the body only calls CUGRAPH_FAIL("Not implemented"); no edges are generated.
+template
+std::tuple, rmm::device_uvector>
+generate_erdos_renyi_graph_edgelist_gnm(raft::handle_t const& handle,
+                                        vertex_t num_vertices,
+                                        size_t m,
+                                        vertex_t base_vertex_id,
+                                        uint64_t seed)
+{
+  CUGRAPH_FAIL("Not implemented");
+}
+
+template std::tuple, rmm::device_uvector>
+generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const& handle,
+                                        int32_t num_vertices,
+                                        float p,
+                                        int32_t base_vertex_id,
+                                        uint64_t seed);
+
+template std::tuple, rmm::device_uvector>
+generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const& handle,
+                                        int64_t num_vertices,
+                                        float p,
+                                        int64_t base_vertex_id,
+                                        uint64_t seed);
+
+template std::tuple, rmm::device_uvector>
+generate_erdos_renyi_graph_edgelist_gnm(raft::handle_t const& handle,
+                                        int32_t num_vertices,
+                                        size_t m,
+                                        int32_t base_vertex_id,
+                                        uint64_t seed);
+
+template std::tuple, rmm::device_uvector>
+generate_erdos_renyi_graph_edgelist_gnm(raft::handle_t const& handle,
+                                        int64_t num_vertices,
+                                        size_t m,
+                                        int64_t base_vertex_id,
+                                        uint64_t seed);
+
+}  // namespace cugraph
diff --git a/cpp/src/experimental/generate_rmat_edgelist.cu b/cpp/src/generators/generate_rmat_edgelist.cu
similarity index 84%
rename from cpp/src/experimental/generate_rmat_edgelist.cu
rename to cpp/src/generators/generate_rmat_edgelist.cu
index f00443a0596..638d18b1831 100644
--- a/cpp/src/experimental/generate_rmat_edgelist.cu
+++ b/cpp/src/generators/generate_rmat_edgelist.cu
@@ -14,10 +14,8 @@
  * limitations under the License.
*/ -#include - -#include -#include +#include +#include #include #include @@ -28,11 +26,10 @@ #include #include +#include #include -#include "rmm/detail/error.hpp" namespace cugraph { -namespace experimental { template std::tuple, rmm::device_uvector> generate_rmat_edgelist( @@ -43,8 +40,7 @@ std::tuple, rmm::device_uvector> generat double b, double c, uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids) + bool clip_and_flip) { CUGRAPH_EXPECTS((size_t{1} << scale) <= static_cast(std::numeric_limits::max()), "Invalid input argument: scale too large for vertex_t."); @@ -105,21 +101,6 @@ std::tuple, rmm::device_uvector> generat num_edges_generated += num_edges_to_generate; } - if (scramble_vertex_ids) { - rands.resize(0, handle.get_stream()); - rands.shrink_to_fit(handle.get_stream()); - - auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(srcs.begin(), dsts.begin())); - thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - pair_first, - pair_first + srcs.size(), - pair_first, - [scale] __device__(auto pair) { - return thrust::make_tuple(detail::scramble(thrust::get<0>(pair), scale), - detail::scramble(thrust::get<1>(pair), scale)); - }); - } - return std::make_tuple(std::move(srcs), std::move(dsts)); } @@ -133,8 +114,7 @@ generate_rmat_edgelists(raft::handle_t const& handle, generator_distribution_t component_distribution, generator_distribution_t edge_distribution, uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids) + bool clip_and_flip) { CUGRAPH_EXPECTS(min_scale > 0, "minimum graph scale is 1."); CUGRAPH_EXPECTS( @@ -171,7 +151,7 @@ generate_rmat_edgelists(raft::handle_t const& handle, for (size_t i = 0; i < n_edgelists; i++) { output.push_back(generate_rmat_edgelist( - handle, scale[i], scale[i] * edge_factor, a, b, c, i, clip_and_flip, scramble_vertex_ids)); + handle, scale[i], scale[i] * edge_factor, a, b, c, i, clip_and_flip)); } return output; } @@ -184,8 +164,7 @@ 
generate_rmat_edgelist(raft::handle_t const& handle, double b, double c, uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids); + bool clip_and_flip); template std::tuple, rmm::device_uvector> generate_rmat_edgelist(raft::handle_t const& handle, @@ -195,8 +174,7 @@ generate_rmat_edgelist(raft::handle_t const& handle, double b, double c, uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids); + bool clip_and_flip); template std::vector, rmm::device_uvector>> generate_rmat_edgelists(raft::handle_t const& handle, @@ -207,8 +185,7 @@ generate_rmat_edgelists(raft::handle_t const& handle, generator_distribution_t component_distribution, generator_distribution_t edge_distribution, uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids); + bool clip_and_flip); template std::vector, rmm::device_uvector>> generate_rmat_edgelists(raft::handle_t const& handle, @@ -219,8 +196,6 @@ generate_rmat_edgelists(raft::handle_t const& handle, generator_distribution_t component_distribution, generator_distribution_t edge_distribution, uint64_t seed, - bool clip_and_flip, - bool scramble_vertex_ids); + bool clip_and_flip); -} // namespace experimental } // namespace cugraph diff --git a/cpp/src/generators/generator_tools.cu b/cpp/src/generators/generator_tools.cu new file mode 100644 index 00000000000..3ebef13f3b1 --- /dev/null +++ b/cpp/src/generators/generator_tools.cu @@ -0,0 +1,301 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+#include
+
+#include
+
+#include
+#include
+
+#include
+#include
+
+#include
+
+namespace cugraph {
+
+namespace detail {
+// Concatenate the device vectors in `input`, in order, into one newly allocated device vector.
+template
+rmm::device_uvector append_all(raft::handle_t const &handle,
+                               std::vector> &&input)
+{
+  size_t size{0};
+  // Total number of elements across all input vectors.
+  for (auto &element : input) size += element.size();
+
+  rmm::device_uvector output(size, handle.get_stream());
+  auto output_iter = output.begin();
+
+  for (auto &element : input) {
+    raft::copy(output_iter, element.begin(), element.size(), handle.get_stream());
+    output_iter += element.size();
+  }
+
+  /*
+for (size_t i = 0; i < input.size(); ++i) {
+  raft::copy(output_iter, input[i].begin(), input[i].size(), handle.get_stream());
+  output_iter += input[i].size();
+}
+  */
+
+  return output;
+}
+
+}  // namespace detail
+// Rewrite every src/dst id in place with experimental::detail::scramble(id, scale).
+template
+void scramble_vertex_ids(raft::handle_t const &handle,
+                         rmm::device_uvector &d_src_v,
+                         rmm::device_uvector &d_dst_v,
+                         vertex_t vertex_id_offset,
+                         uint64_t seed)
+{
+  vertex_t scale = 1 + raft::log2(d_src_v.size());
+  // NOTE(review): scale comes from the edge count; vertex_id_offset and seed are unused here -- confirm intent.
+  auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(d_src_v.begin(), d_dst_v.begin()));
+  thrust::transform(rmm::exec_policy(handle.get_stream()),
+                    pair_first,
+                    pair_first + d_src_v.size(),
+                    pair_first,
+                    [scale] __device__(auto pair) {
+                      return thrust::make_tuple(
+                        experimental::detail::scramble(thrust::get<0>(pair), scale),
+                        experimental::detail::scramble(thrust::get<1>(pair), scale));
+                    });
+}
+// Concatenate per-component edge lists (and optional weights); optionally sort and drop duplicate (src, dst) pairs.
+template
+std::tuple,
+           rmm::device_uvector,
+           std::optional>>
+combine_edgelists(raft::handle_t const &handle,
+                  std::vector> &&sources,
+                  std::vector> &&dests,
+                  std::optional>> &&optional_d_weights,
+                  bool remove_multi_edges)
+{
+  CUGRAPH_EXPECTS(sources.size() == dests.size(),
+                  "sources and dests vertex lists must be the same size");
+  // Per-component checks: each condition states the requirement (sizes equal), as in the unweighted branch.
+  if (optional_d_weights) {
+    CUGRAPH_EXPECTS(sources.size() == optional_d_weights.value().size(),
+                    "has_weights is specified, sources and weights must be the same size");
+
+    thrust::for_each_n(
+      thrust::host,
+      thrust::make_zip_iterator(
+        thrust::make_tuple(sources.begin(), dests.begin(), optional_d_weights.value().begin())),
+      sources.size(),
+      [](auto tuple) {
+        CUGRAPH_EXPECTS(thrust::get<0>(tuple).size() == thrust::get<1>(tuple).size(),
+                        "source vertex and dest vertex uvectors must be same size");
+        CUGRAPH_EXPECTS(thrust::get<0>(tuple).size() == thrust::get<2>(tuple).size(),
+                        "source vertex and weights uvectors must be same size");
+      });
+  } else {
+    thrust::for_each_n(
+      thrust::host,
+      thrust::make_zip_iterator(thrust::make_tuple(sources.begin(), dests.begin())),
+      sources.size(),
+      [](auto tuple) {
+        CUGRAPH_EXPECTS(thrust::get<0>(tuple).size() == thrust::get<1>(tuple).size(),
+                        "source vertex and dest vertex uvectors must be same size");
+      });
+  }
+
+  // Flatten the per-component vectors into single device vectors (weights only when provided).
+
+  rmm::device_uvector srcs_v(0, handle.get_stream());
+  rmm::device_uvector dsts_v(0, handle.get_stream());
+  rmm::device_uvector weights_v(0, handle.get_stream());
+
+  srcs_v = detail::append_all(handle, std::move(sources));
+  dsts_v = detail::append_all(handle, std::move(dests));
+
+  if (optional_d_weights) {
+    weights_v = detail::append_all(handle, std::move(optional_d_weights.value()));
+  }
+
+  if (remove_multi_edges) {
+    size_t number_of_edges{srcs_v.size()};
+    // Sort so duplicates are adjacent, then keep the first entry of each run of equal (src, dst) keys.
+    if (optional_d_weights) {
+      thrust::sort(
+        rmm::exec_policy(handle.get_stream()),
+        thrust::make_zip_iterator(
+          thrust::make_tuple(srcs_v.begin(), dsts_v.begin(), weights_v.begin())),
+        thrust::make_zip_iterator(thrust::make_tuple(srcs_v.end(), dsts_v.end(), weights_v.end())));
+
+      auto pair_first =
+        thrust::make_zip_iterator(thrust::make_tuple(srcs_v.begin(), dsts_v.begin()));
+      auto end_iter = thrust::unique_by_key(rmm::exec_policy(handle.get_stream()),
+                                            pair_first,
+                                            pair_first + srcs_v.size(),
+                                            weights_v.begin());
+
+      number_of_edges = thrust::distance(pair_first, thrust::get<0>(end_iter));
+    } else {
+      thrust::sort(rmm::exec_policy(handle.get_stream()),
+                   thrust::make_zip_iterator(thrust::make_tuple(srcs_v.begin(), dsts_v.begin())),
+                   thrust::make_zip_iterator(thrust::make_tuple(srcs_v.end(), dsts_v.end())));
+
+      auto pair_first =
+        thrust::make_zip_iterator(thrust::make_tuple(srcs_v.begin(), dsts_v.begin()));
+
+      auto end_iter = thrust::unique(
+        rmm::exec_policy(handle.get_stream()),
+        thrust::make_zip_iterator(thrust::make_tuple(srcs_v.begin(), dsts_v.begin())),
+        thrust::make_zip_iterator(thrust::make_tuple(srcs_v.end(), dsts_v.end())));
+
+      number_of_edges = thrust::distance(pair_first, end_iter);
+    }
+    // Trim the vectors down to the surviving edges.
+    srcs_v.resize(number_of_edges, handle.get_stream());
+    srcs_v.shrink_to_fit(handle.get_stream());
+    dsts_v.resize(number_of_edges, handle.get_stream());
+    dsts_v.shrink_to_fit(handle.get_stream());
+
+    if (optional_d_weights) {
+      weights_v.resize(number_of_edges, handle.get_stream());
+      weights_v.shrink_to_fit(handle.get_stream());
+    }
+  }
+
+  return std::make_tuple(
+    std::move(srcs_v),
+    std::move(dsts_v),
+    optional_d_weights
+      ? std::move(std::optional>(std::move(weights_v)))
+      : std::nullopt);
+}
+// Append the reverse of every edge: the first half keeps the input edges, the second half holds (dst, src); weights are duplicated.
+template
+std::tuple,
+           rmm::device_uvector,
+           std::optional>>
+symmetrize_edgelist(raft::handle_t const &handle,
+                    rmm::device_uvector &&d_src_v,
+                    rmm::device_uvector &&d_dst_v,
+                    std::optional> &&optional_d_weights_v)
+{
+  auto offset = d_src_v.size();
+  d_src_v.resize(offset * 2, handle.get_stream_view());
+  d_dst_v.resize(offset * 2, handle.get_stream_view());
+
+  thrust::copy(rmm::exec_policy(handle.get_stream_view()),
+               d_dst_v.begin(),
+               d_dst_v.begin() + offset,
+               d_src_v.begin() + offset);
+  thrust::copy(rmm::exec_policy(handle.get_stream_view()),
+               d_src_v.begin(),
+               d_src_v.begin() + offset,
+               d_dst_v.begin() + offset);
+  if (optional_d_weights_v) {
+    optional_d_weights_v->resize(d_src_v.size(), handle.get_stream_view());
+    thrust::copy(rmm::exec_policy(handle.get_stream_view()),
+                 optional_d_weights_v->begin(),
+                 optional_d_weights_v->begin() + offset,
+                 optional_d_weights_v->begin() + offset);
+  }
+
+  return std::make_tuple(std::move(d_src_v),
+                         std::move(d_dst_v),
+                         optional_d_weights_v ? std::move(optional_d_weights_v) : std::nullopt);
+}
+
+template void scramble_vertex_ids(raft::handle_t const &handle,
+                                  rmm::device_uvector &d_src_v,
+                                  rmm::device_uvector &d_dst_v,
+                                  int32_t vertex_id_offset,
+                                  uint64_t seed);
+
+template void scramble_vertex_ids(raft::handle_t const &handle,
+                                  rmm::device_uvector &d_src_v,
+                                  rmm::device_uvector &d_dst_v,
+                                  int64_t vertex_id_offset,
+                                  uint64_t seed);
+
+template std::tuple,
+                    rmm::device_uvector,
+                    std::optional>>
+combine_edgelists(raft::handle_t const &handle,
+                  std::vector> &&sources,
+                  std::vector> &&dests,
+                  std::optional>> &&optional_d_weights,
+                  bool remove_multi_edges);
+
+template std::tuple,
+                    rmm::device_uvector,
+                    std::optional>>
+combine_edgelists(raft::handle_t const &handle,
+                  std::vector> &&sources,
+                  std::vector> &&dests,
+                  std::optional>> &&optional_d_weights,
+                  bool remove_multi_edges);
+
+template std::tuple,
+                    rmm::device_uvector,
+                    std::optional>>
+combine_edgelists(raft::handle_t const &handle,
+                  std::vector> &&sources,
+                  std::vector> &&dests,
+                  std::optional>> &&optional_d_weights,
+                  bool remove_multi_edges);
+
+template std::tuple,
+                    rmm::device_uvector,
+                    std::optional>>
+combine_edgelists(raft::handle_t const &handle,
+                  std::vector> &&sources,
+                  std::vector> &&dests,
+                  std::optional>> &&optional_d_weights,
+                  bool remove_multi_edges);
+
+template std::tuple,
+                    rmm::device_uvector,
+                    std::optional>>
+symmetrize_edgelist(raft::handle_t const &handle,
+                    rmm::device_uvector &&d_src_v,
+                    rmm::device_uvector &&d_dst_v,
+                    std::optional> &&optional_d_weights_v);
+template std::tuple,
+                    rmm::device_uvector,
+                    std::optional>>
+symmetrize_edgelist(raft::handle_t const &handle,
+                    rmm::device_uvector &&d_src_v,
+                    rmm::device_uvector &&d_dst_v,
+                    std::optional> &&optional_d_weights_v);
+
+template std::tuple,
+                    rmm::device_uvector,
+                    std::optional>>
+symmetrize_edgelist(raft::handle_t const &handle,
+                    rmm::device_uvector &&d_src_v,
+                    rmm::device_uvector &&d_dst_v,
+                    std::optional> &&optional_d_weights_v);
+template std::tuple,
+                    rmm::device_uvector,
+                    std::optional>>
+symmetrize_edgelist(raft::handle_t const &handle,
+                    rmm::device_uvector &&d_src_v,
+                    rmm::device_uvector &&d_dst_v,
+                    std::optional> &&optional_d_weights_v);
+
+}  // namespace cugraph
diff --git a/cpp/src/generators/simple_generators.cu b/cpp/src/generators/simple_generators.cu
new file mode 100644
index 00000000000..413e08962e7
--- /dev/null
+++ b/cpp/src/generators/simple_generators.cu
@@ -0,0 +1,343 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include
+#include
+
+#include
+#include
+
+#include
+
+#include
+
+namespace cugraph {
+// Path graph(s): for each (num_vertices, base_vertex_id) component, emit edges (v, v + 1) starting at base_vertex_id.
+template
+std::tuple, rmm::device_uvector>
+generate_path_graph_edgelist(raft::handle_t const& handle,
+                             std::vector> const& component_parms_v)
+{
+  size_t num_edges = thrust::transform_reduce(
+    thrust::host,
+    component_parms_v.begin(),
+    component_parms_v.end(),
+    [](auto tuple) { return (std::get<0>(tuple) - 1); },
+    size_t{0},
+    std::plus());
+
+  bool edge_off_end{false};
+  // With more than one rank, each component gets one extra edge past its last vertex.
+  if (handle.comms_initialized()) {
+    auto& comm           = handle.get_comms();
+    auto const comm_size = comm.get_size();
+    auto const comm_rank = comm.get_rank();
+
+    if (comm_size > 1) {
+      if (comm_rank < comm_size) {  // NOTE(review): always true for a valid rank -- confirm the intended bound
+        num_edges += component_parms_v.size();
+        edge_off_end = true;
+      }
+    }
+  }
+
+  rmm::device_uvector d_src_v(num_edges, handle.get_stream());
+  rmm::device_uvector d_dst_v(num_edges, handle.get_stream());
+
+  auto src_iterator = d_src_v.begin();
+  auto dst_iterator = d_dst_v.begin();
+
+  for (auto tuple : component_parms_v) {
+    vertex_t num_vertices, base_vertex_id;
+    std::tie(num_vertices, base_vertex_id) = tuple;
+    // Per-component edge count; this local shadows the function-level total of the same name.
+    vertex_t num_edges{num_vertices - 1};
+
+    if (edge_off_end) ++num_edges;
+
+    thrust::sequence(rmm::exec_policy(handle.get_stream()),
+                     src_iterator,
+                     src_iterator + num_edges,
+                     base_vertex_id);
+
+    thrust::sequence(rmm::exec_policy(handle.get_stream()),
+                     dst_iterator,
+                     dst_iterator + num_edges,
+                     base_vertex_id + 1);
+
+    src_iterator += num_edges;
+    dst_iterator += num_edges;
+  }
+
+  handle.get_stream_view().synchronize();
+
+  return std::make_tuple(std::move(d_src_v), std::move(d_dst_v));
+}
+// 2D mesh(es): for each (x, y, base_vertex_id) component, emit (v, v + 1) edges within a row and (v, v + x) edges between rows.
+template
+std::tuple, rmm::device_uvector>
+generate_2d_mesh_graph_edgelist(
+  raft::handle_t const& handle,
+  std::vector> const& component_parms_v)
+{
+  size_t num_edges = thrust::transform_reduce(
+    thrust::host,
+    component_parms_v.begin(),
+    component_parms_v.end(),
+    [](auto tuple) {
+      vertex_t x, y;
+      std::tie(x, y, std::ignore) = tuple;
+      // Widen before multiplying so the edge count is not computed in vertex_t.
+      return static_cast<size_t>(x - 1) * y + static_cast<size_t>(x) * (y - 1);
+    },
+    size_t{0},
+    std::plus());
+
+  rmm::device_uvector d_src_v(num_edges, handle.get_stream());
+  rmm::device_uvector d_dst_v(num_edges, handle.get_stream());
+
+  auto output_iterator =
+    thrust::make_zip_iterator(thrust::make_tuple(d_src_v.begin(), d_dst_v.begin()));
+
+  for (auto tuple : component_parms_v) {
+    vertex_t x, y, base_vertex_id;
+    std::tie(x, y, base_vertex_id) = tuple;
+
+    vertex_t num_vertices = x * y;
+
+    auto x_iterator = thrust::make_zip_iterator(
+      thrust::make_tuple(thrust::make_counting_iterator(base_vertex_id),
+                         thrust::make_counting_iterator(base_vertex_id + 1)));
+
+    output_iterator = thrust::copy_if(rmm::exec_policy(handle.get_stream()),
+                                      x_iterator,
+                                      x_iterator + num_vertices - 1,
+                                      output_iterator,
+                                      [base_vertex_id, x] __device__(auto pair) {
+                                        vertex_t dst = thrust::get<1>(pair);
+                                        // Skip pairs whose dst wraps to column 0 (src is in the last column of its row)
+                                        return ((dst - base_vertex_id) % x) != 0;
+                                      });
+
+    auto y_iterator = thrust::make_zip_iterator(
+      thrust::make_tuple(thrust::make_counting_iterator(base_vertex_id),
+                         thrust::make_counting_iterator(base_vertex_id + x)));
+
+    output_iterator = thrust::copy_if(rmm::exec_policy(handle.get_stream()),
+                                      y_iterator,
+                                      y_iterator + num_vertices - x,
+                                      output_iterator,
+                                      [base_vertex_id, x, y] __device__(auto pair) {
+                                        vertex_t dst = thrust::get<1>(pair);
+
+                                        // Skip pairs whose dst falls in the first row of an x-by-y layer
+                                        return ((dst - base_vertex_id) % (x * y)) >= x;
+                                      });
+  }
+
+  handle.get_stream_view().synchronize();
+
+  return std::make_tuple(std::move(d_src_v), std::move(d_dst_v));
+}
+// 3D mesh(es): as the 2D case per x-by-y layer, plus (v, v + x * y) edges between consecutive layers.
+template
+std::tuple, rmm::device_uvector>
+generate_3d_mesh_graph_edgelist(
+  raft::handle_t const& handle,
+  std::vector> const& component_parms_v)
+{
+  size_t num_edges = thrust::transform_reduce(
+    thrust::host,
+    component_parms_v.begin(),
+    component_parms_v.end(),
+    [](auto tuple) {
+      vertex_t x, y, z;
+      std::tie(x, y, z, std::ignore) = tuple;
+      // Widen before multiplying so the edge count is not computed in vertex_t.
+      return static_cast<size_t>(x - 1) * y * z + static_cast<size_t>(x) * (y - 1) * z + static_cast<size_t>(x) * y * (z - 1);
+    },
+    size_t{0},
+    std::plus());
+
+  rmm::device_uvector d_src_v(num_edges, handle.get_stream());
+  rmm::device_uvector d_dst_v(num_edges, handle.get_stream());
+
+  auto output_iterator =
+    thrust::make_zip_iterator(thrust::make_tuple(d_src_v.begin(), d_dst_v.begin()));
+
+  for (auto tuple : component_parms_v) {
+    vertex_t x, y, z, base_vertex_id;
+    std::tie(x, y, z, base_vertex_id) = tuple;
+
+    vertex_t num_vertices = x * y * z;
+
+    auto x_iterator = thrust::make_zip_iterator(
+      thrust::make_tuple(thrust::make_counting_iterator(base_vertex_id),
+                         thrust::make_counting_iterator(base_vertex_id + 1)));
+
+    output_iterator = thrust::copy_if(rmm::exec_policy(handle.get_stream()),
+                                      x_iterator,
+                                      x_iterator + num_vertices - 1,
+                                      output_iterator,
+                                      [base_vertex_id, x] __device__(auto pair) {
+                                        vertex_t dst = thrust::get<1>(pair);
+                                        // Skip pairs whose dst wraps to column 0 (src is in the last column of its row)
+                                        return ((dst - base_vertex_id) % x) != 0;
+                                      });
+
+    auto y_iterator = thrust::make_zip_iterator(
+      thrust::make_tuple(thrust::make_counting_iterator(base_vertex_id),
+                         thrust::make_counting_iterator(base_vertex_id + x)));
+
+    output_iterator = thrust::copy_if(rmm::exec_policy(handle.get_stream()),
+                                      y_iterator,
+                                      y_iterator + num_vertices - x,
+                                      output_iterator,
+                                      [base_vertex_id, x, y] __device__(auto pair) {
+                                        vertex_t dst = thrust::get<1>(pair);
+                                        // Skip pairs whose dst falls in the first row of an x-by-y layer
+                                        return ((dst - base_vertex_id) % (x * y)) >= x;
+                                      });
+
+    auto z_iterator = thrust::make_zip_iterator(
+      thrust::make_tuple(thrust::make_counting_iterator(base_vertex_id),
+                         thrust::make_counting_iterator(base_vertex_id + x * y)));
+
+    output_iterator = thrust::copy_if(rmm::exec_policy(handle.get_stream()),
+                                      z_iterator,
+                                      z_iterator + num_vertices - x * y,
+                                      output_iterator,
+                                      [base_vertex_id, x, y, z] __device__(auto pair) {
+                                        vertex_t dst = thrust::get<1>(pair);
+                                        // Skip pairs whose dst falls in the first x-by-y layer of the component
+                                        return ((dst - base_vertex_id) % (x * y * z)) >= (x * y);
+                                      });
+  }
+
+  handle.get_stream_view().synchronize();
+
+  return std::make_tuple(std::move(d_src_v), std::move(d_dst_v));
+}
+// Complete graph(s): for each (num_vertices, base_vertex_id) component, emit every pair (src, dst) with src < dst.
+template
+std::tuple, rmm::device_uvector>
+generate_complete_graph_edgelist(
+  raft::handle_t const& handle,
+  std::vector> const& component_parms_v)
+{
+  std::for_each(component_parms_v.begin(), component_parms_v.end(), [](auto tuple) {
+    vertex_t num_vertices = std::get<0>(tuple);
+    CUGRAPH_EXPECTS(num_vertices < std::numeric_limits::max(),
+                    "Implementation cannot support specified value");
+  });
+  // n * (n - 1) / 2 edges per component, computed in size_t so the product is not evaluated in vertex_t.
+  size_t num_edges = thrust::transform_reduce(
+    thrust::host,
+    component_parms_v.begin(),
+    component_parms_v.end(),
+    [](auto tuple) {
+      vertex_t num_vertices = std::get<0>(tuple);
+      return static_cast<size_t>(num_vertices) * (num_vertices - 1) / 2;
+    },
+    size_t{0},
+    std::plus());
+
+  vertex_t invalid_vertex{std::numeric_limits::max()};
+
+  rmm::device_uvector d_src_v(num_edges, handle.get_stream());
+  rmm::device_uvector d_dst_v(num_edges, handle.get_stream());
+
+  auto output_iterator =
+    thrust::make_zip_iterator(thrust::make_tuple(d_src_v.begin(), d_dst_v.begin()));
+
+  for (auto tuple : component_parms_v) {
+    vertex_t num_vertices, base_vertex_id;
+    std::tie(num_vertices, base_vertex_id) = tuple;
+    // Enumerate all num_vertices^2 (src, dst) cells; self-pairs are tagged invalid and filtered out below.
+    auto transform_iter = thrust::make_transform_iterator(
+      thrust::make_counting_iterator(0),
+      [base_vertex_id, num_vertices, invalid_vertex] __device__(size_t index) {
+        size_t graph_index = index / (static_cast<size_t>(num_vertices) * num_vertices);
+        size_t local_index = index % (static_cast<size_t>(num_vertices) * num_vertices);
+
+        vertex_t src = base_vertex_id + static_cast(local_index / num_vertices);
+        vertex_t dst = base_vertex_id + static_cast(local_index % num_vertices);
+
+        if (src == dst) {
+          src = invalid_vertex;
+          dst = invalid_vertex;
+        } else {
+          src += (graph_index * num_vertices);
+          dst += (graph_index * num_vertices);
+        }
+
+        return thrust::make_tuple(src, dst);
+      });
+
+    output_iterator = thrust::copy_if(rmm::exec_policy(handle.get_stream()),
+                                      transform_iter,
+                                      transform_iter + static_cast<size_t>(num_vertices) * num_vertices,
+                                      output_iterator,
+                                      [invalid_vertex] __device__(auto tuple) {
+                                        auto src = thrust::get<0>(tuple);
+                                        auto dst = thrust::get<1>(tuple);
+
+                                        return (src != invalid_vertex) && (src < dst);
+                                      });
+  }
+
+  handle.get_stream_view().synchronize();
+
+  return std::make_tuple(std::move(d_src_v), std::move(d_dst_v));
+}
+
+template std::tuple, rmm::device_uvector>
+generate_path_graph_edgelist(raft::handle_t const& handle,
+                             std::vector> const& component_parms_v);
+
+template std::tuple, rmm::device_uvector>
+generate_path_graph_edgelist(raft::handle_t const& handle,
+                             std::vector> const& component_parms_v);
+
+template std::tuple, rmm::device_uvector>
+generate_2d_mesh_graph_edgelist(
+  raft::handle_t const& handle,
+  std::vector> const& component_parms_v);
+
+template std::tuple, rmm::device_uvector>
+generate_2d_mesh_graph_edgelist(
+  raft::handle_t const& handle,
+  std::vector> const& component_parms_v);
+
+template std::tuple, rmm::device_uvector>
+generate_3d_mesh_graph_edgelist(
+  raft::handle_t const& handle,
+  std::vector> const& component_parms_v);
+
+template std::tuple, rmm::device_uvector>
+generate_3d_mesh_graph_edgelist(
+  raft::handle_t const& handle,
+  std::vector> const& component_parms_v);
+
+template std::tuple, rmm::device_uvector>
+generate_complete_graph_edgelist(
+  raft::handle_t const& handle, std::vector> const& component_parms_v);
+
+template std::tuple, rmm::device_uvector>
+generate_complete_graph_edgelist(
+  raft::handle_t const& handle, std::vector> const& component_parms_v);
+
+}  // namespace cugraph
diff --git a/cpp/src/layout/barnes_hut.hpp b/cpp/src/layout/barnes_hut.hpp
index 437c98fce4b..ca62eda3716 100644
--- a/cpp/src/layout/barnes_hut.hpp
+++ b/cpp/src/layout/barnes_hut.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,24 +16,26 @@ #pragma once -#include -#include +#include +#include -#include #include -#include -#include +#include + +#include +#include +#include #include "bh_kernels.hpp" #include "fa2_kernels.hpp" -#include "utilities/graph_utils.cuh" #include "utils.hpp" namespace cugraph { namespace detail { template -void barnes_hut(GraphCOOView &graph, +void barnes_hut(raft::handle_t const &handle, + GraphCOOView &graph, float *pos, const int max_iter = 500, float *x_start = nullptr, @@ -50,9 +52,9 @@ void barnes_hut(GraphCOOView &graph, bool verbose = false, internals::GraphBasedDimRedCallback *callback = nullptr) { - cudaStream_t stream = {nullptr}; - const edge_t e = graph.number_of_edges; - const vertex_t n = graph.number_of_vertices; + rmm::cuda_stream_view stream(handle.get_stream()); + const edge_t e = graph.number_of_edges; + const vertex_t n = graph.number_of_vertices; const int blocks = getMultiProcessorCount(); // A tiny jitter to promote numerical stability/ @@ -65,71 +67,72 @@ void barnes_hut(GraphCOOView &graph, // Allocate more space //--------------------------------------------------- - rmm::device_vector d_limiter(1); - rmm::device_vector d_maxdepthd(1); - rmm::device_vector d_bottomd(1); - rmm::device_vector d_radiusd(1); + rmm::device_uvector d_limiter(1, stream); + rmm::device_uvector d_maxdepthd(1, stream); + rmm::device_uvector d_bottomd(1, stream); + rmm::device_uvector d_radiusd(1, stream); - unsigned *limiter = d_limiter.data().get(); - int *maxdepthd = d_maxdepthd.data().get(); - int *bottomd = d_bottomd.data().get(); - float *radiusd = d_radiusd.data().get(); + unsigned *limiter = d_limiter.data(); + int *maxdepthd = d_maxdepthd.data(); + int *bottomd = d_bottomd.data(); + float *radiusd = d_radiusd.data(); - InitializationKernel<<<1, 1, 0, stream>>>(limiter, maxdepthd, radiusd); - CHECK_CUDA(stream); + 
InitializationKernel<<<1, 1, 0, stream.value()>>>(limiter, maxdepthd, radiusd); + CHECK_CUDA(stream.value()); const int FOUR_NNODES = 4 * nnodes; const int FOUR_N = 4 * n; const float theta_squared = theta * theta; const int NNODES = nnodes; - rmm::device_vector d_startl(nnodes + 1, 0); - rmm::device_vector d_childl((nnodes + 1) * 4, 0); + rmm::device_uvector d_startl(nnodes + 1, stream); + rmm::device_uvector d_childl((nnodes + 1) * 4, stream); // FA2 requires degree + 1 - rmm::device_vector d_massl(nnodes + 1, 1.f); + rmm::device_uvector d_massl(nnodes + 1, stream); + thrust::fill(rmm::exec_policy(stream), d_massl.begin(), d_massl.end(), 1); - rmm::device_vector d_maxxl(blocks * FACTOR1, 0); - rmm::device_vector d_maxyl(blocks * FACTOR1, 0); - rmm::device_vector d_minxl(blocks * FACTOR1, 0); - rmm::device_vector d_minyl(blocks * FACTOR1, 0); + rmm::device_uvector d_maxxl(blocks * FACTOR1, stream); + rmm::device_uvector d_maxyl(blocks * FACTOR1, stream); + rmm::device_uvector d_minxl(blocks * FACTOR1, stream); + rmm::device_uvector d_minyl(blocks * FACTOR1, stream); // Actual mallocs - int *startl = d_startl.data().get(); - int *childl = d_childl.data().get(); - int *massl = d_massl.data().get(); + int *startl = d_startl.data(); + int *childl = d_childl.data(); + int *massl = d_massl.data(); - float *maxxl = d_maxxl.data().get(); - float *maxyl = d_maxyl.data().get(); - float *minxl = d_minxl.data().get(); - float *minyl = d_minyl.data().get(); + float *maxxl = d_maxxl.data(); + float *maxyl = d_maxyl.data(); + float *minxl = d_minxl.data(); + float *minyl = d_minyl.data(); // SummarizationKernel - rmm::device_vector d_countl(nnodes + 1, 0); - int *countl = d_countl.data().get(); + rmm::device_uvector d_countl(nnodes + 1, stream); + int *countl = d_countl.data(); // SortKernel - rmm::device_vector d_sortl(nnodes + 1, 0); - int *sortl = d_sortl.data().get(); + rmm::device_uvector d_sortl(nnodes + 1, stream); + int *sortl = d_sortl.data(); // RepulsionKernel - 
rmm::device_vector d_rep_forces((nnodes + 1) * 2, 0); - float *rep_forces = d_rep_forces.data().get(); + rmm::device_uvector d_rep_forces((nnodes + 1) * 2, stream); + float *rep_forces = d_rep_forces.data(); - rmm::device_vector d_radius_squared(1, 0); - float *radiusd_squared = d_radius_squared.data().get(); + rmm::device_uvector d_radius_squared(1, stream); + float *radiusd_squared = d_radius_squared.data(); - rmm::device_vector d_nodes_pos((nnodes + 1) * 2, 0); - float *nodes_pos = d_nodes_pos.data().get(); + rmm::device_uvector d_nodes_pos((nnodes + 1) * 2, stream); + float *nodes_pos = d_nodes_pos.data(); // Initialize positions with random values int random_state = 0; // Copy start x and y positions. if (x_start && y_start) { - copy(n, x_start, nodes_pos); - copy(n, y_start, nodes_pos + nnodes + 1); + raft::copy(nodes_pos, x_start, n, stream.value()); + raft::copy(nodes_pos + nnodes + 1, y_start, n, stream.value()); } else { - random_vector(nodes_pos, (nnodes + 1) * 2, random_state, stream); + random_vector(nodes_pos, (nnodes + 1) * 2, random_state, stream.value()); } // Allocate arrays for force computation @@ -138,22 +141,24 @@ void barnes_hut(GraphCOOView &graph, float *swinging{nullptr}; float *traction{nullptr}; - rmm::device_vector d_attract(n * 2, 0); - rmm::device_vector d_old_forces(n * 2, 0); - rmm::device_vector d_swinging(n, 0); - rmm::device_vector d_traction(n, 0); + rmm::device_uvector d_attract(n * 2, stream); + rmm::device_uvector d_old_forces(n * 2, stream); + rmm::device_uvector d_swinging(n, stream); + rmm::device_uvector d_traction(n, stream); + + attract = d_attract.data(); + old_forces = d_old_forces.data(); + swinging = d_swinging.data(); + traction = d_traction.data(); - attract = d_attract.data().get(); - old_forces = d_old_forces.data().get(); - swinging = d_swinging.data().get(); - traction = d_traction.data().get(); + thrust::fill(rmm::exec_policy(stream), d_old_forces.begin(), d_old_forces.end(), 0.f); // Sort COO for coalesced 
memory access. - sort(graph, stream); - CHECK_CUDA(stream); + sort(graph, stream.value()); + CHECK_CUDA(stream.value()); graph.degree(massl, cugraph::DegreeDirection::OUT); - CHECK_CUDA(stream); + CHECK_CUDA(stream.value()); const vertex_t *row = graph.src_indices; const vertex_t *col = graph.dst_indices; @@ -167,8 +172,7 @@ void barnes_hut(GraphCOOView &graph, // If outboundAttractionDistribution active, compensate. if (outbound_attraction_distribution) { - int sum = - thrust::reduce(rmm::exec_policy(stream)->on(stream), d_massl.begin(), d_massl.begin() + n); + int sum = thrust::reduce(rmm::exec_policy(stream), d_massl.begin(), d_massl.begin() + n); outbound_att_compensation = sum / (float)n; } @@ -191,70 +195,70 @@ void barnes_hut(GraphCOOView &graph, for (int iter = 0; iter < max_iter; ++iter) { // Reset force values - fill((nnodes + 1) * 2, rep_forces, 0.f); - fill(n * 2, attract, 0.f); - fill(n, swinging, 0.f); - fill(n, traction, 0.f); + thrust::fill(rmm::exec_policy(stream), d_rep_forces.begin(), d_rep_forces.end(), 0.f); + thrust::fill(rmm::exec_policy(stream), d_attract.begin(), d_attract.end(), 0.f); + thrust::fill(rmm::exec_policy(stream), d_swinging.begin(), d_swinging.end(), 0.f); + thrust::fill(rmm::exec_policy(stream), d_traction.begin(), d_traction.end(), 0.f); - ResetKernel<<<1, 1, 0, stream>>>(radiusd_squared, bottomd, NNODES, radiusd); - CHECK_CUDA(stream); + ResetKernel<<<1, 1, 0, stream.value()>>>(radiusd_squared, bottomd, NNODES, radiusd); + CHECK_CUDA(stream.value()); // Compute bounding box arround all bodies - BoundingBoxKernel<<>>(startl, - childl, - massl, - nodes_pos, - nodes_pos + nnodes + 1, - maxxl, - maxyl, - minxl, - minyl, - FOUR_NNODES, - NNODES, - n, - limiter, - radiusd); - CHECK_CUDA(stream); - - ClearKernel1<<>>(childl, FOUR_NNODES, FOUR_N); - CHECK_CUDA(stream); + BoundingBoxKernel<<>>(startl, + childl, + massl, + nodes_pos, + nodes_pos + nnodes + 1, + maxxl, + maxyl, + minxl, + minyl, + FOUR_NNODES, + NNODES, + n, + limiter, 
+ radiusd); + CHECK_CUDA(stream.value()); + + ClearKernel1<<>>(childl, FOUR_NNODES, FOUR_N); + CHECK_CUDA(stream.value()); // Build quadtree - TreeBuildingKernel<<>>( + TreeBuildingKernel<<>>( childl, nodes_pos, nodes_pos + nnodes + 1, NNODES, n, maxdepthd, bottomd, radiusd); - CHECK_CUDA(stream); + CHECK_CUDA(stream.value()); - ClearKernel2<<>>(startl, massl, NNODES, bottomd); - CHECK_CUDA(stream); + ClearKernel2<<>>(startl, massl, NNODES, bottomd); + CHECK_CUDA(stream.value()); // Summarizes mass and position for each cell, bottom up approach - SummarizationKernel<<>>( + SummarizationKernel<<>>( countl, childl, massl, nodes_pos, nodes_pos + nnodes + 1, NNODES, n, bottomd); - CHECK_CUDA(stream); + CHECK_CUDA(stream.value()); // Group closed bodies together, used to speed up Repulsion kernel - SortKernel<<>>( + SortKernel<<>>( sortl, countl, startl, childl, NNODES, n, bottomd); - CHECK_CUDA(stream); + CHECK_CUDA(stream.value()); // Force computation O(n . log(n)) - RepulsionKernel<<>>(scaling_ratio, - theta, - epssq, - sortl, - childl, - massl, - nodes_pos, - nodes_pos + nnodes + 1, - rep_forces, - rep_forces + nnodes + 1, - theta_squared, - NNODES, - FOUR_NNODES, - n, - radiusd_squared, - maxdepthd); - CHECK_CUDA(stream); + RepulsionKernel<<>>(scaling_ratio, + theta, + epssq, + sortl, + childl, + massl, + nodes_pos, + nodes_pos + nnodes + 1, + rep_forces, + rep_forces + nnodes + 1, + theta_squared, + NNODES, + FOUR_NNODES, + n, + radiusd_squared, + maxdepthd); + CHECK_CUDA(stream.value()); apply_gravity(nodes_pos, nodes_pos + nnodes + 1, @@ -265,7 +269,7 @@ void barnes_hut(GraphCOOView &graph, strong_gravity_mode, scaling_ratio, n, - stream); + stream.value()); apply_attraction(row, col, @@ -280,7 +284,7 @@ void barnes_hut(GraphCOOView &graph, lin_log_mode, edge_weight_influence, outbound_att_compensation, - stream); + stream.value()); compute_local_speed(rep_forces, rep_forces + nnodes + 1, @@ -292,43 +296,41 @@ void barnes_hut(GraphCOOView &graph, swinging, 
traction, n, - stream); + stream.value()); // Compute global swinging and traction values - const float s = - thrust::reduce(rmm::exec_policy(stream)->on(stream), d_swinging.begin(), d_swinging.end()); + const float s = thrust::reduce(rmm::exec_policy(stream), d_swinging.begin(), d_swinging.end()); - const float t = - thrust::reduce(rmm::exec_policy(stream)->on(stream), d_traction.begin(), d_traction.end()); + const float t = thrust::reduce(rmm::exec_policy(stream), d_traction.begin(), d_traction.end()); // Compute global speed based on gloab and local swinging and traction. adapt_speed(jitter_tolerance, &jt, &speed, &speed_efficiency, s, t, n); // Update positions - apply_forces_bh<<>>(nodes_pos, - nodes_pos + nnodes + 1, - attract, - attract + n, - rep_forces, - rep_forces + nnodes + 1, - old_forces, - old_forces + n, - swinging, - speed, - n); + apply_forces_bh<<>>(nodes_pos, + nodes_pos + nnodes + 1, + attract, + attract + n, + rep_forces, + rep_forces + nnodes + 1, + old_forces, + old_forces + n, + swinging, + speed, + n); if (callback) callback->on_epoch_end(nodes_pos); if (verbose) { - printf("iteration %i, speed: %f, speed_efficiency: %f, ", iter + 1, speed, speed_efficiency); - printf("jt: %f, ", jt); - printf("swinging: %f, traction: %f\n", s, t); + std::cout << "iteration: " << iter + 1 << ", speed: " << speed + << ", speed_efficiency: " << speed_efficiency << ", jt: " << jt + << ", swinging: " << s << ", traction: " << t << "\n"; } } // Copy nodes positions into final output pos - copy(n, nodes_pos, pos); - copy(n, nodes_pos + nnodes + 1, pos + n); + raft::copy(pos, nodes_pos, n, stream.value()); + raft::copy(pos + n, nodes_pos + nnodes + 1, n, stream.value()); if (callback) callback->on_train_end(nodes_pos); } diff --git a/cpp/src/layout/exact_fa2.hpp b/cpp/src/layout/exact_fa2.hpp index 0b90e417968..a82b7a5faff 100644 --- a/cpp/src/layout/exact_fa2.hpp +++ b/cpp/src/layout/exact_fa2.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. 
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,13 +16,14 @@ #pragma once -#include -#include +#include +#include -#include #include -#include -#include + +#include +#include +#include #include "exact_repulsion.hpp" #include "fa2_kernels.hpp" @@ -32,7 +33,8 @@ namespace cugraph { namespace detail { template -void exact_fa2(GraphCOOView &graph, +void exact_fa2(raft::handle_t const &handle, + GraphCOOView &graph, float *pos, const int max_iter = 500, float *x_start = nullptr, @@ -48,9 +50,9 @@ void exact_fa2(GraphCOOView &graph, bool verbose = false, internals::GraphBasedDimRedCallback *callback = nullptr) { - cudaStream_t stream = {nullptr}; - const edge_t e = graph.number_of_edges; - const vertex_t n = graph.number_of_vertices; + rmm::cuda_stream_view stream(handle.get_stream()); + const edge_t e = graph.number_of_edges; + const vertex_t n = graph.number_of_vertices; float *d_repel{nullptr}; float *d_attract{nullptr}; @@ -59,35 +61,37 @@ void exact_fa2(GraphCOOView &graph, float *d_swinging{nullptr}; float *d_traction{nullptr}; - rmm::device_vector repel(n * 2, 0); - rmm::device_vector attract(n * 2, 0); - rmm::device_vector old_forces(n * 2, 0); + rmm::device_uvector repel(n * 2, stream); + rmm::device_uvector attract(n * 2, stream); + rmm::device_uvector old_forces(n * 2, stream); + thrust::fill(rmm::exec_policy(stream), old_forces.begin(), old_forces.end(), 0.f); // FA2 requires degree + 1. 
- rmm::device_vector mass(n, 1); - rmm::device_vector swinging(n, 0); - rmm::device_vector traction(n, 0); - - d_repel = repel.data().get(); - d_attract = attract.data().get(); - d_old_forces = old_forces.data().get(); - d_mass = mass.data().get(); - d_swinging = swinging.data().get(); - d_traction = traction.data().get(); + rmm::device_uvector mass(n, stream); + thrust::fill(rmm::exec_policy(stream), mass.begin(), mass.end(), 1); + rmm::device_uvector swinging(n, stream); + rmm::device_uvector traction(n, stream); + + d_repel = repel.data(); + d_attract = attract.data(); + d_old_forces = old_forces.data(); + d_mass = mass.data(); + d_swinging = swinging.data(); + d_traction = traction.data(); int random_state = 0; - random_vector(pos, n * 2, random_state, stream); + random_vector(pos, n * 2, random_state, stream.value()); if (x_start && y_start) { - copy(n, x_start, pos); - copy(n, y_start, pos + n); + raft::copy(pos, x_start, n, stream.value()); + raft::copy(pos + n, y_start, n, stream.value()); } // Sort COO for coalesced memory access. 
- sort(graph, stream); - CHECK_CUDA(stream); + sort(graph, stream.value()); + CHECK_CUDA(stream.value()); graph.degree(d_mass, cugraph::DegreeDirection::OUT); - CHECK_CUDA(stream); + CHECK_CUDA(stream.value()); const vertex_t *row = graph.src_indices; const vertex_t *col = graph.dst_indices; @@ -99,7 +103,7 @@ void exact_fa2(GraphCOOView &graph, float jt = 0.f; if (outbound_attraction_distribution) { - int sum = thrust::reduce(rmm::exec_policy(stream)->on(stream), mass.begin(), mass.end()); + int sum = thrust::reduce(rmm::exec_policy(stream), mass.begin(), mass.end()); outbound_att_compensation = sum / (float)n; } @@ -110,13 +114,14 @@ void exact_fa2(GraphCOOView &graph, for (int iter = 0; iter < max_iter; ++iter) { // Reset force arrays - fill(n * 2, d_repel, 0.f); - fill(n * 2, d_attract, 0.f); - fill(n, d_swinging, 0.f); - fill(n, d_traction, 0.f); + thrust::fill(rmm::exec_policy(stream), repel.begin(), repel.end(), 0.f); + thrust::fill(rmm::exec_policy(stream), attract.begin(), attract.end(), 0.f); + thrust::fill(rmm::exec_policy(stream), swinging.begin(), swinging.end(), 0.f); + thrust::fill(rmm::exec_policy(stream), traction.begin(), traction.end(), 0.f); // Exact repulsion - apply_repulsion(pos, pos + n, d_repel, d_repel + n, d_mass, scaling_ratio, n, stream); + apply_repulsion( + pos, pos + n, d_repel, d_repel + n, d_mass, scaling_ratio, n, stream.value()); apply_gravity(pos, pos + n, @@ -127,7 +132,7 @@ void exact_fa2(GraphCOOView &graph, strong_gravity_mode, scaling_ratio, n, - stream); + stream.value()); apply_attraction(row, col, @@ -142,7 +147,7 @@ void exact_fa2(GraphCOOView &graph, lin_log_mode, edge_weight_influence, outbound_att_compensation, - stream); + stream.value()); compute_local_speed(d_repel, d_repel + n, @@ -154,13 +159,11 @@ void exact_fa2(GraphCOOView &graph, d_swinging, d_traction, n, - stream); + stream.value()); // Compute global swinging and traction values. 
- const float s = - thrust::reduce(rmm::exec_policy(stream)->on(stream), swinging.begin(), swinging.end()); - const float t = - thrust::reduce(rmm::exec_policy(stream)->on(stream), traction.begin(), traction.end()); + const float s = thrust::reduce(rmm::exec_policy(stream), swinging.begin(), swinging.end()); + const float t = thrust::reduce(rmm::exec_policy(stream), traction.begin(), traction.end()); adapt_speed(jitter_tolerance, &jt, &speed, &speed_efficiency, s, t, n); @@ -175,14 +178,14 @@ void exact_fa2(GraphCOOView &graph, d_swinging, speed, n, - stream); + stream.value()); if (callback) callback->on_epoch_end(pos); if (verbose) { - printf("iteration %i, speed: %f, speed_efficiency: %f, ", iter + 1, speed, speed_efficiency); - printf("jt: %f, ", jt); - printf("swinging: %f, traction: %f\n", s, t); + std::cout << "iteration: " << iter + 1 << ", speed: " << speed + << ", speed_efficiency: " << speed_efficiency << ", jt: " << jt + << ", swinging: " << s << ", traction: " << t << "\n"; } } diff --git a/cpp/src/layout/fa2_kernels.hpp b/cpp/src/layout/fa2_kernels.hpp index 0c7e9b1d193..9aec348cec5 100644 --- a/cpp/src/layout/fa2_kernels.hpp +++ b/cpp/src/layout/fa2_kernels.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,7 +17,7 @@ #pragma once #define restrict __restrict__ -#include "utilities/graph_utils.cuh" +#include namespace cugraph { namespace detail { diff --git a/cpp/src/layout/force_atlas2.cu b/cpp/src/layout/force_atlas2.cu index 6da9b77b45d..86c95cc883e 100644 --- a/cpp/src/layout/force_atlas2.cu +++ b/cpp/src/layout/force_atlas2.cu @@ -20,7 +20,8 @@ namespace cugraph { template -void force_atlas2(GraphCOOView &graph, +void force_atlas2(raft::handle_t const &handle, + GraphCOOView &graph, float *pos, const int max_iter, float *x_start, @@ -42,7 +43,8 @@ void force_atlas2(GraphCOOView &graph, CUGRAPH_EXPECTS(graph.number_of_vertices != 0, "Invalid input: Graph is empty"); if (!barnes_hut_optimize) { - cugraph::detail::exact_fa2(graph, + cugraph::detail::exact_fa2(handle, + graph, pos, max_iter, x_start, @@ -58,7 +60,8 @@ void force_atlas2(GraphCOOView &graph, verbose, callback); } else { - cugraph::detail::barnes_hut(graph, + cugraph::detail::barnes_hut(handle, + graph, pos, max_iter, x_start, @@ -77,7 +80,8 @@ void force_atlas2(GraphCOOView &graph, } } -template void force_atlas2(GraphCOOView &graph, +template void force_atlas2(raft::handle_t const &handle, + GraphCOOView &graph, float *pos, const int max_iter, float *x_start, @@ -95,7 +99,8 @@ template void force_atlas2(GraphCOOView &graph bool verbose, internals::GraphBasedDimRedCallback *callback); -template void force_atlas2(GraphCOOView &graph, +template void force_atlas2(raft::handle_t const &handle, + GraphCOOView &graph, float *pos, const int max_iter, float *x_start, diff --git a/cpp/src/linear_assignment/hungarian.cu b/cpp/src/linear_assignment/hungarian.cu index 40f7be52c90..dfa1e43edad 100644 --- a/cpp/src/linear_assignment/hungarian.cu +++ b/cpp/src/linear_assignment/hungarian.cu @@ -17,12 +17,12 @@ #include #include -#include +#include #include #include -#include +#include #include diff --git a/cpp/src/link_analysis/gunrock_hits.cpp b/cpp/src/link_analysis/gunrock_hits.cpp index 
5ffaacfe7a6..ffaec16c6a8 100644 --- a/cpp/src/link_analysis/gunrock_hits.cpp +++ b/cpp/src/link_analysis/gunrock_hits.cpp @@ -19,10 +19,10 @@ * @brief wrapper calling gunrock's HITS analytic * --------------------------------------------------------------------------*/ -#include -#include +#include +#include -#include +#include #include diff --git a/cpp/src/link_prediction/jaccard.cu b/cpp/src/link_prediction/jaccard.cu index 83a4ec6e713..b93ad0bd0b3 100644 --- a/cpp/src/link_prediction/jaccard.cu +++ b/cpp/src/link_prediction/jaccard.cu @@ -20,9 +20,9 @@ * ---------------------------------------------------------------------------**/ #include -#include -#include "graph.hpp" -#include "utilities/graph_utils.cuh" +#include +#include +#include namespace cugraph { namespace detail { diff --git a/cpp/src/link_prediction/overlap.cu b/cpp/src/link_prediction/overlap.cu index 83fdc799649..915b2c8bd52 100644 --- a/cpp/src/link_prediction/overlap.cu +++ b/cpp/src/link_prediction/overlap.cu @@ -20,9 +20,9 @@ * ---------------------------------------------------------------------------**/ #include -#include -#include "graph.hpp" -#include "utilities/graph_utils.cuh" +#include +#include +#include namespace cugraph { namespace detail { diff --git a/cpp/src/sampling/random_walks.cu b/cpp/src/sampling/random_walks.cu index a5410d0e65e..1883535bf70 100644 --- a/cpp/src/sampling/random_walks.cu +++ b/cpp/src/sampling/random_walks.cu @@ -16,7 +16,7 @@ // Andrei Schaffer, aschaffer@nvidia.com // -#include +#include #include "random_walks.cuh" namespace cugraph { diff --git a/cpp/src/sampling/random_walks.cuh b/cpp/src/sampling/random_walks.cuh index 10a47318bcb..44a6e9e83aa 100644 --- a/cpp/src/sampling/random_walks.cuh +++ b/cpp/src/sampling/random_walks.cuh @@ -18,7 +18,7 @@ // #pragma once -#include +#include #include diff --git a/cpp/src/serialization/serializer.cu b/cpp/src/serialization/serializer.cu new file mode 100644 index 00000000000..1950ed780c5 --- /dev/null +++ 
b/cpp/src/serialization/serializer.cu @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Andrei Schaffer, aschaffer@nvidia.com +// + +#include + +#include + +#include + +#include + +#include + +#include + +#include + +namespace cugraph { +namespace serializer { +template +void serializer_t::serialize(value_t val) +{ + auto byte_buff_sz = sizeof(value_t); + auto it_end = begin_ + byte_buff_sz; + + raft::update_device( + begin_, reinterpret_cast(&val), byte_buff_sz, handle_.get_stream()); + + begin_ = it_end; +} + +template +value_t serializer_t::unserialize(void) +{ + value_t val{}; + auto byte_buff_sz = sizeof(value_t); + + raft::update_host(&val, reinterpret_cast(cbegin_), 1, handle_.get_stream()); + + cbegin_ += byte_buff_sz; + return val; +} + +template +void serializer_t::serialize(value_t const* p_d_src, size_t size) +{ + auto byte_buff_sz = size * sizeof(value_t); + auto it_end = begin_ + byte_buff_sz; + byte_t const* byte_buff = reinterpret_cast(p_d_src); + + thrust::copy_n(rmm::exec_policy(handle_.get_stream_view()), byte_buff, byte_buff_sz, begin_); + + begin_ = it_end; +} + +template +rmm::device_uvector serializer_t::unserialize(size_t size) +{ + auto byte_buff_sz = size * sizeof(value_t); + rmm::device_uvector d_dest(size, handle_.get_stream()); + byte_t* byte_buff = reinterpret_cast(d_dest.data()); + + thrust::copy_n(rmm::exec_policy(handle_.get_stream_view()), cbegin_, 
byte_buff_sz, byte_buff); + + cbegin_ += byte_buff_sz; + return d_dest; +} + +// serialization of graph metadata, via device orchestration: +// +template +void serializer_t::serialize(serializer_t::graph_meta_t const& gmeta) +{ + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + if constexpr (!graph_t::is_multi_gpu) { + using bool_t = typename graph_meta_t::bool_ser_t; + + serialize(gmeta.num_vertices_); + serialize(gmeta.num_edges_); + serialize(static_cast(gmeta.properties_.is_symmetric)); + serialize(static_cast(gmeta.properties_.is_multigraph)); + serialize(static_cast(gmeta.properties_.is_weighted)); + + auto seg_off_sz_bytes = gmeta.segment_offsets_.size() * sizeof(vertex_t); + if (seg_off_sz_bytes > 0) { + auto it_end = begin_ + seg_off_sz_bytes; + + raft::update_device(begin_, + reinterpret_cast(gmeta.segment_offsets_.data()), + seg_off_sz_bytes, + handle_.get_stream()); + + begin_ = it_end; + } + + } else { + CUGRAPH_FAIL("Unsupported graph type for serialization."); + } +} + +// unserialization of graph metadata, via device orchestration: +// +template +serializer_t::graph_meta_t serializer_t::unserialize( + size_t graph_meta_sz_bytes, + serializer_t::graph_meta_t const& empty_meta) // tag dispatching parameter +{ + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + if constexpr (!graph_t::is_multi_gpu) { + using bool_t = typename graph_meta_t::bool_ser_t; + + CUGRAPH_EXPECTS(graph_meta_sz_bytes >= 2 * sizeof(size_t) + 3 * sizeof(bool_t), + "Un/serialization meta size mismatch."); + + size_t num_vertices = unserialize(); + size_t num_edges = unserialize(); + bool_t is_symmetric = unserialize(); + bool_t is_multigraph = unserialize(); + bool_t is_weighted = unserialize(); + + graph_properties_t properties{static_cast(is_symmetric), + static_cast(is_multigraph), + 
static_cast(is_weighted)}; + + std::vector segment_offsets{}; + + size_t seg_off_sz_bytes = graph_meta_sz_bytes - 2 * sizeof(size_t) - 3 * sizeof(bool_t); + + if (seg_off_sz_bytes > 0) { + raft::update_host(segment_offsets.data(), + reinterpret_cast(cbegin_), + seg_off_sz_bytes, + handle_.get_stream()); + + cbegin_ += seg_off_sz_bytes; + } + + return graph_meta_t{num_vertices, num_edges, properties, segment_offsets}; + + } else { + CUGRAPH_FAIL("Unsupported graph type for unserialization."); + return graph_meta_t{}; + } +} + +// graph serialization: +// metadata argument (gvmeta) can be used for checking / testing; +// +template +void serializer_t::serialize(graph_t const& graph, serializer_t::graph_meta_t& gvmeta) +{ + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + if constexpr (!graph_t::is_multi_gpu) { + size_t num_vertices = graph.get_number_of_vertices(); + size_t num_edges = graph.get_number_of_edges(); + auto&& gview = graph.view(); + + gvmeta = graph_meta_t{graph}; + + edge_t const* offsets = gview.offsets(); + vertex_t const* indices = gview.indices(); + weight_t const* weights = gview.weights(); + + // FIXME: remove when host_bcast() becomes available for vectors; + // + // for now, this must come first, because unserialize() + // needs it at the beginning to extract graph metadata + // to be able to finish the rest of the graph unserialization; + // + serialize(gvmeta); + + serialize(offsets, num_vertices + 1); + serialize(indices, num_edges); + + if (graph.is_weighted()) serialize(weights, num_edges); + + } else { + CUGRAPH_FAIL("Unsupported graph type for serialization."); + } +} + +// graph unserialization: +// +template +graph_t serializer_t::unserialize(size_t device_sz_bytes, size_t host_sz_bytes) +{ + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + 
if constexpr (!graph_t::is_multi_gpu) { + graph_meta_t empty_meta{}; // tag-dispatching only + + // FIXME: remove when host_bcast() becomes available for vectors; + // + // for now, this must come first, because unserialize() + // needs it at the beginning to extract graph metadata + // to be able to finish the rest of the graph unserialization; + // + auto gvmeta = unserialize(host_sz_bytes, empty_meta); + + auto pair_sz = get_device_graph_sz_bytes(gvmeta); + + CUGRAPH_EXPECTS((pair_sz.first == device_sz_bytes) && (pair_sz.second == host_sz_bytes), + "Un/serialization size mismatch."); + + vertex_t num_vertices = gvmeta.num_vertices_; + edge_t num_edges = gvmeta.num_edges_; + auto g_props = gvmeta.properties_; + auto seg_offsets = gvmeta.segment_offsets_; + + auto d_offsets = unserialize(num_vertices + 1); + auto d_indices = unserialize(num_edges); + + if (g_props.is_weighted) { + auto d_weights = unserialize(num_edges); + + return graph_t(handle_, + num_vertices, + num_edges, + g_props, + std::move(d_offsets), + std::move(d_indices), + std::move(d_weights), + std::move(seg_offsets)); // RVO-ed + } else { + return graph_t(handle_, + num_vertices, + num_edges, + g_props, + std::move(d_offsets), + std::move(d_indices), + rmm::device_uvector(0, handle_.get_stream()), + std::move(seg_offsets)); // RVO-ed + } + + } else { + CUGRAPH_FAIL("Unsupported graph type for unserialization."); + + return graph_t{handle_}; + } +} + +// Manual template instantiations (EIDir's): +// +template void serializer_t::serialize(int32_t const* p_d_src, size_t size); +template void serializer_t::serialize(int64_t const* p_d_src, size_t size); +template void serializer_t::serialize(float const* p_d_src, size_t size); +template void serializer_t::serialize(double const* p_d_src, size_t size); + +template rmm::device_uvector serializer_t::unserialize(size_t size); +template rmm::device_uvector serializer_t::unserialize(size_t size); +template rmm::device_uvector 
serializer_t::unserialize(size_t size); +template rmm::device_uvector serializer_t::unserialize(size_t size); + +// serialize graph: +// +template void serializer_t::serialize( + graph_t const& graph, + serializer_t::graph_meta_t>&); + +template void serializer_t::serialize( + graph_t const& graph, + serializer_t::graph_meta_t>&); + +template void serializer_t::serialize( + graph_t const& graph, + serializer_t::graph_meta_t>&); + +template void serializer_t::serialize( + graph_t const& graph, + serializer_t::graph_meta_t>&); + +template void serializer_t::serialize( + graph_t const& graph, + serializer_t::graph_meta_t>&); + +template void serializer_t::serialize( + graph_t const& graph, + serializer_t::graph_meta_t>&); + +// unserialize graph: +// +template graph_t serializer_t::unserialize(size_t, size_t); + +template graph_t serializer_t::unserialize(size_t, size_t); + +template graph_t serializer_t::unserialize(size_t, size_t); + +template graph_t serializer_t::unserialize(size_t, size_t); + +template graph_t serializer_t::unserialize(size_t, size_t); + +template graph_t serializer_t::unserialize(size_t, size_t); + +} // namespace serializer +} // namespace cugraph diff --git a/cpp/src/sort/bitonic.cuh b/cpp/src/sort/bitonic.cuh index e2922a58d39..b1b19bafdf0 100644 --- a/cpp/src/sort/bitonic.cuh +++ b/cpp/src/sort/bitonic.cuh @@ -1,7 +1,7 @@ // -*-c++-*- /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -34,7 +34,7 @@ #include #include -#include +#include namespace cugraph { namespace sort { diff --git a/cpp/tests/utilities/generate_graph_from_edgelist.cu b/cpp/src/structure/create_graph_from_edgelist.cu similarity index 67% rename from cpp/tests/utilities/generate_graph_from_edgelist.cu rename to cpp/src/structure/create_graph_from_edgelist.cu index 5f41e0e5ce0..27764ead0f0 100644 --- a/cpp/tests/utilities/generate_graph_from_edgelist.cu +++ b/cpp/src/structure/create_graph_from_edgelist.cu @@ -13,21 +13,20 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include - -#include -#include -#include -#include +#include +#include +#include +#include #include -#include +#include +#include #include namespace cugraph { -namespace test { +namespace experimental { namespace { @@ -41,14 +40,14 @@ std::enable_if_t< std::tuple< cugraph::experimental::graph_t, rmm::device_uvector>> -generate_graph_from_edgelist_impl(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& edgelist_rows, - rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, - bool renumber) +create_graph_from_edgelist_impl( + raft::handle_t const& handle, + std::optional> optional_local_vertex_span, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + graph_properties_t graph_properties, + bool renumber) { CUGRAPH_EXPECTS(renumber, "renumber should be true if multi_gpu is true."); @@ -71,7 +70,7 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, store_transposed ? thrust::make_zip_iterator(thrust::make_tuple(edgelist_cols.begin(), edgelist_rows.begin())) : thrust::make_zip_iterator(thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin())); - auto edge_counts = test_weighted + auto edge_counts = graph_properties.is_weighted ? 
cugraph::experimental::groupby_and_count(pair_first, pair_first + edgelist_rows.size(), edgelist_weights.begin(), @@ -111,12 +110,7 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, } std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = cugraph::experimental::renumber_edgelist( - handle, - vertices.data(), - static_cast(vertices.size()), - major_ptrs, - minor_ptrs, - counts); + handle, optional_local_vertex_span, major_ptrs, minor_ptrs, counts); } // 4. create a graph @@ -127,20 +121,14 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, edgelists[i] = cugraph::experimental::edgelist_t{ edgelist_rows.data() + h_displacements[i], edgelist_cols.data() + h_displacements[i], - test_weighted ? edgelist_weights.data() + h_displacements[i] - : static_cast(nullptr), + graph_properties.is_weighted ? edgelist_weights.data() + h_displacements[i] + : static_cast(nullptr), static_cast(h_edge_counts[i])}; } return std::make_tuple( cugraph::experimental::graph_t( - handle, - edgelists, - partition, - number_of_vertices, - number_of_edges, - cugraph::experimental::graph_properties_t{is_symmetric, false, test_weighted}, - true), + handle, edgelists, partition, number_of_vertices, number_of_edges, graph_properties, true), std::move(renumber_map_labels)); } @@ -154,26 +142,43 @@ std::enable_if_t< std::tuple< cugraph::experimental::graph_t, rmm::device_uvector>> -generate_graph_from_edgelist_impl(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& edgelist_rows, - rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, - bool renumber) +create_graph_from_edgelist_impl( + raft::handle_t const& handle, + std::optional> optional_vertex_span, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + graph_properties_t graph_properties, + bool renumber) { - vertex_t 
number_of_vertices = static_cast(vertices.size()); - auto renumber_map_labels = renumber ? cugraph::experimental::renumber_edgelist( handle, - vertices.data(), - static_cast(vertices.size()), + optional_vertex_span, store_transposed ? edgelist_cols.data() : edgelist_rows.data(), store_transposed ? edgelist_rows.data() : edgelist_cols.data(), static_cast(edgelist_rows.size())) : rmm::device_uvector(0, handle.get_stream()); + vertex_t num_vertices{}; + if (renumber) { + num_vertices = static_cast(renumber_map_labels.size()); + } else { + if (optional_vertex_span) { + num_vertices = std::get<1>(*optional_vertex_span); + } else { + auto edge_first = + thrust::make_zip_iterator(thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin())); + num_vertices = + thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + edgelist_rows.size(), + [] __device__(auto e) { return std::max(thrust::get<0>(e), thrust::get<1>(e)); }, + vertex_t{0}, + thrust::maximum()) + + 1; + } + } return std::make_tuple( cugraph::experimental::graph_t( @@ -181,10 +186,10 @@ generate_graph_from_edgelist_impl(raft::handle_t const& handle, cugraph::experimental::edgelist_t{ edgelist_rows.data(), edgelist_cols.data(), - test_weighted ? edgelist_weights.data() : nullptr, + graph_properties.is_weighted ? edgelist_weights.data() : static_cast(nullptr), static_cast(edgelist_rows.size())}, - number_of_vertices, - cugraph::experimental::graph_properties_t{is_symmetric, false, test_weighted}, + num_vertices, + graph_properties, renumber ? 
true : false), std::move(renumber_map_labels)); } @@ -198,23 +203,22 @@ template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& edgelist_rows, - rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, - bool renumber) +create_graph_from_edgelist( + raft::handle_t const& handle, + std::optional> optional_vertex_span, + rmm::device_uvector&& edgelist_rows, + rmm::device_uvector&& edgelist_cols, + rmm::device_uvector&& edgelist_weights, + graph_properties_t graph_properties, + bool renumber) { - return generate_graph_from_edgelist_impl( + return create_graph_from_edgelist_impl( handle, - std::move(vertices), + optional_vertex_span, std::move(edgelist_rows), std::move(edgelist_cols), std::move(edgelist_weights), - is_symmetric, - test_weighted, + graph_properties, renumber); } @@ -222,291 +226,267 @@ generate_graph_from_edgelist(raft::handle_t const& handle, template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& 
handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, 
rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool 
test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, 
rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& 
vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); template std::tuple, rmm::device_uvector> -generate_graph_from_edgelist( +create_graph_from_edgelist( raft::handle_t const& handle, - rmm::device_uvector&& vertices, + std::optional> optional_vertex_span, rmm::device_uvector&& edgelist_rows, rmm::device_uvector&& edgelist_cols, rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, + graph_properties_t graph_properties, bool renumber); -} // namespace test +} // namespace experimental } // namespace cugraph diff --git a/cpp/src/structure/graph.cu b/cpp/src/structure/graph.cu index 056ad39fefc..9f683af8209 100644 --- a/cpp/src/structure/graph.cu +++ b/cpp/src/structure/graph.cu @@ -14,9 +14,9 @@ * limitations under the License. */ -#include -#include "utilities/error.hpp" -#include "utilities/graph_utils.cuh" +#include +#include +#include #include @@ -149,4 +149,4 @@ template class GraphCompressedSparseBaseView; template class GraphCompressedSparseBaseView; } // namespace cugraph -#include "utilities/eidir_graph_utils.hpp" +#include diff --git a/cpp/src/traversal/README.md b/cpp/src/traversal/README.md index 7f436926de8..429b58d441e 100644 --- a/cpp/src/traversal/README.md +++ b/cpp/src/traversal/README.md @@ -13,7 +13,7 @@ The unit test code is the best place to search for examples on calling SSSP. 
The example assumes that you create an SG or MG graph somehow. The caller must create the distances and predecessors vectors in device memory and pass in the raw pointers to those vectors into the SSSP function. ```cpp -#include +#include ... using vertex_t = int32_t; // or int64_t, whichever is appropriate using weight_t = float; // or double, whichever is appropriate @@ -40,7 +40,7 @@ The unit test code is the best place to search for examples on calling BFS. The example assumes that you create an SG or MG graph somehow. The caller must create the distances and predecessors vectors in device memory and pass in the raw pointers to those vectors into the BFS function. ```cpp -#include +#include ... using vertex_t = int32_t; // or int64_t, whichever is appropriate using weight_t = float; // or double, whichever is appropriate diff --git a/cpp/src/traversal/bfs.cu b/cpp/src/traversal/bfs.cu index 7c59010cab8..8b62fbfecee 100644 --- a/cpp/src/traversal/bfs.cu +++ b/cpp/src/traversal/bfs.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -14,14 +14,14 @@ #include #include "bfs.cuh" -#include "graph.hpp" +#include -#include +#include +#include #include "bfs_kernels.cuh" #include "mg/bfs.cuh" #include "mg/common_utils.cuh" #include "traversal_common.cuh" -#include "utilities/graph_utils.cuh" namespace cugraph { namespace detail { diff --git a/cpp/src/traversal/bfs_kernels.cuh b/cpp/src/traversal/bfs_kernels.cuh index bf2ec2fc6ee..78ce646d3c6 100644 --- a/cpp/src/traversal/bfs_kernels.cuh +++ b/cpp/src/traversal/bfs_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,7 +18,7 @@ #include #include -#include "graph.hpp" +#include #include "traversal_common.cuh" namespace cugraph { diff --git a/cpp/src/traversal/mg/frontier_expand.cuh b/cpp/src/traversal/mg/frontier_expand.cuh index 2733c319087..5436c060e18 100644 --- a/cpp/src/traversal/mg/frontier_expand.cuh +++ b/cpp/src/traversal/mg/frontier_expand.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ #pragma once -#include +#include #include "frontier_expand_kernels.cuh" #include "vertex_binning.cuh" diff --git a/cpp/src/traversal/mg/frontier_expand_kernels.cuh b/cpp/src/traversal/mg/frontier_expand_kernels.cuh index 625ec0d956f..00884e01755 100644 --- a/cpp/src/traversal/mg/frontier_expand_kernels.cuh +++ b/cpp/src/traversal/mg/frontier_expand_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -16,7 +16,7 @@ #pragma once -#include +#include #include "vertex_binning.cuh" namespace cugraph { diff --git a/cpp/src/traversal/sssp.cu b/cpp/src/traversal/sssp.cu index 6ffbbbf462b..8dcaffd953a 100644 --- a/cpp/src/traversal/sssp.cu +++ b/cpp/src/traversal/sssp.cu @@ -17,9 +17,9 @@ // Author: Prasun Gera pgera@nvidia.com #include -#include +#include -#include "graph.hpp" +#include #include "sssp.cuh" #include "sssp_kernels.cuh" @@ -47,7 +47,7 @@ void SSSP::setup() // Allocate buffer for data that need to be reset every iteration iter_buffer_size = sizeof(int) * (edges_bmap_size + vertices_bmap_size) + sizeof(IndexType); - iter_buffer.resize(iter_buffer_size); + iter_buffer.resize(iter_buffer_size, stream); // ith bit of relaxed_edges_bmap <=> ith edge was relaxed relaxed_edges_bmap = static_cast(iter_buffer.data()); // ith bit of next_frontier_bmap <=> vertex is active in the next frontier diff --git a/cpp/src/traversal/sssp_kernels.cuh b/cpp/src/traversal/sssp_kernels.cuh index d96540b22b9..d1cf9980773 100644 --- a/cpp/src/traversal/sssp_kernels.cuh +++ b/cpp/src/traversal/sssp_kernels.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,8 +19,8 @@ #include #include +#include #include "traversal_common.cuh" -#include "utilities/error.hpp" namespace cugraph { namespace detail { namespace sssp_kernels { diff --git a/cpp/src/traversal/traversal_common.cuh b/cpp/src/traversal/traversal_common.cuh index 2802fb94be8..64a21a89b04 100644 --- a/cpp/src/traversal/traversal_common.cuh +++ b/cpp/src/traversal/traversal_common.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ #pragma once #include -#include "utilities/error.hpp" +#include #define MAXBLOCKS 65535 #define WARP_SIZE 32 diff --git a/cpp/src/traversal/tsp.hpp b/cpp/src/traversal/tsp.hpp index 1208f8c8790..8c6948f218c 100644 --- a/cpp/src/traversal/tsp.hpp +++ b/cpp/src/traversal/tsp.hpp @@ -16,7 +16,7 @@ #pragma once -#include +#include #include #include diff --git a/cpp/src/traversal/two_hop_neighbors.cu b/cpp/src/traversal/two_hop_neighbors.cu index fb984dae0ad..770e618637b 100644 --- a/cpp/src/traversal/two_hop_neighbors.cu +++ b/cpp/src/traversal/two_hop_neighbors.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -20,9 +20,9 @@ * ---------------------------------------------------------------------------**/ #include -#include -#include -#include +#include +#include +#include #include "two_hop_neighbors.cuh" #include diff --git a/cpp/src/tree/mst.cu b/cpp/src/tree/mst.cu index cc3bdc64a2d..54698b588a4 100644 --- a/cpp/src/tree/mst.cu +++ b/cpp/src/tree/mst.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -20,7 +20,7 @@ * @file mst.cu * ---------------------------------------------------------------------------**/ -#include +#include #include #include @@ -28,8 +28,8 @@ #include #include -#include -#include +#include +#include #include diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index b4dcd84a7e1..e9bf9ffe031 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -14,19 +14,22 @@ * limitations under the License. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include -#include #include + +#include #include #include @@ -208,6 +211,7 @@ void populate_graph_container(graph_container_t& graph_container, size_t num_global_edges, bool sorted_by_degree, bool is_weighted, + bool is_symmetric, bool transposed, bool multi_gpu) { @@ -245,7 +249,7 @@ void populate_graph_container(graph_container_t& graph_container, graph_container.do_expensive_check = do_expensive_check; experimental::graph_properties_t graph_props{ - .is_symmetric = false, .is_multigraph = false, .is_weighted = is_weighted}; + .is_symmetric = is_symmetric, .is_multigraph = false, .is_weighted = is_weighted}; graph_container.graph_props = graph_props; graph_container.graph_type = graphTypeEnum::graph_t; @@ -789,6 +793,81 @@ std::unique_ptr call_egonet(raft::handle_t const& handle, CUGRAPH_FAIL("vertexType/edgeType combination unsupported"); } } +// Wrapper for graph generate_rmat_edgelist() +// to expose the API to cython +// enum class generator_distribution_t { POWER_LAW = 0, UNIFORM }; +template +std::unique_ptr call_generate_rmat_edgelist(raft::handle_t const& handle, + size_t scale, + size_t num_edges, + double a, + double b, + double c, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids) +{ + auto src_dst_tuple = cugraph::generate_rmat_edgelist( + handle, scale, num_edges, a, 
b, c, seed, clip_and_flip); + + if (scramble_vertex_ids) { + cugraph::scramble_vertex_ids( + handle, std::get<0>(src_dst_tuple), std::get<1>(src_dst_tuple), vertex_t{0}, seed); + } + + graph_generator_t gg_vals{ + std::make_unique(std::get<0>(src_dst_tuple).release()), + std::make_unique(std::get<1>(src_dst_tuple).release())}; + + return std::make_unique(std::move(gg_vals)); +} + +template +std::vector, std::unique_ptr>> +call_generate_rmat_edgelists(raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + cugraph::generator_distribution_t size_distribution, + cugraph::generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids) +{ + auto src_dst_vec_tuple = cugraph::generate_rmat_edgelists(handle, + n_edgelists, + min_scale, + max_scale, + edge_factor, + size_distribution, + edge_distribution, + seed, + clip_and_flip); + + if (scramble_vertex_ids) { + std::for_each( + src_dst_vec_tuple.begin(), src_dst_vec_tuple.end(), [&handle, seed](auto& src_dst_tuple) { + cugraph::scramble_vertex_ids( + handle, std::get<0>(src_dst_tuple), std::get<1>(src_dst_tuple), vertex_t{0}, seed); + }); + } + + std::vector, std::unique_ptr>> + gg_vec; + + std::transform( + src_dst_vec_tuple.begin(), + src_dst_vec_tuple.end(), + std::back_inserter(gg_vec), + [](auto& tpl_dev_uvec) { + return std::make_pair( + std::move(std::make_unique(std::get<0>(tpl_dev_uvec).release())), + std::move(std::make_unique(std::get<1>(tpl_dev_uvec).release()))); + }); + + return gg_vec; +} // Wrapper for random_walks() through a graph container // to expose the API to cython. 
@@ -800,7 +879,8 @@ call_random_walks(raft::handle_t const& handle, graph_container_t const& graph_container, vertex_t const* ptr_start_set, edge_t num_paths, - edge_t max_depth) + edge_t max_depth, + bool use_padding) { if (graph_container.weightType == numberTypeEnum::floatType) { using weight_t = float; @@ -809,7 +889,7 @@ call_random_walks(raft::handle_t const& handle, detail::create_graph(handle, graph_container); auto triplet = cugraph::experimental::random_walks( - handle, graph->view(), ptr_start_set, num_paths, max_depth); + handle, graph->view(), ptr_start_set, num_paths, max_depth, use_padding); random_walk_ret_t rw_tri{std::get<0>(triplet).size(), std::get<1>(triplet).size(), @@ -828,7 +908,7 @@ call_random_walks(raft::handle_t const& handle, detail::create_graph(handle, graph_container); auto triplet = cugraph::experimental::random_walks( - handle, graph->view(), ptr_start_set, num_paths, max_depth); + handle, graph->view(), ptr_start_set, num_paths, max_depth, use_padding); random_walk_ret_t rw_tri{std::get<0>(triplet).size(), std::get<1>(triplet).size(), @@ -845,6 +925,20 @@ call_random_walks(raft::handle_t const& handle, } } +template +std::unique_ptr call_rw_paths(raft::handle_t const& handle, + index_t num_paths, + index_t const* vertex_path_sizes) +{ + auto triplet = + cugraph::experimental::query_rw_sizes_offsets(handle, num_paths, vertex_path_sizes); + random_walk_path_t rw_path_tri{ + std::make_unique(std::get<0>(triplet).release()), + std::make_unique(std::get<1>(triplet).release()), + std::make_unique(std::get<2>(triplet).release())}; + return std::make_unique(std::move(rw_path_tri)); +} + template std::unique_ptr random_walks_to_coo(raft::handle_t const& handle, random_walk_ret_t& rw_tri) @@ -914,6 +1008,41 @@ void call_sssp(raft::handle_t const& handle, } } +// wrapper for weakly connected components: +// +template +void call_wcc(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t* components) +{ + if 
(graph_container.is_multi_gpu) { + if (graph_container.edgeType == numberTypeEnum::int32Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::weakly_connected_components( + handle, graph->view(), reinterpret_cast(components), false); + + } else if (graph_container.edgeType == numberTypeEnum::int64Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::weakly_connected_components( + handle, graph->view(), reinterpret_cast(components), false); + } + } else { + if (graph_container.edgeType == numberTypeEnum::int32Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::weakly_connected_components( + handle, graph->view(), reinterpret_cast(components), false); + } else if (graph_container.edgeType == numberTypeEnum::int64Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::weakly_connected_components( + handle, graph->view(), reinterpret_cast(components), false); + } + } +} + // wrapper for shuffling: // template @@ -1037,10 +1166,11 @@ std::unique_ptr> call_renumber( std::tie( p_ret->get_dv(), p_ret->get_partition(), p_ret->get_num_vertices(), p_ret->get_num_edges()) = cugraph::experimental::renumber_edgelist( - handle, major_ptrs, minor_ptrs, edge_counts, do_expensive_check); + handle, std::nullopt, major_ptrs, minor_ptrs, edge_counts, do_expensive_check); } else { p_ret->get_dv() = cugraph::experimental::renumber_edgelist( handle, + std::nullopt, shuffled_edgelist_major_vertices, shuffled_edgelist_minor_vertices, edge_counts[0], @@ -1239,21 +1369,30 @@ template std::unique_ptr call_random_walks( graph_container_t const& graph_container, int32_t const* ptr_start_set, int32_t num_paths, - int32_t max_depth); + int32_t max_depth, + bool use_padding); template std::unique_ptr call_random_walks( raft::handle_t const& handle, graph_container_t const& graph_container, int32_t const* ptr_start_set, int64_t 
num_paths, - int64_t max_depth); + int64_t max_depth, + bool use_padding); template std::unique_ptr call_random_walks( raft::handle_t const& handle, graph_container_t const& graph_container, int64_t const* ptr_start_set, int64_t num_paths, - int64_t max_depth); + int64_t max_depth, + bool use_padding); + +template std::unique_ptr call_rw_paths( + raft::handle_t const& handle, int32_t num_paths, int32_t const* vertex_path_sizes); + +template std::unique_ptr call_rw_paths( + raft::handle_t const& handle, int64_t num_paths, int64_t const* vertex_path_sizes); template std::unique_ptr random_walks_to_coo( raft::handle_t const& handle, random_walk_ret_t& rw_tri); @@ -1292,6 +1431,22 @@ template void call_sssp(raft::handle_t const& handle, int64_t* predecessors, const int64_t source_vertex); +template void call_wcc(raft::handle_t const& handle, + graph_container_t const& graph_container, + int32_t* components); + +template void call_wcc(raft::handle_t const& handle, + graph_container_t const& graph_container, + int32_t* components); + +template void call_wcc(raft::handle_t const& handle, + graph_container_t const& graph_container, + int64_t* components); + +template void call_wcc(raft::handle_t const& handle, + graph_container_t const& graph_container, + int64_t* components); + template std::unique_ptr> call_shuffle( raft::handle_t const& handle, int32_t* edgelist_major_vertices, @@ -1360,5 +1515,53 @@ template std::unique_ptr> call_renumber( bool do_expensive_check, bool multi_gpu); +template std::unique_ptr call_generate_rmat_edgelist( + raft::handle_t const& handle, + size_t scale, + size_t num_edges, + double a, + double b, + double c, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + +template std::unique_ptr call_generate_rmat_edgelist( + raft::handle_t const& handle, + size_t scale, + size_t num_edges, + double a, + double b, + double c, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + +template std::vector< + std::pair, 
std::unique_ptr>> +call_generate_rmat_edgelists(raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + cugraph::generator_distribution_t size_distribution, + cugraph::generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + +template std::vector< + std::pair, std::unique_ptr>> +call_generate_rmat_edgelists(raft::handle_t const& handle, + size_t n_edgelists, + size_t min_scale, + size_t max_scale, + size_t edge_factor, + cugraph::generator_distribution_t size_distribution, + cugraph::generator_distribution_t edge_distribution, + uint64_t seed, + bool clip_and_flip, + bool scramble_vertex_ids); + } // namespace cython } // namespace cugraph diff --git a/cpp/src/utilities/graph_bcast.cu b/cpp/src/utilities/graph_bcast.cu new file mode 100644 index 00000000000..e06c1508cf9 --- /dev/null +++ b/cpp/src/utilities/graph_bcast.cu @@ -0,0 +1,45 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Andrei Schaffer, aschaffer@nvidia.com +// +#include "graph_bcast.cuh" + +namespace cugraph { +namespace broadcast { +using namespace cugraph::experimental; +// Manual template instantiations (EIDir's): +// +template graph_t graph_broadcast( + raft::handle_t const& handle, graph_t* graph_ptr); + +template graph_t graph_broadcast( + raft::handle_t const& handle, graph_t* graph_ptr); + +template graph_t graph_broadcast( + raft::handle_t const& handle, graph_t* graph_ptr); + +template graph_t graph_broadcast( + raft::handle_t const& handle, graph_t* graph_ptr); + +template graph_t graph_broadcast( + raft::handle_t const& handle, graph_t* graph_ptr); + +template graph_t graph_broadcast( + raft::handle_t const& handle, graph_t* graph_ptr); + +} // namespace broadcast +} // namespace cugraph diff --git a/cpp/src/utilities/graph_bcast.cuh b/cpp/src/utilities/graph_bcast.cuh new file mode 100644 index 00000000000..b4007ad20f2 --- /dev/null +++ b/cpp/src/utilities/graph_bcast.cuh @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Andrei Schaffer, aschaffer@nvidia.com +// +#pragma once + +#include + +#include +#include + +#include + +namespace cugraph { +namespace broadcast { + +/** + * @brief broadcasts graph_t object (only the single GPU version). + * + * @tparam graph_t Type of graph (view). + * @param handle RAFT handle object to encapsulate resources (e.g. 
CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_ptr pointer to graph object: not `nullptr` on send, `nullptr` (ignored) on receive. + * @return graph_t object that was sent/received + */ +template +graph_t graph_broadcast(raft::handle_t const& handle, graph_t* graph_ptr) +{ + using namespace cugraph::serializer; + using namespace cugraph::experimental; + + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + if constexpr (!graph_t::is_multi_gpu) { + if (handle.get_comms().get_rank() == 0) { + CUGRAPH_EXPECTS(graph_ptr != nullptr, "Cannot serialize nullptr graph pointer."); + + auto pair = serializer_t::get_device_graph_sz_bytes(*graph_ptr); + thrust::tuple dev_sz_host_sz_bytes = + thrust::make_tuple(pair.first, pair.second); + + auto total_graph_dev_sz = pair.first + pair.second; + + serializer_t ser(handle, total_graph_dev_sz); + serializer_t::graph_meta_t graph_meta{}; + ser.serialize(*graph_ptr, graph_meta); + + int root{0}; + host_scalar_bcast(handle.get_comms(), dev_sz_host_sz_bytes, root, handle.get_stream()); + device_bcast(handle.get_comms(), + ser.get_storage(), + ser.get_storage(), + total_graph_dev_sz, + root, + handle.get_stream()); + + return std::move(*graph_ptr); + } else { + thrust::tuple dev_sz_host_sz_bytes(0, 0); + + int root{0}; + dev_sz_host_sz_bytes = + host_scalar_bcast(handle.get_comms(), dev_sz_host_sz_bytes, root, handle.get_stream()); + // + auto total_graph_dev_sz = + thrust::get<0>(dev_sz_host_sz_bytes) + thrust::get<1>(dev_sz_host_sz_bytes); + + CUGRAPH_EXPECTS(total_graph_dev_sz > 0, "Graph size comm failure."); + + rmm::device_uvector data_buffer(total_graph_dev_sz, + handle.get_stream_view()); + + device_bcast(handle.get_comms(), + data_buffer.data(), + data_buffer.data(), + total_graph_dev_sz, + root, + handle.get_stream()); + + serializer_t ser(handle, data_buffer.data()); + 
auto graph = ser.unserialize(thrust::get<0>(dev_sz_host_sz_bytes), + thrust::get<1>(dev_sz_host_sz_bytes)); + + return graph; + } + } else { + CUGRAPH_FAIL("Unsupported graph type for broadcasting."); + + return graph_t{handle}; + } +} + +} // namespace broadcast +} // namespace cugraph diff --git a/cpp/src/utilities/graph_utils.cuh b/cpp/src/utilities/graph_utils.cuh index ca0b5831c92..4eeab9376fa 100644 --- a/cpp/src/utilities/graph_utils.cuh +++ b/cpp/src/utilities/graph_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved. * * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -13,7 +13,7 @@ // Author: Alex Fender afender@nvidia.com #pragma once -#include +#include #include #include diff --git a/cpp/src/utilities/host_barrier.cpp b/cpp/src/utilities/host_barrier.cpp index 1c018d624ed..659e4038c67 100644 --- a/cpp/src/utilities/host_barrier.cpp +++ b/cpp/src/utilities/host_barrier.cpp @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -#include +#include #include diff --git a/cpp/src/utilities/path_retrieval.cu b/cpp/src/utilities/path_retrieval.cu index 93ead5898f8..2d862f659e5 100644 --- a/cpp/src/utilities/path_retrieval.cu +++ b/cpp/src/utilities/path_retrieval.cu @@ -19,8 +19,8 @@ #include -#include -#include +#include +#include namespace cugraph { namespace detail { diff --git a/cpp/src/utilities/spmv_1D.cuh b/cpp/src/utilities/spmv_1D.cuh index 81466595c19..31af0c75585 100644 --- a/cpp/src/utilities/spmv_1D.cuh +++ b/cpp/src/utilities/spmv_1D.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,8 +16,8 @@ #pragma once #include +#include #include -#include "utilities/error.hpp" namespace cugraph { namespace mg { diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 80484fdfad6..ec18640bc11 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -20,17 +20,41 @@ # - common test utils ----------------------------------------------------------------------------- add_library(cugraphtestutil STATIC - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/matrix_market_file_utilities.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/rmat_utilities.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/generate_graph_from_edgelist.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/thrust_wrapper.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/utilities/misc_utilities.cpp" - "${CMAKE_CURRENT_SOURCE_DIR}/components/wcc_graphs.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c") + utilities/matrix_market_file_utilities.cu + utilities/rmat_utilities.cu + utilities/thrust_wrapper.cu + utilities/misc_utilities.cpp + components/wcc_graphs.cu + ../../thirdparty/mmio/mmio.c) + +target_compile_options(cugraphtestutil + PUBLIC "$<$:${CUGRAPH_CXX_FLAGS}>" + "$:${CUGRAPH_CUDA_FLAGS}>>" +) set_property(TARGET cugraphtestutil PROPERTY POSITION_INDEPENDENT_CODE ON) target_include_directories(cugraphtestutil + PUBLIC + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio" + "${CMAKE_CURRENT_SOURCE_DIR}" + "${CUGRAPH_SOURCE_DIR}/src" +) + +target_link_libraries(cugraphtestutil + PUBLIC + cugraph + cuco::cuco + NCCL::NCCL +) + + +add_library(cugraphmgtestutil STATIC + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/device_comm_wrapper.cu") + +set_property(TARGET cugraphmgtestutil PROPERTY POSITION_INDEPENDENT_CODE ON) + +target_include_directories(cugraphmgtestutil PRIVATE "${CUB_INCLUDE_DIR}" "${THRUST_INCLUDE_DIR}" @@ -45,61 +69,28 @@ 
target_include_directories(cugraphtestutil "${RAFT_DIR}/cpp/include" ) -target_link_libraries(cugraphtestutil cugraph) - -# CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the -# compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent -# cmake warnings about policy CMP0104. With this setting, arch flags must be -# manually set! ("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism -# used in cpp/CMakeLists.txt for setting arch options). -# Run "cmake --help-policy CMP0104" for policy details. -# NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to -# the findcudatoolkit features in cmake 3.17+ -set_target_properties(cugraphtestutil PROPERTIES - CUDA_ARCHITECTURES OFF) +target_link_libraries(cugraphmgtestutil cugraph) ################################################################################################### # - compiler function ----------------------------------------------------------------------------- -function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC) - add_executable(${CMAKE_TEST_NAME} - ${CMAKE_TEST_SRC}) - - target_include_directories(${CMAKE_TEST_NAME} - PRIVATE - "${CUB_INCLUDE_DIR}" - "${THRUST_INCLUDE_DIR}" - "${CUCO_INCLUDE_DIR}" - "${LIBCUDACXX_INCLUDE_DIR}" - "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" - "${RMM_INCLUDE}" - "${NCCL_INCLUDE_DIRS}" - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio" - "${CMAKE_CURRENT_SOURCE_DIR}/../include" - "${CMAKE_CURRENT_SOURCE_DIR}/../src" - "${CMAKE_CURRENT_SOURCE_DIR}" - "${RAFT_DIR}/cpp/include" - ) - - target_link_directories(${CMAKE_TEST_NAME} - PRIVATE - # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported - # variable containing the link directories for nvcc. 
- "${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}") +function(ConfigureTest CMAKE_TEST_NAME) + add_executable(${CMAKE_TEST_NAME} ${ARGN}) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE - cugraphtestutil - cugraph - GTest::GTest - GTest::Main - ${NCCL_LIBRARIES} - cudart - cuda - cublas - cusparse - cusolver - curand) + cugraphtestutil + cugraph + GTest::gmock + GTest::gmock_main + GTest::gtest + GTest::gtest_main + NCCL::NCCL + CUDA::cublas + CUDA::cusparse + CUDA::cusolver + CUDA::curand + ) if(OpenMP_CXX_FOUND) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE @@ -152,59 +143,28 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC) ${OpenMP_CXX_LIB_NAMES}) endif(OpenMP_CXX_FOUND) - # CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the - # compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent - # cmake warnings about policy CMP0104. With this setting, arch flags must be - # manually set! ("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism - # used in cpp/CMakeLists.txt for setting arch options). - # Run "cmake --help-policy CMP0104" for policy details. 
- # NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to - # the findcudatoolkit features in cmake 3.17+ - set_target_properties(${CMAKE_TEST_NAME} PROPERTIES - CUDA_ARCHITECTURES OFF) - add_test(NAME ${CMAKE_TEST_NAME} COMMAND ${CMAKE_TEST_NAME}) endfunction() -function(ConfigureTestMG CMAKE_TEST_NAME CMAKE_TEST_SRC) - add_executable(${CMAKE_TEST_NAME} - ${CMAKE_TEST_SRC}) - - target_include_directories(${CMAKE_TEST_NAME} - PRIVATE - "${CUB_INCLUDE_DIR}" - "${THRUST_INCLUDE_DIR}" - "${CUCO_INCLUDE_DIR}" - "${LIBCUDACXX_INCLUDE_DIR}" - "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" - "${RMM_INCLUDE}" - "${NCCL_INCLUDE_DIRS}" - "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio" - "${CMAKE_CURRENT_SOURCE_DIR}/../include" - "${CMAKE_CURRENT_SOURCE_DIR}/../src" - "${CMAKE_CURRENT_SOURCE_DIR}" - "${RAFT_DIR}/cpp/include" - ) - - target_link_directories(${CMAKE_TEST_NAME} - PRIVATE - # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported - # variable containing the link directories for nvcc. - "${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}") +function(ConfigureTestMG CMAKE_TEST_NAME) + add_executable(${CMAKE_TEST_NAME} ${ARGN}) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE + cugraphmgtestutil cugraphtestutil cugraph - GTest::GTest - GTest::Main - ${NCCL_LIBRARIES} - cudart - cuda - cublas - cusparse - cusolver - curand) + GTest::gmock + GTest::gmock_main + GTest::gtest + GTest::gtest_main + NCCL::NCCL + CUDA::cublas + CUDA::cusparse + CUDA::cusolver + CUDA::curand + MPI::MPI_CXX + ) if(OpenMP_CXX_FOUND) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE @@ -257,17 +217,6 @@ function(ConfigureTestMG CMAKE_TEST_NAME CMAKE_TEST_SRC) ${OpenMP_CXX_LIB_NAMES}) endif(OpenMP_CXX_FOUND) - # CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the - # compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent - # cmake warnings about policy CMP0104. With this setting, arch flags must be - # manually set! 
("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism - # used in cpp/CMakeLists.txt for setting arch options). - # Run "cmake --help-policy CMP0104" for policy details. - # NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to - # the findcudatoolkit features in cmake 3.17+ - set_target_properties(${CMAKE_TEST_NAME} PROPERTIES - CUDA_ARCHITECTURES OFF) - add_test(NAME ${CMAKE_TEST_NAME} COMMAND ${MPIEXEC_EXECUTABLE} ${MPIEXEC_NUMPROC_FLAG} @@ -292,278 +241,181 @@ endif(RAPIDS_DATASET_ROOT_DIR) ################################################################################################### ################################################################################################### -# - katz centrality tests ------------------------------------------------------------------------- +# - graph generator tests ------------------------------------------------------------------------- -set(KATZ_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/centrality/katz_centrality_test.cu") +set(GRAPH_GENERATORS_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/generators/generators_test.cpp") - ConfigureTest(KATZ_TEST "${KATZ_TEST_SRC}") + ConfigureTest(GRAPH_GENERATORS_TEST "${GRAPH_GENERATORS_TEST_SRC}") ################################################################################################### -# - betweenness centrality tests ------------------------------------------------------------------ +# - erdos renyi graph generator tests ------------------------------------------------------------- -set(BETWEENNESS_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/centrality/betweenness_centrality_test.cu") +set(ERDOS_RENYI_GENERATOR_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/generators/erdos_renyi_test.cpp") - ConfigureTest(BETWEENNESS_TEST "${BETWEENNESS_TEST_SRC}") + ConfigureTest(ERDOS_RENYI_GENERATOR_TEST "${ERDOS_RENYI_GENERATOR_TEST_SRC}") -set(EDGE_BETWEENNESS_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/centrality/edge_betweenness_centrality_test.cu") 
+################################################################################################### +# - katz centrality tests ------------------------------------------------------------------------- +ConfigureTest(KATZ_TEST centrality/katz_centrality_test.cu) - ConfigureTest(EDGE_BETWEENNESS_TEST "${EDGE_BETWEENNESS_TEST_SRC}") +################################################################################################### +# - betweenness centrality tests ------------------------------------------------------------------ +ConfigureTest(BETWEENNESS_TEST centrality/betweenness_centrality_test.cu) +ConfigureTest(EDGE_BETWEENNESS_TEST centrality/edge_betweenness_centrality_test.cu) ################################################################################################### # - SSSP tests ------------------------------------------------------------------------------------ - -set(SSSP_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/traversal/sssp_test.cu") - -ConfigureTest(SSSP_TEST "${SSSP_TEST_SRCS}") +ConfigureTest(SSSP_TEST traversal/sssp_test.cu) ################################################################################################### # - BFS tests ------------------------------------------------------------------------------------- - -set(BFS_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/traversal/bfs_test.cu") - -ConfigureTest(BFS_TEST "${BFS_TEST_SRCS}") +ConfigureTest(BFS_TEST traversal/bfs_test.cu) ################################################################################################### # - LOUVAIN tests --------------------------------------------------------------------------------- - -set(LOUVAIN_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/community/louvain_test.cpp") - -ConfigureTest(LOUVAIN_TEST "${LOUVAIN_TEST_SRC}") +ConfigureTest(LOUVAIN_TEST community/louvain_test.cpp) ################################################################################################### # - LEIDEN tests 
--------------------------------------------------------------------------------- - -set(LEIDEN_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/community/leiden_test.cpp") - -ConfigureTest(LEIDEN_TEST "${LEIDEN_TEST_SRC}") +ConfigureTest(LEIDEN_TEST community/leiden_test.cpp) ################################################################################################### # - ECG tests --------------------------------------------------------------------------------- - -set(ECG_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/community/ecg_test.cpp") - -ConfigureTest(ECG_TEST "${ECG_TEST_SRC}") +ConfigureTest(ECG_TEST community/ecg_test.cpp) ################################################################################################### # - Balanced cut clustering tests ----------------------------------------------------------------- - -set(BALANCED_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/community/balanced_edge_test.cpp") - -ConfigureTest(BALANCED_TEST "${BALANCED_TEST_SRC}") +ConfigureTest(BALANCED_TEST community/balanced_edge_test.cpp) ################################################################################################### # - TRIANGLE tests -------------------------------------------------------------------------------- - -set(TRIANGLE_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/community/triangle_test.cu") - -ConfigureTest(TRIANGLE_TEST "${TRIANGLE_TEST_SRC}") +ConfigureTest(TRIANGLE_TEST community/triangle_test.cu) ################################################################################################### # - EGO tests -------------------------------------------------------------------------------- +ConfigureTest(EGO_TEST community/egonet_test.cu) -set(EGO_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/community/egonet_test.cu") - -ConfigureTest(EGO_TEST "${EGO_TEST_SRC}" "") ################################################################################################### # - RENUMBERING tests 
----------------------------------------------------------------------------- - -set(RENUMBERING_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/renumber/renumber_test.cu") - -ConfigureTest(RENUMBERING_TEST "${RENUMBERING_TEST_SRC}") +ConfigureTest(RENUMBERING_TEST renumber/renumber_test.cu) ################################################################################################### # - FORCE ATLAS 2 tests -------------------------------------------------------------------------- - -set(FA2_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/layout/force_atlas2_test.cu") - -ConfigureTest(FA2_TEST "${FA2_TEST_SRC}") +ConfigureTest(FA2_TEST layout/force_atlas2_test.cu) ################################################################################################### # - TSP tests -------------------------------------------------------------------------- - -set(TSP_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/traversal/tsp_test.cu") - - ConfigureTest(TSP_TEST "${TSP_TEST_SRC}" "") +ConfigureTest(TSP_TEST traversal/tsp_test.cu) ################################################################################################### # - CONNECTED COMPONENTS tests ------------------------------------------------------------------- - -set(CONNECT_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/components/con_comp_test.cu") - -ConfigureTest(CONNECT_TEST "${CONNECT_TEST_SRC}") +ConfigureTest(CONNECT_TEST components/con_comp_test.cu) ################################################################################################### # - STRONGLY CONNECTED COMPONENTS tests ---------------------------------------------------------- - -set(SCC_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/components/scc_test.cu") - -ConfigureTest(SCC_TEST "${SCC_TEST_SRC}") +ConfigureTest(SCC_TEST components/scc_test.cu) ################################################################################################### # - WEAKLY CONNECTED COMPONENTS tests ---------------------------------------------------------- - 
-set(WCC_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/components/wcc_test.cpp") - -ConfigureTest(WCC_TEST "${WCC_TEST_SRC}") +ConfigureTest(WCC_TEST components/wcc_test.cpp) ################################################################################################### -#-Hungarian (Linear Assignment Problem) tests --------------------------------------------------------------------- - -set(HUNGARIAN_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/linear_assignment/hungarian_test.cu") - -ConfigureTest(HUNGARIAN_TEST "${HUNGARIAN_TEST_SRC}") +#-Hungarian (Linear Assignment Problem) tests ---------------------------------------------------- +ConfigureTest(HUNGARIAN_TEST linear_assignment/hungarian_test.cu) ################################################################################################### # - MST tests ---------------------------------------------------------------------------- - -set(MST_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/tree/mst_test.cu") - -ConfigureTest(MST_TEST "${MST_TEST_SRC}") +ConfigureTest(MST_TEST tree/mst_test.cu) ################################################################################################### # - Experimental stream tests ----------------------------------------------------- - -set(EXPERIMENTAL_STREAM_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/streams.cu") - -ConfigureTest(EXPERIMENTAL_STREAM "${EXPERIMENTAL_STREAM_SRCS}" "") +ConfigureTest(EXPERIMENTAL_STREAM experimental/streams.cu) ################################################################################################### # - Experimental R-mat graph generation tests ----------------------------------------------------- - -set(EXPERIMENTAL_GENERATE_RMAT_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/generate_rmat_test.cpp") - -ConfigureTest(EXPERIMENTAL_GENERATE_RMAT_TEST "${EXPERIMENTAL_GENERATE_RMAT_TEST_SRCS}" "") +ConfigureTest(EXPERIMENTAL_GENERATE_RMAT_TEST experimental/generate_rmat_test.cpp) 
################################################################################################### # - Experimental Graph tests ---------------------------------------------------------------------- - -set(EXPERIMENTAL_GRAPH_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/graph_test.cpp") - -ConfigureTest(EXPERIMENTAL_GRAPH_TEST "${EXPERIMENTAL_GRAPH_TEST_SRCS}") +ConfigureTest(EXPERIMENTAL_GRAPH_TEST experimental/graph_test.cpp) ################################################################################################### # - Experimental weight-sum tests ----------------------------------------------------------------- - -set(EXPERIMENTAL_WEIGHT_SUM_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/weight_sum_test.cpp") - -ConfigureTest(EXPERIMENTAL_WEIGHT_SUM_TEST "${EXPERIMENTAL_WEIGHT_SUM_TEST_SRCS}") +ConfigureTest(EXPERIMENTAL_WEIGHT_SUM_TEST experimental/weight_sum_test.cpp) ################################################################################################### # - Experimental degree tests --------------------------------------------------------------------- - -set(EXPERIMENTAL_DEGREE_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/degree_test.cpp") - -ConfigureTest(EXPERIMENTAL_DEGREE_TEST "${EXPERIMENTAL_DEGREE_TEST_SRCS}") +ConfigureTest(EXPERIMENTAL_DEGREE_TEST experimental/degree_test.cpp) ################################################################################################### # - Experimental coarsening tests ----------------------------------------------------------------- - -set(EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/coarsen_graph_test.cpp") - -ConfigureTest(EXPERIMENTAL_COARSEN_GRAPH_TEST "${EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS}") +ConfigureTest(EXPERIMENTAL_COARSEN_GRAPH_TEST experimental/coarsen_graph_test.cpp) ################################################################################################### # - Experimental induced subgraph tests 
----------------------------------------------------------- - -set(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/induced_subgraph_test.cpp") - -ConfigureTest(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST "${EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS}") +ConfigureTest(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST experimental/induced_subgraph_test.cpp) ################################################################################################### # - Experimental BFS tests ------------------------------------------------------------------------ - -set(EXPERIMENTAL_BFS_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/bfs_test.cpp") - -ConfigureTest(EXPERIMENTAL_BFS_TEST "${EXPERIMENTAL_BFS_TEST_SRCS}") +ConfigureTest(EXPERIMENTAL_BFS_TEST experimental/bfs_test.cpp) ################################################################################################### -# - Experimental BFS tests ------------------------------------------------------------------------ - -set(EXPERIMENTAL_MSBFS_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/ms_bfs_test.cpp") - -ConfigureTest(EXPERIMENTAL_MSBFS_TEST "${EXPERIMENTAL_MSBFS_TEST_SRCS}") - +# - Experimental Multi-source BFS tests ----------------------------------------------------------- +ConfigureTest(EXPERIMENTAL_MSBFS_TEST experimental/ms_bfs_test.cpp) ################################################################################################### # - Experimental SSSP tests ----------------------------------------------------------------------- - -set(EXPERIMENTAL_SSSP_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/sssp_test.cpp") - -ConfigureTest(EXPERIMENTAL_SSSP_TEST "${EXPERIMENTAL_SSSP_TEST_SRCS}") +ConfigureTest(EXPERIMENTAL_SSSP_TEST experimental/sssp_test.cpp) ################################################################################################### # - Experimental PAGERANK tests ------------------------------------------------------------------- - 
-set(EXPERIMENTAL_PAGERANK_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/pagerank_test.cpp") - -ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}") +ConfigureTest(EXPERIMENTAL_PAGERANK_TEST experimental/pagerank_test.cpp) ################################################################################################### # - Experimental KATZ_CENTRALITY tests ------------------------------------------------------------ - -set(EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/katz_centrality_test.cpp") - -ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS}") +ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST experimental/katz_centrality_test.cpp) ################################################################################################### -# - Experimental RANDOM_WALKS tests ------------------------------------------------------------ +# - WEAKLY CONNECTED COMPONENTS tests ------------------------------------------------------------- -set(RANDOM_WALKS_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/sampling/random_walks_test.cu") +set(WEAKLY_CONNECTED_COMPONENTS_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/components/weakly_connected_components_test.cpp") -ConfigureTest(RANDOM_WALKS_TEST "${RANDOM_WALKS_TEST_SRCS}") +ConfigureTest(WEAKLY_CONNECTED_COMPONENTS_TEST "${WEAKLY_CONNECTED_COMPONENTS_TEST_SRCS}") ################################################################################################### -set(RANDOM_WALKS_LOW_LEVEL_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/sampling/rw_low_level_test.cu") - -ConfigureTest(RANDOM_WALKS_LOW_LEVEL_TEST "${RANDOM_WALKS_LOW_LEVEL_SRCS}") +# - Experimental RANDOM_WALKS tests --------------------------------------------------------------- +ConfigureTest(RANDOM_WALKS_TEST sampling/random_walks_test.cu) ################################################################################################### -set(RANDOM_WALKS_PROFILING_SRCS 
- "${CMAKE_CURRENT_SOURCE_DIR}/sampling/random_walks_profiling.cu") +ConfigureTest(RANDOM_WALKS_LOW_LEVEL_TEST sampling/rw_low_level_test.cu) +################################################################################################### # FIXME: since this is technically not a test, consider refactoring the the # ConfigureTest function to share common code with a new ConfigureBenchmark # function (which would not link gtest, etc.) -ConfigureTest(RANDOM_WALKS_PROFILING "${RANDOM_WALKS_PROFILING_SRCS}") +ConfigureTest(RANDOM_WALKS_PROFILING sampling/random_walks_profiling.cu) + +################################################################################################### +# - Serialization tests --------------------------------------------------------------------------- + +set(SERIALIZATION_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/serialization/un_serialize_test.cpp") + +ConfigureTest(SERIALIZATION_TEST "${SERIALIZATION_TEST_SRCS}") ################################################################################################### # - MG tests -------------------------------------------------------------------------------------- if(BUILD_CUGRAPH_MG_TESTS) + + ############################################################################################### + # - find MPI - only enabled if MG tests are to be built + find_package(MPI REQUIRED) + execute_process( COMMAND nvidia-smi -L COMMAND wc -l @@ -575,54 +427,38 @@ if(BUILD_CUGRAPH_MG_TESTS) if(MPI_CXX_FOUND) ########################################################################################### # - MG PAGERANK tests --------------------------------------------------------------------- - - set(MG_PAGERANK_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/pagerank/mg_pagerank_test.cpp") - - ConfigureTestMG(MG_PAGERANK_TEST "${MG_PAGERANK_TEST_SRCS}") - target_link_libraries(MG_PAGERANK_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + ConfigureTestMG(MG_PAGERANK_TEST pagerank/mg_pagerank_test.cpp) 
########################################################################################### # - MG KATZ CENTRALITY tests -------------------------------------------------------------- - - set(MG_KATZ_CENTRALITY_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/mg_katz_centrality_test.cpp") - - ConfigureTestMG(MG_KATZ_CENTRALITY_TEST "${MG_KATZ_CENTRALITY_TEST_SRCS}") - target_link_libraries(MG_KATZ_CENTRALITY_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + ConfigureTestMG(MG_KATZ_CENTRALITY_TEST experimental/mg_katz_centrality_test.cpp) ########################################################################################### # - MG BFS tests -------------------------------------------------------------------------- - - set(MG_BFS_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/mg_bfs_test.cpp") - - ConfigureTestMG(MG_BFS_TEST "${MG_BFS_TEST_SRCS}") - target_link_libraries(MG_BFS_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + ConfigureTestMG(MG_BFS_TEST experimental/mg_bfs_test.cpp) ########################################################################################### # - MG SSSP tests ------------------------------------------------------------------------- - - set(MG_SSSP_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/experimental/mg_sssp_test.cpp") - - ConfigureTestMG(MG_SSSP_TEST "${MG_SSSP_TEST_SRCS}") - target_link_libraries(MG_SSSP_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + ConfigureTestMG(MG_SSSP_TEST experimental/mg_sssp_test.cpp) ########################################################################################### # - MG LOUVAIN tests ---------------------------------------------------------------------- + ConfigureTestMG(MG_LOUVAIN_TEST + community/mg_louvain_helper.cu + community/mg_louvain_test.cpp) - set(MG_LOUVAIN_TEST_SRCS - "${CMAKE_CURRENT_SOURCE_DIR}/community/mg_louvain_helper.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/community/mg_louvain_test.cpp") - - ConfigureTestMG(MG_LOUVAIN_TEST "${MG_LOUVAIN_TEST_SRCS}") - target_link_libraries(MG_LOUVAIN_TEST 
PRIVATE MPI::MPI_C MPI::MPI_CXX) + ########################################################################################### + # - MG WEAKLY CONNECTED COMPONENTS tests -------------------------------------------------- + ConfigureTestMG(MG_WEAKLY_CONNECTED_COMPONENTS_TEST + components/mg_weakly_connected_components_test.cpp) - else(MPI_CXX_FOUND) + ########################################################################################### + # - MG GRAPH BROADCAST tests -------------------------------------------------------------- + ConfigureTestMG(MG_GRAPH_BROADCAST_TEST bcast/mg_graph_bcast.cpp) + else() message(FATAL_ERROR "OpenMPI NOT found, cannot build MG tests.") - endif(MPI_CXX_FOUND) -endif(BUILD_CUGRAPH_MG_TESTS) + endif() +endif() ################################################################################################### ### enable testing ################################################################################ diff --git a/cpp/tests/bcast/mg_graph_bcast.cpp b/cpp/tests/bcast/mg_graph_bcast.cpp new file mode 100644 index 00000000000..dbb2460abf4 --- /dev/null +++ b/cpp/tests/bcast/mg_graph_bcast.cpp @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +// Andrei Schaffer, aschaffer@nvidia.com +// +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include + +#include + +//////////////////////////////////////////////////////////////////////////////// +// Test param object. This defines the input and expected output for a test, and +// will be instantiated as the parameter to the tests defined below using +// INSTANTIATE_TEST_SUITE_P() +// +struct GraphBcast_Usecase { + std::string graph_file_full_path{}; + + // FIXME: We really should have a Graph_Testparms_Base class or something + // like that which can handle this graph_full_path thing. + // + explicit GraphBcast_Usecase(std::string const& graph_file_path) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +}; + +//////////////////////////////////////////////////////////////////////////////// +// Parameterized test fixture, to be used with TEST_P(). This defines common +// setup and teardown steps as well as common utilities used by each E2E MG +// test. In this case, each test is identical except for the inputs and +// expected outputs, so the entire test is defined in the run_test() method. 
+// +class GraphBcast_MG_Testfixture : public ::testing::TestWithParam { + public: + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + // Run once for each test instance + // + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of broadcasting a graph, + // by comparing the graph that was sent (`sg_graph`) + // with the one that was received (`graph_copy`): + // + template + void run_test(const GraphBcast_Usecase& param) + { + using namespace cugraph::broadcast; + using sg_graph_t = cugraph::experimental::graph_t; + + raft::handle_t handle; + + raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); + const auto& comm = handle.get_comms(); + + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + cudaStream_t stream = handle.get_stream(); + + sg_graph_t sg_graph(handle); + + rmm::device_uvector d_renumber_map_labels(0, stream); + + std::tie(sg_graph, d_renumber_map_labels) = + cugraph::test::read_graph_from_matrix_market_file( + handle, param.graph_file_full_path, true, /*renumber=*/false); + + if (comm_rank == 0) { + graph_broadcast(handle, &sg_graph); + ; + } else { + sg_graph_t* g_ignore{nullptr}; + auto graph_copy = graph_broadcast(handle, g_ignore); + auto [same, str_fail] = cugraph::test::compare_graphs(handle, sg_graph, graph_copy); + + if (!same) std::cerr << "Graph comparison failed on " << str_fail << '\n'; + + ASSERT_TRUE(same); + } + } +}; + +//////////////////////////////////////////////////////////////////////////////// +TEST_P(GraphBcast_MG_Testfixture, CheckInt32Int32Float) +{ + run_test(GetParam()); +} + +INSTANTIATE_TEST_SUITE_P(simple_test, + GraphBcast_MG_Testfixture, + ::testing::Values(GraphBcast_Usecase("test/datasets/karate.mtx") + //,GraphBcast_Usecase("test/datasets/smallworld.mtx") + )); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/centrality/betweenness_centrality_test.cu b/cpp/tests/centrality/betweenness_centrality_test.cu index
89168618b9c..4cafab68986 100644 --- a/cpp/tests/centrality/betweenness_centrality_test.cu +++ b/cpp/tests/centrality/betweenness_centrality_test.cu @@ -18,8 +18,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/tests/centrality/edge_betweenness_centrality_test.cu b/cpp/tests/centrality/edge_betweenness_centrality_test.cu index 50cbef86e11..e31af4dba77 100644 --- a/cpp/tests/centrality/edge_betweenness_centrality_test.cu +++ b/cpp/tests/centrality/edge_betweenness_centrality_test.cu @@ -26,8 +26,8 @@ #include -#include -#include +#include +#include #include #include diff --git a/cpp/tests/centrality/katz_centrality_test.cu b/cpp/tests/centrality/katz_centrality_test.cu index 114a89858b8..44e52a7626f 100644 --- a/cpp/tests/centrality/katz_centrality_test.cu +++ b/cpp/tests/centrality/katz_centrality_test.cu @@ -20,8 +20,8 @@ #include -#include -#include +#include +#include #include #include diff --git a/cpp/tests/community/balanced_edge_test.cpp b/cpp/tests/community/balanced_edge_test.cpp index 81cee945821..a4bd8de769f 100644 --- a/cpp/tests/community/balanced_edge_test.cpp +++ b/cpp/tests/community/balanced_edge_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
* * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -10,7 +10,7 @@ */ #include -#include +#include #include diff --git a/cpp/tests/community/ecg_test.cpp b/cpp/tests/community/ecg_test.cpp index a13ee2fe360..0f0960b0abb 100644 --- a/cpp/tests/community/ecg_test.cpp +++ b/cpp/tests/community/ecg_test.cpp @@ -10,8 +10,8 @@ */ #include -#include -#include +#include +#include #include diff --git a/cpp/tests/community/egonet_test.cu b/cpp/tests/community/egonet_test.cu index 27a235ee15b..6f1ca4eb374 100644 --- a/cpp/tests/community/egonet_test.cu +++ b/cpp/tests/community/egonet_test.cu @@ -18,9 +18,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include #include diff --git a/cpp/tests/community/leiden_test.cpp b/cpp/tests/community/leiden_test.cpp index 9083400f85c..a586810b6b6 100644 --- a/cpp/tests/community/leiden_test.cpp +++ b/cpp/tests/community/leiden_test.cpp @@ -10,8 +10,8 @@ */ #include -#include -#include +#include +#include #include diff --git a/cpp/tests/community/louvain_test.cpp b/cpp/tests/community/louvain_test.cpp index 43d274e6723..821e8651d70 100644 --- a/cpp/tests/community/louvain_test.cpp +++ b/cpp/tests/community/louvain_test.cpp @@ -16,9 +16,9 @@ #include #include -#include +#include -#include +#include #include #include diff --git a/cpp/tests/community/mg_louvain_helper.cu b/cpp/tests/community/mg_louvain_helper.cu index d62eaa1ec55..935c36c9232 100644 --- a/cpp/tests/community/mg_louvain_helper.cu +++ b/cpp/tests/community/mg_louvain_helper.cu @@ -16,11 +16,11 @@ #include "mg_louvain_helper.hpp" -#include +#include -#include -#include -#include +#include +#include +#include #include @@ -31,74 +31,6 @@ namespace cugraph { namespace test { -template -rmm::device_uvector gather_distributed_vector(raft::handle_t const &handle, - T const *d_input, - size_t size) -{ - auto rx_sizes = - 
cugraph::experimental::host_scalar_gather(handle.get_comms(), size, 0, handle.get_stream()); - std::vector rx_displs(static_cast(handle.get_comms().get_rank()) == 0 - ? handle.get_comms().get_size() - : int{0}, - size_t{0}); - if (static_cast(handle.get_comms().get_rank()) == 0) { - std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1); - } - - auto total_size = thrust::reduce(thrust::host, rx_sizes.begin(), rx_sizes.end()); - rmm::device_uvector gathered_v(total_size, handle.get_stream()); - - cugraph::experimental::device_gatherv(handle.get_comms(), - d_input, - gathered_v.data(), - size, - rx_sizes, - rx_displs, - 0, - handle.get_stream()); - - return gathered_v; -} - -template -bool compare_renumbered_vectors(raft::handle_t const &handle, - rmm::device_uvector const &v1, - rmm::device_uvector const &v2) -{ - vertex_t max = 1 + thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - v1.begin(), - v1.end(), - vertex_t{0}); - - rmm::device_uvector map(max, handle.get_stream()); - - auto iter = thrust::make_zip_iterator(thrust::make_tuple(v1.begin(), v2.begin())); - - thrust::for_each(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - iter, - iter + v1.size(), - [d_map = map.data()] __device__(auto pair) { - vertex_t e1 = thrust::get<0>(pair); - vertex_t e2 = thrust::get<1>(pair); - - d_map[e1] = e2; - }); - - auto error_count = - thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - iter, - iter + v1.size(), - [d_map = map.data()] __device__(auto pair) { - vertex_t e1 = thrust::get<0>(pair); - vertex_t e2 = thrust::get<1>(pair); - - return (d_map[e1] != e2); - }); - - return (error_count == 0); -} - template void single_gpu_renumber_edgelist_given_number_map(raft::handle_t const &handle, rmm::device_uvector &edgelist_rows_v, @@ -336,14 +268,6 @@ template void single_gpu_renumber_edgelist_given_number_map( rmm::device_uvector &d_edgelist_cols, rmm::device_uvector 
&d_renumber_map_gathered_v); -template rmm::device_uvector gather_distributed_vector(raft::handle_t const &handle, - int const *d_input, - size_t size); - -template bool compare_renumbered_vectors(raft::handle_t const &handle, - rmm::device_uvector const &v1, - rmm::device_uvector const &v2); - template std::unique_ptr> coarsen_graph( raft::handle_t const &handle, diff --git a/cpp/tests/community/mg_louvain_helper.hpp b/cpp/tests/community/mg_louvain_helper.hpp index 43eb294cd13..5ed710b7417 100644 --- a/cpp/tests/community/mg_louvain_helper.hpp +++ b/cpp/tests/community/mg_louvain_helper.hpp @@ -16,7 +16,7 @@ #pragma once -#include +#include #include #include @@ -24,10 +24,10 @@ namespace cugraph { namespace test { -template -rmm::device_uvector gather_distributed_vector(raft::handle_t const &handle, - T const *d_input, - size_t size); +template +bool compare_renumbered_vectors(raft::handle_t const &handle, + std::vector const &v1, + std::vector const &v2); template bool compare_renumbered_vectors(raft::handle_t const &handle, diff --git a/cpp/tests/community/mg_louvain_test.cpp b/cpp/tests/community/mg_louvain_test.cpp index 4b398f0a4aa..9c6d7bb4491 100644 --- a/cpp/tests/community/mg_louvain_test.cpp +++ b/cpp/tests/community/mg_louvain_test.cpp @@ -17,10 +17,12 @@ #include "mg_louvain_helper.hpp" #include +#include #include -#include -#include +#include +#include +#include #include #include @@ -128,14 +130,14 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam handle, d_edgelist_rows, d_edgelist_cols, d_renumber_map_gathered_v); std::tie(*sg_graph, std::ignore) = - cugraph::test::generate_graph_from_edgelist( + cugraph::experimental::create_graph_from_edgelist( handle, - std::move(d_vertices), + std::optional>{ + std::make_tuple(d_vertices.data(), static_cast(d_vertices.size()))}, std::move(d_edgelist_rows), std::move(d_edgelist_cols), std::move(d_edgelist_weights), - is_symmetric, - true, + cugraph::experimental::graph_properties_t{is_symmetric, 
false, true}, false); } @@ -144,7 +146,7 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam thrust::make_counting_iterator(dendrogram.num_levels()), [&dendrogram, &sg_graph, &d_clustering_v, &sg_modularity, &handle, resolution, rank]( size_t i) { - auto d_dendrogram_gathered_v = cugraph::test::gather_distributed_vector( + auto d_dendrogram_gathered_v = cugraph::test::device_gatherv( handle, dendrogram.get_level_ptr_nocheck(i), dendrogram.get_level_size_nocheck(i)); if (rank == 0) { @@ -155,7 +157,7 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam std::tie(std::ignore, sg_modularity) = cugraph::louvain(handle, graph_view, d_clustering_v.data(), size_t{1}, resolution); - EXPECT_TRUE(cugraph::test::compare_renumbered_vectors( + EXPECT_TRUE(cugraph::test::renumbered_vectors_same( handle, d_clustering_v, d_dendrogram_gathered_v)); sg_graph = @@ -207,7 +209,7 @@ class Louvain_MG_Testfixture : public ::testing::TestWithParam SCOPED_TRACE("compare modularity input: " + param.graph_file_full_path); - auto d_renumber_map_gathered_v = cugraph::test::gather_distributed_vector( + auto d_renumber_map_gathered_v = cugraph::test::device_gatherv( handle, d_renumber_map_labels.data(), d_renumber_map_labels.size()); compare_sg_results(handle, diff --git a/cpp/tests/community/triangle_test.cu b/cpp/tests/community/triangle_test.cu index 1c5c99261d2..b40c4734a14 100644 --- a/cpp/tests/community/triangle_test.cu +++ b/cpp/tests/community/triangle_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
* * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -10,8 +10,8 @@ */ #include -#include -#include +#include +#include #include @@ -63,4 +63,47 @@ TEST(triangle, dolphin) ASSERT_EQ(count, expected); } +TEST(triangle, karate) +{ + using vertex_t = int32_t; + using edge_t = int32_t; + using weight_t = float; + + std::vector off_h = {0, 16, 25, 35, 41, 44, 48, 52, 56, 61, 63, 66, + 67, 69, 74, 76, 78, 80, 82, 84, 87, 89, 91, 93, + 98, 101, 104, 106, 110, 113, 117, 121, 127, 139, 156}; + std::vector ind_h = { + 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 17, 19, 21, 31, 0, 2, 3, 7, 13, 17, 19, + 21, 30, 0, 1, 3, 7, 8, 9, 13, 27, 28, 32, 0, 1, 2, 7, 12, 13, 0, 6, 10, 0, 6, + 10, 16, 0, 4, 5, 16, 0, 1, 2, 3, 0, 2, 30, 32, 33, 2, 33, 0, 4, 5, 0, 0, 3, + 0, 1, 2, 3, 33, 32, 33, 32, 33, 5, 6, 0, 1, 32, 33, 0, 1, 33, 32, 33, 0, 1, 32, + 33, 25, 27, 29, 32, 33, 25, 27, 31, 23, 24, 31, 29, 33, 2, 23, 24, 33, 2, 31, 33, 23, 26, + 32, 33, 1, 8, 32, 33, 0, 24, 25, 28, 32, 33, 2, 8, 14, 15, 18, 20, 22, 23, 29, 30, 31, + 33, 8, 9, 13, 14, 15, 18, 19, 20, 22, 23, 26, 27, 28, 29, 30, 31, 32}; + + std::vector w_h(ind_h.size(), weight_t{1.0}); + + vertex_t num_verts = off_h.size() - 1; + int num_edges = ind_h.size(); + + uint64_t expected{135}; + + rmm::device_vector offsets_v(off_h); + rmm::device_vector indices_v(ind_h); + rmm::device_vector weights_v(w_h); + + cugraph::GraphCSRView graph_csr( + offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); + + uint64_t count{0}; + + try { + count = cugraph::triangle::triangle_count(graph_csr); + } catch (std::exception& e) { + std::cout << "Exception: " << e.what() << std::endl; + } + + ASSERT_EQ(count, expected); +} + CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/components/con_comp_test.cu b/cpp/tests/components/con_comp_test.cu index fdae77f2384..e394d5fc97c 100644 --- 
a/cpp/tests/components/con_comp_test.cu +++ b/cpp/tests/components/con_comp_test.cu @@ -18,9 +18,9 @@ #include -#include #include -#include +#include +#include #include #include diff --git a/cpp/tests/components/mg_weakly_connected_components_test.cpp b/cpp/tests/components/mg_weakly_connected_components_test.cpp new file mode 100644 index 00000000000..a64919c4f92 --- /dev/null +++ b/cpp/tests/components/mg_weakly_connected_components_test.cpp @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +// do the perf measurements +// enabled by command line parameter '--perf' +// +static int PERF = 0; + +struct WeaklyConnectedComponents_Usecase { + bool check_correctness{true}; +}; + +template +class Tests_MGWeaklyConnectedComponents + : public ::testing::TestWithParam< + std::tuple> { + public: + Tests_MGWeaklyConnectedComponents() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of running weakly connected components on multiple GPUs to that of a + // single-GPU run + template + void run_current_test( + WeaklyConnectedComponents_Usecase const& weakly_connected_components_usecase, + input_usecase_t const& input_usecase) + { + using weight_t = float; + + // 1. initialize handle + + raft::handle_t handle{}; + HighResClock hr_clock{}; + + raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + auto row_comm_size = static_cast(sqrt(static_cast(comm_size))); + while (comm_size % row_comm_size != 0) { --row_comm_size; } + cugraph::partition_2d::subcomm_factory_t + subcomm_factory(handle, row_comm_size); + + // 2.
create MG graph + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + hr_clock.start(); + } + + cugraph::experimental::graph_t mg_graph(handle); + rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); + std::tie(mg_graph, d_mg_renumber_map_labels) = + input_usecase.template construct_graph( + handle, false, true); + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG construct_graph took " << elapsed_time * 1e-6 << " s.\n"; + } + + auto mg_graph_view = mg_graph.view(); + + // 3. run MG weakly connected components + + rmm::device_uvector d_mg_components(mg_graph_view.get_number_of_local_vertices(), + handle.get_stream()); + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + hr_clock.start(); + } + + cugraph::experimental::weakly_connected_components( + handle, mg_graph_view, d_mg_components.data()); + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "MG weakly_connected_components took " << elapsed_time * 1e-6 << " s.\n"; + } + + // 4. compare SG & MG results + + if (weakly_connected_components_usecase.check_correctness) { + // 4-1. aggregate MG results + + auto d_mg_aggregate_renumber_map_labels = cugraph::test::device_gatherv( + handle, d_mg_renumber_map_labels.data(), d_mg_renumber_map_labels.size()); + auto d_mg_aggregate_components = + cugraph::test::device_gatherv(handle, d_mg_components.data(), d_mg_components.size()); + + if (handle.get_comms().get_rank() == int{0}) { + // 4-2. 
unrenumber MG results + + std::tie(std::ignore, d_mg_aggregate_components) = + cugraph::test::sort_by_key(handle, + d_mg_aggregate_renumber_map_labels.data(), + d_mg_aggregate_components.data(), + d_mg_aggregate_renumber_map_labels.size()); + + // 4-3. create SG graph + + cugraph::experimental::graph_t sg_graph(handle); + std::tie(sg_graph, std::ignore) = + input_usecase.template construct_graph( + handle, false, false); + + auto sg_graph_view = sg_graph.view(); + + ASSERT_TRUE(mg_graph_view.get_number_of_vertices() == + sg_graph_view.get_number_of_vertices()); + + // 4-4. run SG weakly connected components + + rmm::device_uvector d_sg_components(sg_graph_view.get_number_of_vertices(), + handle.get_stream()); + + cugraph::experimental::weakly_connected_components( + handle, sg_graph_view, d_sg_components.data()); + + // 4-5. compare + + std::vector h_mg_aggregate_components(mg_graph_view.get_number_of_vertices()); + raft::update_host(h_mg_aggregate_components.data(), + d_mg_aggregate_components.data(), + d_mg_aggregate_components.size(), + handle.get_stream()); + + std::vector h_sg_components(sg_graph_view.get_number_of_vertices()); + raft::update_host(h_sg_components.data(), + d_sg_components.data(), + d_sg_components.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + std::unordered_map mg_to_sg_map{}; + for (size_t i = 0; i < h_sg_components.size(); ++i) { + mg_to_sg_map.insert({h_mg_aggregate_components[i], h_sg_components[i]}); + } + std::transform(h_mg_aggregate_components.begin(), + h_mg_aggregate_components.end(), + h_mg_aggregate_components.begin(), + [&mg_to_sg_map](auto mg_c) { return mg_to_sg_map[mg_c]; }); + + ASSERT_TRUE(std::equal( + h_sg_components.begin(), h_sg_components.end(), h_mg_aggregate_components.begin())) + << "components do not match with the SG values."; + } + } + } +}; + +using Tests_MGWeaklyConnectedComponents_File = + Tests_MGWeaklyConnectedComponents; +using Tests_MGWeaklyConnectedComponents_Rmat = +
Tests_MGWeaklyConnectedComponents; + +TEST_P(Tests_MGWeaklyConnectedComponents_File, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGWeaklyConnectedComponents_Rmat, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MGWeaklyConnectedComponents_File, + ::testing::Combine( + // enable correctness checks + ::testing::Values(WeaklyConnectedComponents_Usecase{0}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/polbooks.mtx"), + cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); + +INSTANTIATE_TEST_SUITE_P(rmat_small_test, + Tests_MGWeaklyConnectedComponents_Rmat, + ::testing::Values( + // enable correctness checks + std::make_tuple(WeaklyConnectedComponents_Usecase{}, + cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, true, false, 0, true)))); + +INSTANTIATE_TEST_SUITE_P(rmat_large_test, + Tests_MGWeaklyConnectedComponents_Rmat, + ::testing::Values( + // disable correctness checks + std::make_tuple(WeaklyConnectedComponents_Usecase{false}, + cugraph::test::Rmat_Usecase( + 20, 16, 0.57, 0.19, 0.19, 0, true, false, 0, true)))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/components/scc_test.cu b/cpp/tests/components/scc_test.cu index b875a459bd0..0d2e87c40a2 100644 --- a/cpp/tests/components/scc_test.cu +++ b/cpp/tests/components/scc_test.cu @@ -16,10 +16,10 @@ #include #include -#include #include #include -#include +#include +#include #include #include diff --git a/cpp/tests/components/wcc_graphs.cu b/cpp/tests/components/wcc_graphs.cu index fb11f872fb8..8cbe18ed71e 100644 --- a/cpp/tests/components/wcc_graphs.cu +++ b/cpp/tests/components/wcc_graphs.cu @@ -12,6 +12,8 @@ #include #include +#include + #include #include @@ -68,15 +70,16 @@ 
LineGraph_Usecase::construct_graph(raft::handle_t const& handle, handle.get_stream_view().synchronize(); - return generate_graph_from_edgelist( - handle, - std::move(vertices_v), - std::move(src_v), - std::move(dst_v), - std::move(weights_v), - true, - false, - false); + return cugraph::experimental:: + create_graph_from_edgelist( + handle, + std::optional>{ + std::make_tuple(vertices_v.data(), static_cast(vertices_v.size()))}, + std::move(src_v), + std::move(dst_v), + std::move(weights_v), + cugraph::experimental::graph_properties_t{true, false, false}, + false); } template std::tuple, diff --git a/cpp/tests/components/wcc_graphs.hpp b/cpp/tests/components/wcc_graphs.hpp index 2b5955c2b78..18989b9b46f 100644 --- a/cpp/tests/components/wcc_graphs.hpp +++ b/cpp/tests/components/wcc_graphs.hpp @@ -9,7 +9,7 @@ * */ -#include +#include #include diff --git a/cpp/tests/components/wcc_test.cpp b/cpp/tests/components/wcc_test.cpp index 962ecefe8f3..9f6254d445f 100644 --- a/cpp/tests/components/wcc_test.cpp +++ b/cpp/tests/components/wcc_test.cpp @@ -9,12 +9,12 @@ * */ -#include #include +#include #include -#include -#include +#include +#include #include #include @@ -42,9 +42,13 @@ class Tests_WCC : public ::testing::TestWithParam graph(handle); + std::cout << "calling construct_graph" << std::endl; + std::tie(graph, std::ignore) = - input_usecase.template construct_graph( - handle, false, false); + cugraph::test::construct_graph( + handle, input_usecase, false, false); + + std::cout << "back from construct_graph" << std::endl; auto graph_view = graph.view(); @@ -59,7 +63,7 @@ class Tests_WCC : public ::testing::TestWithParam; using Tests_WCC_Rmat = Tests_WCC; -using Tests_WCC_LineGraph = Tests_WCC; +using Tests_WCC_PathGraph = Tests_WCC; TEST_P(Tests_WCC_File, WCC) { @@ -71,7 +75,7 @@ TEST_P(Tests_WCC_Rmat, WCC) auto param = GetParam(); run_current_test(std::get<0>(param), std::get<1>(param)); } -TEST_P(Tests_WCC_LineGraph, WCC) +TEST_P(Tests_WCC_PathGraph, WCC) { auto 
param = GetParam(); run_current_test(std::get<0>(param), std::get<1>(param)); @@ -89,9 +93,13 @@ INSTANTIATE_TEST_SUITE_P( std::make_tuple(WCC_Usecase{}, cugraph::test::File_Usecase("test/datasets/hollywood.mtx")))); INSTANTIATE_TEST_SUITE_P( - line_graph_test, - Tests_WCC_LineGraph, - ::testing::Values(std::make_tuple(WCC_Usecase{}, cugraph::test::LineGraph_Usecase(1000)), - std::make_tuple(WCC_Usecase{}, cugraph::test::LineGraph_Usecase(100000)))); + path_graph_test, + Tests_WCC_PathGraph, + ::testing::Values(std::make_tuple(WCC_Usecase{}, + cugraph::test::PathGraph_Usecase( + std::vector>({{1000, 0}}))), + std::make_tuple(WCC_Usecase{}, + cugraph::test::PathGraph_Usecase( + std::vector>({{100000, 0}}))))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/components/weakly_connected_components_test.cpp b/cpp/tests/components/weakly_connected_components_test.cpp new file mode 100644 index 00000000000..6523b6a280a --- /dev/null +++ b/cpp/tests/components/weakly_connected_components_test.cpp @@ -0,0 +1,267 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +// do the perf measurements +// enabled by command line parameter '--perf' +// +static int PERF = 0; + +template +void weakly_connected_components_reference(edge_t const* offsets, + vertex_t const* indices, + vertex_t* components, + vertex_t num_vertices) +{ + vertex_t depth{0}; + + std::fill(components, + components + num_vertices, + cugraph::experimental::invalid_component_id::value); + + vertex_t num_scanned{0}; + while (true) { + auto it = std::find(components + num_scanned, + components + num_vertices, + cugraph::experimental::invalid_component_id::value); + if (it == components + num_vertices) { break; } + num_scanned += static_cast(std::distance(components + num_scanned, it)); + auto source = num_scanned; + *(components + source) = source; + std::vector cur_frontier_rows{source}; + std::vector new_frontier_rows{}; + + while (cur_frontier_rows.size() > 0) { + for (auto const row : cur_frontier_rows) { + auto nbr_offset_first = *(offsets + row); + auto nbr_offset_last = *(offsets + row + 1); + for (auto nbr_offset = nbr_offset_first; nbr_offset != nbr_offset_last; ++nbr_offset) { + auto nbr = *(indices + nbr_offset); + if (*(components + nbr) == cugraph::experimental::invalid_component_id::value) { + *(components + nbr) = source; + new_frontier_rows.push_back(nbr); + } + } + } + std::swap(cur_frontier_rows, new_frontier_rows); + new_frontier_rows.clear(); + } + } + + return; +} + +struct WeaklyConnectedComponents_Usecase { + bool check_correctness{true}; +}; + +template +class Tests_WeaklyConnectedComponent + : public ::testing::TestWithParam< + std::tuple> { + public: + Tests_WeaklyConnectedComponent() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test( +
WeaklyConnectedComponents_Usecase const& weakly_connected_components_usecase, + input_usecase_t const& input_usecase) + { + constexpr bool renumber = true; + + using weight_t = float; + + raft::handle_t handle{}; + HighResClock hr_clock{}; + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } + + cugraph::experimental::graph_t graph(handle); + rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); + std::tie(graph, d_renumber_map_labels) = + input_usecase.template construct_graph( + handle, false, renumber); + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "construct_graph took " << elapsed_time * 1e-6 << " s.\n"; + } + + auto graph_view = graph.view(); + ASSERT_TRUE(graph_view.is_symmetric()) + << "Weakly connected components works only on undirected (symmetric) graphs."; + + rmm::device_uvector d_components(graph_view.get_number_of_vertices(), + handle.get_stream()); + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_clock.start(); + } + + cugraph::experimental::weakly_connected_components(handle, graph_view, d_components.data()); + + if (PERF) { + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + double elapsed_time{0.0}; + hr_clock.stop(&elapsed_time); + std::cout << "weakly_connected_components took " << elapsed_time * 1e-6 << " s.\n"; + } + + if (weakly_connected_components_usecase.check_correctness) { + cugraph::experimental::graph_t unrenumbered_graph( + handle); + if (renumber) { + std::tie(unrenumbered_graph, std::ignore) = + input_usecase.template construct_graph( + handle, false, false); + } + auto unrenumbered_graph_view = renumber ? 
unrenumbered_graph.view() : graph_view; + + std::vector h_offsets(unrenumbered_graph_view.get_number_of_vertices() + 1); + std::vector h_indices(unrenumbered_graph_view.get_number_of_edges()); + raft::update_host(h_offsets.data(), + unrenumbered_graph_view.offsets(), + unrenumbered_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + unrenumbered_graph_view.indices(), + unrenumbered_graph_view.get_number_of_edges(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + std::vector h_reference_components( + unrenumbered_graph_view.get_number_of_vertices()); + + weakly_connected_components_reference(h_offsets.data(), + h_indices.data(), + h_reference_components.data(), + unrenumbered_graph_view.get_number_of_vertices()); + + std::vector h_cugraph_components(graph_view.get_number_of_vertices()); + if (renumber) { + rmm::device_uvector d_unrenumbered_components(size_t{0}, + handle.get_stream_view()); + std::tie(std::ignore, d_unrenumbered_components) = cugraph::test::sort_by_key( + handle, d_renumber_map_labels.data(), d_components.data(), d_renumber_map_labels.size()); + raft::update_host(h_cugraph_components.data(), + d_unrenumbered_components.data(), + d_unrenumbered_components.size(), + handle.get_stream()); + } else { + raft::update_host(h_cugraph_components.data(), + d_components.data(), + d_components.size(), + handle.get_stream()); + } + handle.get_stream_view().synchronize(); + + std::unordered_map cuda_to_reference_map{}; + for (size_t i = 0; i < h_reference_components.size(); ++i) { + cuda_to_reference_map.insert({h_cugraph_components[i], h_reference_components[i]}); + } + std::transform( + h_cugraph_components.begin(), + h_cugraph_components.end(), + h_cugraph_components.begin(), + [&cuda_to_reference_map](auto cugraph_c) { return cuda_to_reference_map[cugraph_c]; }); + + ASSERT_TRUE(std::equal( + h_reference_components.begin(), h_reference_components.end(), h_cugraph_components.begin())) 
+ << "components do not match with the reference values."; + } + } +}; + +using Tests_WeaklyConnectedComponents_File = + Tests_WeaklyConnectedComponent; +using Tests_WeaklyConnectedComponents_Rmat = + Tests_WeaklyConnectedComponent; + +// FIXME: add tests for type combinations +TEST_P(Tests_WeaklyConnectedComponents_File, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_WeaklyConnectedComponents_Rmat, CheckInt32Int32) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_WeaklyConnectedComponents_File, + ::testing::Values( + // enable correctness checks + std::make_tuple(WeaklyConnectedComponents_Usecase{}, + cugraph::test::File_Usecase("test/datasets/karate.mtx")), + std::make_tuple(WeaklyConnectedComponents_Usecase{}, + cugraph::test::File_Usecase("test/datasets/polbooks.mtx")), + std::make_tuple(WeaklyConnectedComponents_Usecase{}, + cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_WeaklyConnectedComponents_Rmat, + ::testing::Values( + // enable correctness checks + std::make_tuple(WeaklyConnectedComponents_Usecase{}, + cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_large_test, + Tests_WeaklyConnectedComponents_Rmat, + ::testing::Values( + // disable correctness checks + std::make_tuple(WeaklyConnectedComponents_Usecase{false}, + cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp index 44b664c5b92..3fea9f371e0 100644 --- a/cpp/tests/experimental/bfs_test.cpp +++ b/cpp/tests/experimental/bfs_test.cpp @@ -16,13 +16,14 @@ #include #include +#include #include #include -#include -#include -#include -#include +#include +#include 
+#include +#include #include #include @@ -212,12 +213,15 @@ class Tests_BFS : public ::testing::TestWithParam d_unrenumbered_distances(size_t{0}, handle.get_stream()); + std::tie(std::ignore, d_unrenumbered_distances) = cugraph::test::sort_by_key( handle, d_renumber_map_labels.data(), d_distances.data(), d_renumber_map_labels.size()); - auto d_unrenumbered_predecessors = cugraph::test::sort_by_key(handle, - d_renumber_map_labels.data(), - d_predecessors.data(), - d_renumber_map_labels.size()); + rmm::device_uvector d_unrenumbered_predecessors(size_t{0}, handle.get_stream()); + std::tie(std::ignore, d_unrenumbered_predecessors) = + cugraph::test::sort_by_key(handle, + d_renumber_map_labels.data(), + d_predecessors.data(), + d_renumber_map_labels.size()); raft::update_host(h_cugraph_distances.data(), d_unrenumbered_distances.data(), d_unrenumbered_distances.size(), diff --git a/cpp/tests/experimental/coarsen_graph_test.cpp b/cpp/tests/experimental/coarsen_graph_test.cpp index 5943a5cd286..7f76094fa0f 100644 --- a/cpp/tests/experimental/coarsen_graph_test.cpp +++ b/cpp/tests/experimental/coarsen_graph_test.cpp @@ -17,10 +17,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include diff --git a/cpp/tests/experimental/degree_test.cpp b/cpp/tests/experimental/degree_test.cpp index ea7cc246df0..80f1b51f80c 100644 --- a/cpp/tests/experimental/degree_test.cpp +++ b/cpp/tests/experimental/degree_test.cpp @@ -17,9 +17,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include #include diff --git a/cpp/tests/experimental/generate_rmat_test.cpp b/cpp/tests/experimental/generate_rmat_test.cpp index 60c3a322725..7c2dbb3911a 100644 --- a/cpp/tests/experimental/generate_rmat_test.cpp +++ b/cpp/tests/experimental/generate_rmat_test.cpp @@ -18,8 +18,8 @@ #include #include -#include -#include +#include +#include #include #include @@ -178,7 +178,7 @@ class Tests_GenerateRmat : public 
::testing::TestWithParam CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - std::tie(d_srcs, d_dsts) = cugraph::experimental::generate_rmat_edgelist( + std::tie(d_srcs, d_dsts) = cugraph::generate_rmat_edgelist( handle, configuration.scale, (size_t{1} << configuration.scale) * configuration.edge_factor, @@ -186,8 +186,8 @@ class Tests_GenerateRmat : public ::testing::TestWithParam configuration.b, configuration.c, uint64_t{0}, - configuration.clip_and_flip, - static_cast(scramble)); + configuration.clip_and_flip); + // static_cast(scramble)); CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -290,15 +290,15 @@ typedef struct GenerateRmats_Usecase_t { size_t min_scale{0}; size_t max_scale{0}; size_t edge_factor{0}; - cugraph::experimental::generator_distribution_t component_distribution; - cugraph::experimental::generator_distribution_t edge_distribution; + cugraph::generator_distribution_t component_distribution; + cugraph::generator_distribution_t edge_distribution; GenerateRmats_Usecase_t(size_t n_edgelists, size_t min_scale, size_t max_scale, size_t edge_factor, - cugraph::experimental::generator_distribution_t component_distribution, - cugraph::experimental::generator_distribution_t edge_distribution) + cugraph::generator_distribution_t component_distribution, + cugraph::generator_distribution_t edge_distribution) : n_edgelists(n_edgelists), min_scale(min_scale), max_scale(max_scale), @@ -322,15 +322,14 @@ class Tests_GenerateRmats : public ::testing::TestWithParam(handle, - configuration.n_edgelists, - configuration.min_scale, - configuration.max_scale, - configuration.edge_factor, - configuration.component_distribution, - configuration.edge_distribution, - uint64_t{0}); + auto outputs = cugraph::generate_rmat_edgelists(handle, + configuration.n_edgelists, + configuration.min_scale, + configuration.max_scale, + configuration.edge_factor, + configuration.component_distribution, + 
configuration.edge_distribution, + uint64_t{0}); CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement ASSERT_EQ(configuration.n_edgelists, outputs.size()); @@ -346,29 +345,28 @@ TEST_P(Tests_GenerateRmats, CheckInt32) { run_current_test(GetParam()); INSTANTIATE_TEST_SUITE_P( simple_test, Tests_GenerateRmats, - ::testing::Values( - GenerateRmats_Usecase(8, - 1, - 16, - 32, - cugraph::experimental::generator_distribution_t::UNIFORM, - cugraph::experimental::generator_distribution_t::UNIFORM), - GenerateRmats_Usecase(8, - 1, - 16, - 32, - cugraph::experimental::generator_distribution_t::UNIFORM, - cugraph::experimental::generator_distribution_t::POWER_LAW), - GenerateRmats_Usecase(8, - 3, - 16, - 32, - cugraph::experimental::generator_distribution_t::POWER_LAW, - cugraph::experimental::generator_distribution_t::UNIFORM), - GenerateRmats_Usecase(8, - 3, - 16, - 32, - cugraph::experimental::generator_distribution_t::POWER_LAW, - cugraph::experimental::generator_distribution_t::POWER_LAW))); + ::testing::Values(GenerateRmats_Usecase(8, + 1, + 16, + 32, + cugraph::generator_distribution_t::UNIFORM, + cugraph::generator_distribution_t::UNIFORM), + GenerateRmats_Usecase(8, + 1, + 16, + 32, + cugraph::generator_distribution_t::UNIFORM, + cugraph::generator_distribution_t::POWER_LAW), + GenerateRmats_Usecase(8, + 3, + 16, + 32, + cugraph::generator_distribution_t::POWER_LAW, + cugraph::generator_distribution_t::UNIFORM), + GenerateRmats_Usecase(8, + 3, + 16, + 32, + cugraph::generator_distribution_t::POWER_LAW, + cugraph::generator_distribution_t::POWER_LAW))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/graph_test.cpp b/cpp/tests/experimental/graph_test.cpp index bdf56ae7aff..ae899c3ba33 100644 --- a/cpp/tests/experimental/graph_test.cpp +++ b/cpp/tests/experimental/graph_test.cpp @@ -17,8 +17,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git 
a/cpp/tests/experimental/induced_subgraph_test.cpp b/cpp/tests/experimental/induced_subgraph_test.cpp index 2d49c174d7e..8a69da1475a 100644 --- a/cpp/tests/experimental/induced_subgraph_test.cpp +++ b/cpp/tests/experimental/induced_subgraph_test.cpp @@ -17,9 +17,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include #include diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index 232d82a1c91..aa66e69d4f7 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -16,13 +16,14 @@ #include #include +#include #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include @@ -225,7 +226,9 @@ class Tests_KatzCentrality std::vector h_cugraph_katz_centralities(graph_view.get_number_of_vertices()); if (renumber) { - auto d_unrenumbered_katz_centralities = + rmm::device_uvector d_unrenumbered_katz_centralities(size_t{0}, + handle.get_stream()); + std::tie(std::ignore, d_unrenumbered_katz_centralities) = cugraph::test::sort_by_key(handle, d_renumber_map_labels.data(), d_katz_centralities.data(), diff --git a/cpp/tests/experimental/mg_bfs_test.cpp b/cpp/tests/experimental/mg_bfs_test.cpp index f6e0a57e602..04eb1bf7b43 100644 --- a/cpp/tests/experimental/mg_bfs_test.cpp +++ b/cpp/tests/experimental/mg_bfs_test.cpp @@ -16,14 +16,16 @@ #include #include +#include +#include #include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include @@ -80,8 +82,10 @@ class Tests_MGBFS : public ::testing::TestWithParam mg_graph(handle); rmm::device_uvector d_mg_renumber_map_labels(0, handle.get_stream()); std::tie(mg_graph, d_mg_renumber_map_labels) = @@ -90,6 +94,7 @@ class Tests_MGBFS : public ::testing::TestWithParam sg_graph(handle); - std::tie(sg_graph, std::ignore) = - input_usecase.template construct_graph( - 
handle, false, false); - - auto sg_graph_view = sg_graph.view(); - - std::vector vertex_partition_lasts(comm_size); - for (size_t i = 0; i < vertex_partition_lasts.size(); ++i) { - vertex_partition_lasts[i] = mg_graph_view.get_vertex_partition_last(i); - } - - rmm::device_scalar d_source(static_cast(bfs_usecase.source), - handle.get_stream()); - cugraph::experimental::unrenumber_int_vertices( - handle, - d_source.data(), - size_t{1}, - d_mg_renumber_map_labels.data(), - mg_graph_view.get_local_vertex_first(), - mg_graph_view.get_local_vertex_last(), - vertex_partition_lasts, - true); - auto unrenumbered_source = d_source.value(handle.get_stream()); - - // 5-2. run SG BFS - - rmm::device_uvector d_sg_distances(sg_graph_view.get_number_of_local_vertices(), - handle.get_stream()); - rmm::device_uvector d_sg_predecessors(sg_graph_view.get_number_of_local_vertices(), - handle.get_stream()); - - cugraph::experimental::bfs(handle, - sg_graph_view, - d_sg_distances.data(), - d_sg_predecessors.data(), - unrenumbered_source, - false, - std::numeric_limits::max()); - - // 5-3. 
compare - - std::vector h_sg_offsets(sg_graph_view.get_number_of_vertices() + 1); - std::vector h_sg_indices(sg_graph_view.get_number_of_edges()); - raft::update_host(h_sg_offsets.data(), - sg_graph_view.offsets(), - sg_graph_view.get_number_of_vertices() + 1, - handle.get_stream()); - raft::update_host(h_sg_indices.data(), - sg_graph_view.indices(), - sg_graph_view.get_number_of_edges(), - handle.get_stream()); - - std::vector h_sg_distances(sg_graph_view.get_number_of_vertices()); - std::vector h_sg_predecessors(sg_graph_view.get_number_of_vertices()); - raft::update_host( - h_sg_distances.data(), d_sg_distances.data(), d_sg_distances.size(), handle.get_stream()); - raft::update_host(h_sg_predecessors.data(), - d_sg_predecessors.data(), - d_sg_predecessors.size(), - handle.get_stream()); - - std::vector h_mg_distances(mg_graph_view.get_number_of_local_vertices()); - std::vector h_mg_predecessors(mg_graph_view.get_number_of_local_vertices()); - raft::update_host( - h_mg_distances.data(), d_mg_distances.data(), d_mg_distances.size(), handle.get_stream()); - cugraph::experimental::unrenumber_int_vertices( - handle, - d_mg_predecessors.data(), - d_mg_predecessors.size(), - d_mg_renumber_map_labels.data(), - mg_graph_view.get_local_vertex_first(), - mg_graph_view.get_local_vertex_last(), - vertex_partition_lasts, - true); - raft::update_host(h_mg_predecessors.data(), - d_mg_predecessors.data(), - d_mg_predecessors.size(), - handle.get_stream()); - - std::vector h_mg_renumber_map_labels(d_mg_renumber_map_labels.size()); - raft::update_host(h_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.size(), - handle.get_stream()); - - handle.get_stream_view().synchronize(); - - for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { - auto mapped_vertex = h_mg_renumber_map_labels[i]; - ASSERT_TRUE(h_mg_distances[i] == h_sg_distances[mapped_vertex]) - << "MG BFS distance for vertex: " << mapped_vertex << " in rank: 
" << comm_rank - << " has value: " << h_mg_distances[i] - << " different from the corresponding SG value: " << h_sg_distances[mapped_vertex]; - if (h_mg_predecessors[i] == cugraph::invalid_vertex_id::value) { - ASSERT_TRUE(h_sg_predecessors[mapped_vertex] == h_mg_predecessors[i]) - << "vertex reachability does not match with the SG result."; - } else { - ASSERT_TRUE(h_sg_distances[h_mg_predecessors[i]] + 1 == h_sg_distances[mapped_vertex]) - << "distances to this vertex != distances to the predecessor vertex + 1."; - bool found{false}; - for (auto j = h_sg_offsets[h_mg_predecessors[i]]; - j < h_sg_offsets[h_mg_predecessors[i] + 1]; - ++j) { - if (h_sg_indices[j] == mapped_vertex) { - found = true; - break; + // 4-1. aggregate MG results + + auto d_mg_aggregate_renumber_map_labels = cugraph::test::device_gatherv( + handle, d_mg_renumber_map_labels.data(), d_mg_renumber_map_labels.size()); + auto d_mg_aggregate_distances = + cugraph::test::device_gatherv(handle, d_mg_distances.data(), d_mg_distances.size()); + auto d_mg_aggregate_predecessors = + cugraph::test::device_gatherv(handle, d_mg_predecessors.data(), d_mg_predecessors.size()); + + if (handle.get_comms().get_rank() == int{0}) { + // 4-2. unrenumbr MG results + + cugraph::experimental::unrenumber_int_vertices( + handle, + d_mg_aggregate_predecessors.data(), + d_mg_aggregate_predecessors.size(), + d_mg_aggregate_renumber_map_labels.data(), + vertex_t{0}, + mg_graph_view.get_number_of_vertices(), + std::vector{mg_graph_view.get_number_of_vertices()}); + + std::tie(std::ignore, d_mg_aggregate_distances) = + cugraph::test::sort_by_key(handle, + d_mg_aggregate_renumber_map_labels.data(), + d_mg_aggregate_distances.data(), + d_mg_aggregate_renumber_map_labels.size()); + std::tie(std::ignore, d_mg_aggregate_predecessors) = + cugraph::test::sort_by_key(handle, + d_mg_aggregate_renumber_map_labels.data(), + d_mg_aggregate_predecessors.data(), + d_mg_aggregate_renumber_map_labels.size()); + + // 4-3. 
create SG graph + + cugraph::experimental::graph_t sg_graph(handle); + std::tie(sg_graph, std::ignore) = + input_usecase.template construct_graph( + handle, false, false); + + auto sg_graph_view = sg_graph.view(); + + ASSERT_TRUE(mg_graph_view.get_number_of_vertices() == + sg_graph_view.get_number_of_vertices()); + + // 4-4. run SG BFS + + rmm::device_uvector d_sg_distances(sg_graph_view.get_number_of_vertices(), + handle.get_stream()); + rmm::device_uvector d_sg_predecessors( + sg_graph_view.get_number_of_local_vertices(), handle.get_stream()); + + vertex_t unrenumbered_source{}; + raft::update_host(&unrenumbered_source, + d_mg_aggregate_renumber_map_labels.data() + bfs_usecase.source, + size_t{1}, + handle.get_stream()); + handle.get_stream_view().synchronize(); + + cugraph::experimental::bfs(handle, + sg_graph_view, + d_sg_distances.data(), + d_sg_predecessors.data(), + unrenumbered_source, + false, + std::numeric_limits::max()); + // 4-5. compare + + std::vector h_sg_offsets(sg_graph_view.get_number_of_vertices() + 1); + std::vector h_sg_indices(sg_graph_view.get_number_of_edges()); + raft::update_host(h_sg_offsets.data(), + sg_graph_view.offsets(), + sg_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_sg_indices.data(), + sg_graph_view.indices(), + sg_graph_view.get_number_of_edges(), + handle.get_stream()); + + std::vector h_mg_aggregate_distances(mg_graph_view.get_number_of_vertices()); + std::vector h_mg_aggregate_predecessors(mg_graph_view.get_number_of_vertices()); + + raft::update_host(h_mg_aggregate_distances.data(), + d_mg_aggregate_distances.data(), + d_mg_aggregate_distances.size(), + handle.get_stream()); + raft::update_host(h_mg_aggregate_predecessors.data(), + d_mg_aggregate_predecessors.data(), + d_mg_aggregate_predecessors.size(), + handle.get_stream()); + + std::vector h_sg_distances(sg_graph_view.get_number_of_vertices()); + std::vector h_sg_predecessors(sg_graph_view.get_number_of_vertices()); + + 
raft::update_host( + h_sg_distances.data(), d_sg_distances.data(), d_sg_distances.size(), handle.get_stream()); + raft::update_host(h_sg_predecessors.data(), + d_sg_predecessors.data(), + d_sg_predecessors.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + ASSERT_TRUE(std::equal(h_mg_aggregate_distances.begin(), + h_mg_aggregate_distances.end(), + h_sg_distances.begin())); + for (size_t i = 0; i < h_mg_aggregate_predecessors.size(); ++i) { + if (h_mg_aggregate_predecessors[i] == cugraph::invalid_vertex_id::value) { + ASSERT_TRUE(h_sg_predecessors[i] == h_mg_aggregate_predecessors[i]) + << "vertex reachability does not match with the SG result."; + } else { + ASSERT_TRUE(h_sg_distances[h_mg_aggregate_predecessors[i]] + 1 == h_sg_distances[i]) + << "distances to this vertex != distances to the predecessor vertex + 1."; + bool found{false}; + for (auto j = h_sg_offsets[h_mg_aggregate_predecessors[i]]; + j < h_sg_offsets[h_mg_aggregate_predecessors[i] + 1]; + ++j) { + if (h_sg_indices[j] == i) { + found = true; + break; + } } + ASSERT_TRUE(found) << "no edge from the predecessor vertex to this vertex."; } - ASSERT_TRUE(found) << "no edge from the predecessor vertex to this vertex."; } } } @@ -275,20 +293,20 @@ INSTANTIATE_TEST_SUITE_P( cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); -INSTANTIATE_TEST_SUITE_P( - rmat_small_test, - Tests_MGBFS_Rmat, - ::testing::Values( - // enable correctness checks - std::make_tuple(BFS_Usecase{0}, - cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false, true)))); - -INSTANTIATE_TEST_SUITE_P( - rmat_large_test, - Tests_MGBFS_Rmat, - ::testing::Values( - // disable correctness checks for large graphs - std::make_tuple(BFS_Usecase{0, false}, - cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false, true)))); +INSTANTIATE_TEST_SUITE_P(rmat_small_test, + Tests_MGBFS_Rmat, + ::testing::Values( + // 
enable correctness checks + std::make_tuple(BFS_Usecase{0}, + cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); + +INSTANTIATE_TEST_SUITE_P(rmat_large_test, + Tests_MGBFS_Rmat, + ::testing::Values( + // disable correctness checks for large graphs + std::make_tuple(BFS_Usecase{0, false}, + cugraph::test::Rmat_Usecase( + 20, 32, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/mg_katz_centrality_test.cpp b/cpp/tests/experimental/mg_katz_centrality_test.cpp index 864b68caf33..27ef64d124e 100644 --- a/cpp/tests/experimental/mg_katz_centrality_test.cpp +++ b/cpp/tests/experimental/mg_katz_centrality_test.cpp @@ -16,11 +16,13 @@ #include #include +#include +#include #include #include -#include -#include +#include +#include #include #include @@ -77,6 +79,7 @@ class Tests_MGKatzCentrality if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); hr_clock.start(); } cugraph::experimental::graph_t mg_graph(handle); @@ -87,6 +90,7 @@ class Tests_MGKatzCentrality if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); double elapsed_time{0.0}; hr_clock.stop(&elapsed_time); std::cout << "MG construct_graph took " << elapsed_time * 1e-6 << " s.\n"; @@ -109,6 +113,7 @@ class Tests_MGKatzCentrality if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); hr_clock.start(); } @@ -124,6 +129,7 @@ class Tests_MGKatzCentrality if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); double elapsed_time{0.0}; hr_clock.stop(&elapsed_time); std::cout << "MG Katz Centrality took " << elapsed_time * 1e-6 << " s.\n"; @@ -132,68 +138,81 @@ class Tests_MGKatzCentrality // 5. 
copmare SG & MG results if (katz_usecase.check_correctness) { - // 5-1. create SG graph - - cugraph::experimental::graph_t sg_graph(handle); - std::tie(sg_graph, std::ignore) = - input_usecase.template construct_graph( - handle, true, false); - - auto sg_graph_view = sg_graph.view(); - - // 5-3. run SG Katz Centrality - - rmm::device_uvector d_sg_katz_centralities(sg_graph_view.get_number_of_vertices(), - handle.get_stream()); - - cugraph::experimental::katz_centrality(handle, - sg_graph_view, - static_cast(nullptr), - d_sg_katz_centralities.data(), - alpha, - beta, - epsilon, - std::numeric_limits::max(), // max_iterations - false); - - // 5-4. compare - - std::vector h_sg_katz_centralities(sg_graph_view.get_number_of_vertices()); - raft::update_host(h_sg_katz_centralities.data(), - d_sg_katz_centralities.data(), - d_sg_katz_centralities.size(), - handle.get_stream()); - - std::vector h_mg_katz_centralities(mg_graph_view.get_number_of_local_vertices()); - raft::update_host(h_mg_katz_centralities.data(), - d_mg_katz_centralities.data(), - d_mg_katz_centralities.size(), - handle.get_stream()); - - std::vector h_mg_renumber_map_labels(d_mg_renumber_map_labels.size()); - raft::update_host(h_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.size(), - handle.get_stream()); - - handle.get_stream_view().synchronize(); - - auto threshold_ratio = 1e-3; - auto threshold_magnitude = - (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) * - threshold_ratio; // skip comparison for low KatzCentrality verties (lowly ranked vertices) - auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { - return std::abs(lhs - rhs) < - std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); - }; - - for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { - auto mapped_vertex = h_mg_renumber_map_labels[i]; - ASSERT_TRUE(nearly_equal(h_mg_katz_centralities[i], 
h_sg_katz_centralities[mapped_vertex])) - << "MG KatzCentrality value for vertex: " << mapped_vertex << " in rank: " << comm_rank - << " has value: " << h_mg_katz_centralities[i] - << " which exceeds the error margin for comparing to SG value: " - << h_sg_katz_centralities[mapped_vertex]; + // 5-1. aggregate MG results + + auto d_mg_aggregate_renumber_map_labels = cugraph::test::device_gatherv( + handle, d_mg_renumber_map_labels.data(), d_mg_renumber_map_labels.size()); + auto d_mg_aggregate_katz_centralities = cugraph::test::device_gatherv( + handle, d_mg_katz_centralities.data(), d_mg_katz_centralities.size()); + + if (handle.get_comms().get_rank() == int{0}) { + // 5-2. unrenumbr MG results + + std::tie(std::ignore, d_mg_aggregate_katz_centralities) = + cugraph::test::sort_by_key(handle, + d_mg_aggregate_renumber_map_labels.data(), + d_mg_aggregate_katz_centralities.data(), + d_mg_aggregate_renumber_map_labels.size()); + + // 5-3. create SG graph + + cugraph::experimental::graph_t sg_graph(handle); + std::tie(sg_graph, std::ignore) = + input_usecase.template construct_graph( + handle, true, false); + + auto sg_graph_view = sg_graph.view(); + + ASSERT_TRUE(mg_graph_view.get_number_of_vertices() == + sg_graph_view.get_number_of_vertices()); + + // 5-4. run SG Katz Centrality + + rmm::device_uvector d_sg_katz_centralities(sg_graph_view.get_number_of_vertices(), + handle.get_stream()); + + cugraph::experimental::katz_centrality( + handle, + sg_graph_view, + static_cast(nullptr), + d_sg_katz_centralities.data(), + alpha, + beta, + epsilon, + std::numeric_limits::max(), // max_iterations + false); + + // 5-5. 
compare + + std::vector h_mg_aggregate_katz_centralities( + mg_graph_view.get_number_of_vertices()); + raft::update_host(h_mg_aggregate_katz_centralities.data(), + d_mg_aggregate_katz_centralities.data(), + d_mg_aggregate_katz_centralities.size(), + handle.get_stream()); + + std::vector h_sg_katz_centralities(sg_graph_view.get_number_of_vertices()); + raft::update_host(h_sg_katz_centralities.data(), + d_sg_katz_centralities.data(), + d_sg_katz_centralities.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + auto threshold_ratio = 1e-3; + auto threshold_magnitude = + (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) * + threshold_ratio; // skip comparison for low KatzCentrality verties (lowly ranked + // vertices) + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + }; + + ASSERT_TRUE(std::equal(h_mg_aggregate_katz_centralities.begin(), + h_mg_aggregate_katz_centralities.end(), + h_sg_katz_centralities.begin(), + nearly_equal)); } } } @@ -232,7 +251,7 @@ INSTANTIATE_TEST_SUITE_P(rmat_small_test, ::testing::Values(KatzCentrality_Usecase{false}, KatzCentrality_Usecase{true}), ::testing::Values(cugraph::test::Rmat_Usecase( - 10, 16, 0.57, 0.19, 0.19, 0, false, false, true)))); + 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); INSTANTIATE_TEST_SUITE_P(rmat_large_test, Tests_MGKatzCentrality_Rmat, @@ -241,6 +260,6 @@ INSTANTIATE_TEST_SUITE_P(rmat_large_test, ::testing::Values(KatzCentrality_Usecase{false, false}, KatzCentrality_Usecase{true, false}), ::testing::Values(cugraph::test::Rmat_Usecase( - 20, 32, 0.57, 0.19, 0.19, 0, false, false, true)))); + 20, 32, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/mg_sssp_test.cpp b/cpp/tests/experimental/mg_sssp_test.cpp index 70f1a95e1f4..da5120163df 100644 --- 
a/cpp/tests/experimental/mg_sssp_test.cpp +++ b/cpp/tests/experimental/mg_sssp_test.cpp @@ -16,14 +16,16 @@ #include #include +#include +#include #include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include @@ -77,6 +79,7 @@ class Tests_MGSSSP : public ::testing::TestWithParam mg_graph(handle); @@ -87,6 +90,7 @@ class Tests_MGSSSP : public ::testing::TestWithParam sg_graph(handle); - std::tie(sg_graph, std::ignore) = - input_usecase.template construct_graph( - handle, true, false); - - auto sg_graph_view = sg_graph.view(); - - std::vector vertex_partition_lasts(comm_size); - for (size_t i = 0; i < vertex_partition_lasts.size(); ++i) { - vertex_partition_lasts[i] = mg_graph_view.get_vertex_partition_last(i); - } - - rmm::device_scalar d_source(static_cast(sssp_usecase.source), - handle.get_stream()); - cugraph::experimental::unrenumber_int_vertices( - handle, - d_source.data(), - size_t{1}, - d_mg_renumber_map_labels.data(), - mg_graph_view.get_local_vertex_first(), - mg_graph_view.get_local_vertex_last(), - vertex_partition_lasts, - true); - auto unrenumbered_source = d_source.value(handle.get_stream()); - - // 5-2. run SG SSSP - - rmm::device_uvector d_sg_distances(sg_graph_view.get_number_of_local_vertices(), - handle.get_stream()); - rmm::device_uvector d_sg_predecessors(sg_graph_view.get_number_of_local_vertices(), - handle.get_stream()); - - // FIXME: disable do_expensive_check - cugraph::experimental::sssp(handle, - sg_graph_view, - d_sg_distances.data(), - d_sg_predecessors.data(), - unrenumbered_source, - std::numeric_limits::max()); - - // 5-3. 
compare - - std::vector h_sg_offsets(sg_graph_view.get_number_of_vertices() + 1); - std::vector h_sg_indices(sg_graph_view.get_number_of_edges()); - std::vector h_sg_weights(sg_graph_view.get_number_of_edges()); - raft::update_host(h_sg_offsets.data(), - sg_graph_view.offsets(), - sg_graph_view.get_number_of_vertices() + 1, - handle.get_stream()); - raft::update_host(h_sg_indices.data(), - sg_graph_view.indices(), - sg_graph_view.get_number_of_edges(), - handle.get_stream()); - raft::update_host(h_sg_weights.data(), - sg_graph_view.weights(), - sg_graph_view.get_number_of_edges(), - handle.get_stream()); - - std::vector h_sg_distances(sg_graph_view.get_number_of_vertices()); - std::vector h_sg_predecessors(sg_graph_view.get_number_of_vertices()); - raft::update_host( - h_sg_distances.data(), d_sg_distances.data(), d_sg_distances.size(), handle.get_stream()); - raft::update_host(h_sg_predecessors.data(), - d_sg_predecessors.data(), - d_sg_predecessors.size(), - handle.get_stream()); - - std::vector h_mg_distances(mg_graph_view.get_number_of_local_vertices()); - std::vector h_mg_predecessors(mg_graph_view.get_number_of_local_vertices()); - raft::update_host( - h_mg_distances.data(), d_mg_distances.data(), d_mg_distances.size(), handle.get_stream()); - cugraph::experimental::unrenumber_int_vertices( - handle, - d_mg_predecessors.data(), - d_mg_predecessors.size(), - d_mg_renumber_map_labels.data(), - mg_graph_view.get_local_vertex_first(), - mg_graph_view.get_local_vertex_last(), - vertex_partition_lasts, - true); - raft::update_host(h_mg_predecessors.data(), - d_mg_predecessors.data(), - d_mg_predecessors.size(), - handle.get_stream()); - - std::vector h_mg_renumber_map_labels(d_mg_renumber_map_labels.size()); - raft::update_host(h_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.size(), - handle.get_stream()); - - handle.get_stream_view().synchronize(); - - auto max_weight_element = std::max_element(h_sg_weights.begin(), 
h_sg_weights.end()); - auto epsilon = *max_weight_element * weight_t{1e-6}; - auto nearly_equal = [epsilon](auto lhs, auto rhs) { return std::fabs(lhs - rhs) < epsilon; }; - - for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { - auto mapped_vertex = h_mg_renumber_map_labels[i]; - ASSERT_TRUE(nearly_equal(h_mg_distances[i], h_sg_distances[mapped_vertex])) - << "MG SSSP distance for vertex: " << mapped_vertex << " in rank: " << comm_rank - << " has value: " << h_mg_distances[i] - << " different from the corresponding SG value: " << h_sg_distances[mapped_vertex]; - if (h_mg_predecessors[i] == cugraph::invalid_vertex_id::value) { - ASSERT_TRUE(h_sg_predecessors[mapped_vertex] == h_mg_predecessors[i]) - << "vertex reachability does not match with the SG result."; - } else { - auto pred_distance = h_sg_distances[h_mg_predecessors[i]]; - bool found{false}; - for (auto j = h_sg_offsets[h_mg_predecessors[i]]; - j < h_sg_offsets[h_mg_predecessors[i] + 1]; - ++j) { - if (h_sg_indices[j] == mapped_vertex) { - if (nearly_equal(pred_distance + h_sg_weights[j], h_sg_distances[mapped_vertex])) { - found = true; - break; + // 4-1. aggregate MG results + + auto d_mg_aggregate_renumber_map_labels = cugraph::test::device_gatherv( + handle, d_mg_renumber_map_labels.data(), d_mg_renumber_map_labels.size()); + auto d_mg_aggregate_distances = + cugraph::test::device_gatherv(handle, d_mg_distances.data(), d_mg_distances.size()); + auto d_mg_aggregate_predecessors = + cugraph::test::device_gatherv(handle, d_mg_predecessors.data(), d_mg_predecessors.size()); + + if (handle.get_comms().get_rank() == int{0}) { + // 4-2. 
unrenumber MG results + + cugraph::experimental::unrenumber_int_vertices( + handle, + d_mg_aggregate_predecessors.data(), + d_mg_aggregate_predecessors.size(), + d_mg_aggregate_renumber_map_labels.data(), + vertex_t{0}, + mg_graph_view.get_number_of_vertices(), + std::vector{mg_graph_view.get_number_of_vertices()}); + + std::tie(std::ignore, d_mg_aggregate_distances) = + cugraph::test::sort_by_key(handle, + d_mg_aggregate_renumber_map_labels.data(), + d_mg_aggregate_distances.data(), + d_mg_aggregate_renumber_map_labels.size()); + std::tie(std::ignore, d_mg_aggregate_predecessors) = + cugraph::test::sort_by_key(handle, + d_mg_aggregate_renumber_map_labels.data(), + d_mg_aggregate_predecessors.data(), + d_mg_aggregate_renumber_map_labels.size()); + + // 4-3. create SG graph + + cugraph::experimental::graph_t sg_graph(handle); + std::tie(sg_graph, std::ignore) = + input_usecase.template construct_graph( + handle, true, false); + + auto sg_graph_view = sg_graph.view(); + + ASSERT_TRUE(mg_graph_view.get_number_of_vertices() == + sg_graph_view.get_number_of_vertices()); + + // 4-4. run SG SSSP + + rmm::device_uvector d_sg_distances(sg_graph_view.get_number_of_local_vertices(), + handle.get_stream()); + rmm::device_uvector d_sg_predecessors( + sg_graph_view.get_number_of_local_vertices(), handle.get_stream()); + vertex_t unrenumbered_source{}; + raft::update_host(&unrenumbered_source, + d_mg_aggregate_renumber_map_labels.data() + sssp_usecase.source, + size_t{1}, + handle.get_stream()); + handle.get_stream_view().synchronize(); + + cugraph::experimental::sssp(handle, + sg_graph_view, + d_sg_distances.data(), + d_sg_predecessors.data(), + unrenumbered_source, + std::numeric_limits::max()); + + // 4-5. 
compare + + std::vector h_sg_offsets(sg_graph_view.get_number_of_vertices() + 1); + std::vector h_sg_indices(sg_graph_view.get_number_of_edges()); + std::vector h_sg_weights(sg_graph_view.get_number_of_edges()); + raft::update_host(h_sg_offsets.data(), + sg_graph_view.offsets(), + sg_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_sg_indices.data(), + sg_graph_view.indices(), + sg_graph_view.get_number_of_edges(), + handle.get_stream()); + raft::update_host(h_sg_weights.data(), + sg_graph_view.weights(), + sg_graph_view.get_number_of_edges(), + handle.get_stream()); + + std::vector h_mg_aggregate_distances(mg_graph_view.get_number_of_vertices()); + std::vector h_mg_aggregate_predecessors(mg_graph_view.get_number_of_vertices()); + raft::update_host(h_mg_aggregate_distances.data(), + d_mg_aggregate_distances.data(), + d_mg_aggregate_distances.size(), + handle.get_stream()); + raft::update_host(h_mg_aggregate_predecessors.data(), + d_mg_aggregate_predecessors.data(), + d_mg_aggregate_predecessors.size(), + handle.get_stream()); + + std::vector h_sg_distances(sg_graph_view.get_number_of_vertices()); + std::vector h_sg_predecessors(sg_graph_view.get_number_of_vertices()); + raft::update_host( + h_sg_distances.data(), d_sg_distances.data(), d_sg_distances.size(), handle.get_stream()); + raft::update_host(h_sg_predecessors.data(), + d_sg_predecessors.data(), + d_sg_predecessors.size(), + handle.get_stream()); + + handle.get_stream_view().synchronize(); + + auto max_weight_element = std::max_element(h_sg_weights.begin(), h_sg_weights.end()); + auto epsilon = *max_weight_element * weight_t{1e-6}; + auto nearly_equal = [epsilon](auto lhs, auto rhs) { + return std::fabs(lhs - rhs) < epsilon; + }; + + ASSERT_TRUE(std::equal(h_mg_aggregate_distances.begin(), + h_mg_aggregate_distances.end(), + h_sg_distances.begin(), + nearly_equal)); + + for (size_t i = 0; i < h_mg_aggregate_predecessors.size(); ++i) { + if (h_mg_aggregate_predecessors[i] 
== cugraph::invalid_vertex_id::value) { + ASSERT_TRUE(h_sg_predecessors[i] == h_mg_aggregate_predecessors[i]) + << "vertex reachability does not match with the SG result."; + } else { + auto pred_distance = h_sg_distances[h_mg_aggregate_predecessors[i]]; + bool found{false}; + for (auto j = h_sg_offsets[h_mg_aggregate_predecessors[i]]; + j < h_sg_offsets[h_mg_aggregate_predecessors[i] + 1]; + ++j) { + if (h_sg_indices[j] == i) { + if (nearly_equal(pred_distance + h_sg_weights[j], h_sg_distances[i])) { + found = true; + break; + } } } + ASSERT_TRUE(found) + << "no edge from the predecessor vertex to this vertex with the matching weight."; } - ASSERT_TRUE(found) - << "no edge from the predecessor vertex to this vertex with the matching weight."; } } } @@ -282,20 +299,20 @@ INSTANTIATE_TEST_SUITE_P( std::make_tuple(SSSP_Usecase{1000}, cugraph::test::File_Usecase("test/datasets/wiki2003.mtx")))); -INSTANTIATE_TEST_SUITE_P( - rmat_small_test, - Tests_MGSSSP_Rmat, - ::testing::Values( - // enable correctness checks - std::make_tuple(SSSP_Usecase{0}, - cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); - -INSTANTIATE_TEST_SUITE_P( - rmat_large_test, - Tests_MGSSSP_Rmat, - ::testing::Values( - // disable correctness checks for large graphs - std::make_tuple(SSSP_Usecase{0, false}, - cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); +INSTANTIATE_TEST_SUITE_P(rmat_small_test, + Tests_MGSSSP_Rmat, + ::testing::Values( + // enable correctness checks + std::make_tuple(SSSP_Usecase{0}, + cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); + +INSTANTIATE_TEST_SUITE_P(rmat_large_test, + Tests_MGSSSP_Rmat, + ::testing::Values( + // disable correctness checks for large graphs + std::make_tuple(SSSP_Usecase{0, false}, + cugraph::test::Rmat_Usecase( + 20, 32, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/ms_bfs_test.cpp 
b/cpp/tests/experimental/ms_bfs_test.cpp index eec51f105ab..b8d1e43f81e 100644 --- a/cpp/tests/experimental/ms_bfs_test.cpp +++ b/cpp/tests/experimental/ms_bfs_test.cpp @@ -18,10 +18,10 @@ #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 1e26245b74c..5c0b0f288d4 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -16,13 +16,14 @@ #include #include +#include #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include @@ -303,10 +304,11 @@ class Tests_PageRank d_renumber_map_labels.data(), vertex_t{0}, graph_view.get_number_of_vertices()); - cugraph::test::sort_by_key(handle, - d_unrenumbered_personalization_vertices.data(), - d_unrenumbered_personalization_values.data(), - d_unrenumbered_personalization_vertices.size()); + std::tie(d_unrenumbered_personalization_vertices, d_unrenumbered_personalization_values) = + cugraph::test::sort_by_key(handle, + d_unrenumbered_personalization_vertices.data(), + d_unrenumbered_personalization_values.data(), + d_unrenumbered_personalization_vertices.size()); raft::update_host(h_unrenumbered_personalization_vertices.data(), d_unrenumbered_personalization_vertices.data(), @@ -346,7 +348,8 @@ class Tests_PageRank std::vector h_cugraph_pageranks(graph_view.get_number_of_vertices()); if (renumber) { - auto d_unrenumbered_pageranks = cugraph::test::sort_by_key( + rmm::device_uvector d_unrenumbered_pageranks(size_t{0}, handle.get_stream()); + std::tie(std::ignore, d_unrenumbered_pageranks) = cugraph::test::sort_by_key( handle, d_renumber_map_labels.data(), d_pageranks.data(), d_renumber_map_labels.size()); raft::update_host(h_cugraph_pageranks.data(), d_unrenumbered_pageranks.data(), diff --git a/cpp/tests/experimental/sssp_test.cpp 
b/cpp/tests/experimental/sssp_test.cpp index d84c1c2fc6c..9a50553a114 100644 --- a/cpp/tests/experimental/sssp_test.cpp +++ b/cpp/tests/experimental/sssp_test.cpp @@ -16,13 +16,14 @@ #include #include +#include #include #include -#include -#include -#include -#include +#include +#include +#include +#include #include #include @@ -220,12 +221,15 @@ class Tests_SSSP : public ::testing::TestWithParam d_unrenumbered_distances(size_t{0}, handle.get_stream()); + std::tie(std::ignore, d_unrenumbered_distances) = cugraph::test::sort_by_key( handle, d_renumber_map_labels.data(), d_distances.data(), d_renumber_map_labels.size()); - auto d_unrenumbered_predecessors = cugraph::test::sort_by_key(handle, - d_renumber_map_labels.data(), - d_predecessors.data(), - d_renumber_map_labels.size()); + rmm::device_uvector d_unrenumbered_predecessors(size_t{0}, handle.get_stream()); + std::tie(std::ignore, d_unrenumbered_predecessors) = + cugraph::test::sort_by_key(handle, + d_renumber_map_labels.data(), + d_predecessors.data(), + d_renumber_map_labels.size()); raft::update_host(h_cugraph_distances.data(), d_unrenumbered_distances.data(), diff --git a/cpp/tests/experimental/weight_sum_test.cpp b/cpp/tests/experimental/weight_sum_test.cpp index d04cba2d132..0320438c9a6 100644 --- a/cpp/tests/experimental/weight_sum_test.cpp +++ b/cpp/tests/experimental/weight_sum_test.cpp @@ -17,9 +17,9 @@ #include #include -#include -#include -#include +#include +#include +#include #include #include diff --git a/cpp/tests/generators/erdos_renyi_test.cpp b/cpp/tests/generators/erdos_renyi_test.cpp new file mode 100644 index 00000000000..c91a9af7c41 --- /dev/null +++ b/cpp/tests/generators/erdos_renyi_test.cpp @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include + +#include + +#include + +struct GenerateErdosRenyiTest : public ::testing::Test { +}; + +template +void test_symmetric(std::vector &h_src_v, std::vector &h_dst_v) +{ + std::vector reverse_src_v(h_src_v.size()); + std::vector reverse_dst_v(h_dst_v.size()); + + std::copy(h_src_v.begin(), h_src_v.end(), reverse_dst_v.begin()); + std::copy(h_dst_v.begin(), h_dst_v.end(), reverse_src_v.begin()); + + thrust::sort(thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(h_src_v.begin(), h_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(h_src_v.end(), h_dst_v.end()))); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(reverse_src_v.begin(), reverse_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(reverse_src_v.end(), reverse_dst_v.end()))); + + EXPECT_EQ(reverse_src_v, h_src_v); + EXPECT_EQ(reverse_dst_v, h_dst_v); +} + +template +void er_test(size_t num_vertices, float p) +{ + raft::handle_t handle; + rmm::device_uvector d_src_v(0, handle.get_stream()); + rmm::device_uvector d_dst_v(0, handle.get_stream()); + + std::tie(d_src_v, d_dst_v) = + cugraph::generate_erdos_renyi_graph_edgelist_gnp(handle, num_vertices, p, 0); + + handle.get_stream_view().synchronize(); + + std::vector h_src_v(d_src_v.size()); + std::vector h_dst_v(d_dst_v.size()); + + raft::update_host(h_src_v.data(), d_src_v.data(), d_src_v.size(), handle.get_stream()); + raft::update_host(h_dst_v.data(), d_dst_v.data(), d_dst_v.size(), handle.get_stream()); + +
handle.get_stream_view().synchronize(); + + float expected_edge_count = p * num_vertices * num_vertices; + + ASSERT_GE(h_src_v.size(), static_cast(expected_edge_count * 0.8)); + ASSERT_LE(h_src_v.size(), static_cast(expected_edge_count * 1.2)); + ASSERT_EQ(std::count_if(h_src_v.begin(), + h_src_v.end(), + [n = static_cast(num_vertices)](auto v) { + return !cugraph::experimental::is_valid_vertex(n, v); + }), + 0); + ASSERT_EQ(std::count_if(h_dst_v.begin(), + h_dst_v.end(), + [n = static_cast(num_vertices)](auto v) { + return !cugraph::experimental::is_valid_vertex(n, v); + }), + 0); +} + +TEST_F(GenerateErdosRenyiTest, ERTest) +{ + er_test(size_t{10}, float{0.1}); + er_test(size_t{20}, float{0.1}); + er_test(size_t{50}, float{0.1}); + er_test(size_t{10000}, float{0.1}); +} + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/generators/generators_test.cpp b/cpp/tests/generators/generators_test.cpp new file mode 100644 index 00000000000..11e63d81f36 --- /dev/null +++ b/cpp/tests/generators/generators_test.cpp @@ -0,0 +1,689 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include + +#include +#include +#include + +#include + +struct GeneratorsTest : public ::testing::Test { +}; + +TEST_F(GeneratorsTest, PathGraphTest) +{ + using vertex_t = int32_t; + + std::vector expected_src_v({0, 1, 2, 3}); + std::vector expected_dst_v({1, 2, 3, 4}); + std::vector actual_src_v; + std::vector actual_dst_v; + + std::vector> parameters({{5, 0}}); + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::tie(src_v, dst_v) = cugraph::generate_path_graph_edgelist(handle, parameters); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, Mesh2DGraphTest) +{ + using vertex_t = int32_t; + + std::vector expected_src_v({0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18, + 20, 21, 22, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19}); + std::vector expected_dst_v({1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19, + 21, 22, 23, 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23}); + std::vector actual_src_v; + std::vector actual_dst_v; + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters( + {{4, 2, 0}, {4, 2, 8}, {4, 2, 16}}); + + std::tie(src_v, dst_v) = cugraph::generate_2d_mesh_graph_edgelist(handle, parameters); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + 
thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, Mesh3DGraphTest) +{ + using vertex_t = int32_t; + + std::vector expected_src_v( + {0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16, 18, 19, 21, 22, 24, 25, 27, 28, 30, 31, 33, 34, + 36, 37, 39, 40, 42, 43, 45, 46, 48, 49, 51, 52, 54, 55, 57, 58, 60, 61, 63, 64, 66, 67, 69, 70, + 72, 73, 75, 76, 78, 79, 0, 1, 2, 3, 4, 5, 9, 10, 11, 12, 13, 14, 18, 19, 20, 21, 22, 23, + 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 45, 46, 47, 48, 49, 50, 54, 55, 56, 57, 58, 59, + 63, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 77, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71}); + + std::vector expected_dst_v( + {1, 2, 4, 5, 7, 8, 10, 11, 13, 14, 16, 17, 19, 20, 22, 23, 25, 26, 28, 29, 31, 32, 34, 35, + 37, 38, 40, 41, 43, 44, 46, 47, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62, 64, 65, 67, 68, 70, 71, + 73, 74, 76, 77, 79, 80, 3, 4, 5, 6, 7, 8, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, + 30, 31, 32, 33, 34, 35, 39, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 53, 57, 58, 59, 60, 61, 62, + 66, 67, 68, 69, 70, 71, 75, 76, 77, 78, 79, 80, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80}); + + 
std::vector actual_src_v; + std::vector actual_dst_v; + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters( + {{3, 3, 3, 0}, {3, 3, 3, 27}, {3, 3, 3, 54}}); + + std::tie(src_v, dst_v) = cugraph::generate_3d_mesh_graph_edgelist(handle, parameters); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, CompleteGraphTestTriangles) +{ + using vertex_t = int32_t; + + std::vector expected_src_v({0, 0, 1, 3, 3, 4, 6, 6, 7}); + std::vector expected_dst_v({1, 2, 2, 4, 5, 5, 7, 8, 8}); + std::vector actual_src_v; + std::vector actual_dst_v; + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters({{3, 0}, {3, 3}, {3, 6}}); + + std::tie(src_v, dst_v) = cugraph::generate_complete_graph_edgelist(handle, parameters); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), 
handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, CompleteGraphTest5) +{ + using vertex_t = int32_t; + + size_t num_vertices{5}; + size_t num_graphs{3}; + + std::vector expected_src_v({0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 5, 5, 5, 5, 6, + 6, 6, 7, 7, 8, 10, 10, 10, 10, 11, 11, 11, 12, 12, 13}); + std::vector expected_dst_v({1, 2, 3, 4, 2, 3, 4, 3, 4, 4, 6, 7, 8, 9, 7, + 8, 9, 8, 9, 9, 11, 12, 13, 14, 12, 13, 14, 13, 14, 14}); + std::vector actual_src_v; + std::vector actual_dst_v; + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters({{5, 0}, {5, 5}, {5, 10}}); + + std::tie(src_v, dst_v) = cugraph::generate_complete_graph_edgelist(handle, parameters); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + 
thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, LineGraphTestSymmetric) +{ + using vertex_t = int32_t; + + size_t num_vertices{5}; + std::vector expected_src_v({0, 1, 2, 3, 1, 2, 3, 4}); + std::vector expected_dst_v({1, 2, 3, 4, 0, 1, 2, 3}); + std::vector actual_src_v; + std::vector actual_dst_v; + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters({{5, 0}}); + + std::tie(src_v, dst_v) = cugraph::generate_path_graph_edgelist(handle, parameters); + std::tie(src_v, dst_v, std::ignore) = cugraph::symmetrize_edgelist( + handle, std::move(src_v), std::move(dst_v), std::nullopt); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, Mesh2DGraphTestSymmetric) +{ + using vertex_t = int32_t; + + size_t x{4}; + size_t y{2}; + size_t num_graphs{3}; + + std::vector expected_src_v({0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18, + 20, 21, 22, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 
18, 19, + 1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19, + 21, 22, 23, 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23}); + std::vector expected_dst_v({1, 2, 3, 5, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19, + 21, 22, 23, 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23, + 0, 1, 2, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18, + 20, 21, 22, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19}); + std::vector actual_src_v; + std::vector actual_dst_v; + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters( + {{4, 2, 0}, {4, 2, 8}, {4, 2, 16}}); + + std::tie(src_v, dst_v) = cugraph::generate_2d_mesh_graph_edgelist(handle, parameters); + std::tie(src_v, dst_v, std::ignore) = cugraph::symmetrize_edgelist( + handle, std::move(src_v), std::move(dst_v), std::nullopt); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, Mesh3DGraphTestSymmetric) +{ + using vertex_t = int32_t; + + size_t x{3}; + size_t y{3}; + size_t z{3}; + size_t num_graphs{3}; + + std::vector expected_src_v( + {0, 1, 3, 4, 6, 7, 9, 10, 12, 13, 15, 16, 18, 19, 21, 22, 24, 25, 27, 28, 30, 31, 33, 34, + 36, 37, 39, 
40, 42, 43, 45, 46, 48, 49, 51, 52, 54, 55, 57, 58, 60, 61, 63, 64, 66, 67, 69, 70, + 72, 73, 75, 76, 78, 79, 0, 1, 2, 3, 4, 5, 9, 10, 11, 12, 13, 14, 18, 19, 20, 21, 22, 23, + 27, 28, 29, 30, 31, 32, 36, 37, 38, 39, 40, 41, 45, 46, 47, 48, 49, 50, 54, 55, 56, 57, 58, 59, + 63, 64, 65, 66, 67, 68, 72, 73, 74, 75, 76, 77, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, + 12, 13, 14, 15, 16, 17, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 1, 2, 4, 5, 7, 8, + 10, 11, 13, 14, 16, 17, 19, 20, 22, 23, 25, 26, 28, 29, 31, 32, 34, 35, 37, 38, 40, 41, 43, 44, + 46, 47, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62, 64, 65, 67, 68, 70, 71, 73, 74, 76, 77, 79, 80, + 3, 4, 5, 6, 7, 8, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, 30, 31, 32, 33, 34, 35, + 39, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 53, 57, 58, 59, 60, 61, 62, 66, 67, 68, 69, 70, 71, + 75, 76, 77, 78, 79, 80, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 63, 64, 65, 66, 67, 68, + 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80}); + + std::vector expected_dst_v( + {1, 2, 4, 5, 7, 8, 10, 11, 13, 14, 16, 17, 19, 20, 22, 23, 25, 26, 28, 29, 31, 32, 34, 35, + 37, 38, 40, 41, 43, 44, 46, 47, 49, 50, 52, 53, 55, 56, 58, 59, 61, 62, 64, 65, 67, 68, 70, 71, + 73, 74, 76, 77, 79, 80, 3, 4, 5, 6, 7, 8, 12, 13, 14, 15, 16, 17, 21, 22, 23, 24, 25, 26, + 30, 31, 32, 33, 34, 35, 39, 40, 41, 42, 43, 44, 48, 49, 50, 51, 52, 53, 57, 58, 59, 60, 61, 62, + 66, 67, 68, 69, 70, 71, 75, 76, 77, 78, 79, 80, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, + 21, 22, 23, 24, 25, 26, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 0, 1, 3, 4, 6, 7, + 9, 10, 12, 13, 15, 16, 18, 19, 21, 22, 24, 25, 27, 28, 30, 31, 33, 34, 36, 37, 39, 40, 42, 43, + 45, 46, 48, 49, 
51, 52, 54, 55, 57, 58, 60, 61, 63, 64, 66, 67, 69, 70, 72, 73, 75, 76, 78, 79, + 0, 1, 2, 3, 4, 5, 9, 10, 11, 12, 13, 14, 18, 19, 20, 21, 22, 23, 27, 28, 29, 30, 31, 32, + 36, 37, 38, 39, 40, 41, 45, 46, 47, 48, 49, 50, 54, 55, 56, 57, 58, 59, 63, 64, 65, 66, 67, 68, + 72, 73, 74, 75, 76, 77, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 54, 55, 56, 57, 58, 59, + 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71}); + + std::vector actual_src_v; + std::vector actual_dst_v; + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters( + {{3, 3, 3, 0}, {3, 3, 3, 27}, {3, 3, 3, 54}}); + + std::tie(src_v, dst_v) = cugraph::generate_3d_mesh_graph_edgelist(handle, parameters); + std::tie(src_v, dst_v, std::ignore) = cugraph::symmetrize_edgelist( + handle, std::move(src_v), std::move(dst_v), std::nullopt); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, CompleteGraphTestTrianglesSymmetric) +{ + using vertex_t = int32_t; + + size_t num_vertices{3}; + size_t num_graphs{3}; + 
+ std::vector expected_src_v({0, 0, 1, 3, 3, 4, 6, 6, 7, 1, 2, 2, 4, 5, 5, 7, 8, 8}); + std::vector expected_dst_v({1, 2, 2, 4, 5, 5, 7, 8, 8, 0, 0, 1, 3, 3, 4, 6, 6, 7}); + std::vector actual_src_v; + std::vector actual_dst_v; + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters({{3, 0}, {3, 3}, {3, 6}}); + + std::tie(src_v, dst_v) = cugraph::generate_complete_graph_edgelist(handle, parameters); + std::tie(src_v, dst_v, std::ignore) = cugraph::symmetrize_edgelist( + handle, std::move(src_v), std::move(dst_v), std::nullopt); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, CompleteGraphTest5Symmetric) +{ + using vertex_t = int32_t; + + size_t num_vertices{5}; + size_t num_graphs{3}; + + std::vector expected_src_v({0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 5, 5, 5, 5, 6, + 6, 6, 7, 7, 8, 10, 10, 10, 10, 11, 11, 11, 12, 12, 13, + 1, 2, 3, 4, 2, 3, 4, 3, 4, 4, 6, 7, 8, 9, 7, + 8, 9, 8, 9, 9, 11, 12, 13, 14, 12, 13, 14, 13, 14, 14}); + std::vector expected_dst_v({1, 2, 3, 4, 2, 3, 4, 3, 4, 4, 6, 7, 8, 9, 7, + 8, 9, 8, 9, 9, 11, 12, 13, 14, 12, 13, 14, 13, 14, 
14, + 0, 0, 0, 0, 1, 1, 1, 2, 2, 3, 5, 5, 5, 5, 6, + 6, 6, 7, 7, 8, 10, 10, 10, 10, 11, 11, 11, 12, 12, 13}); + std::vector actual_src_v; + std::vector actual_dst_v; + + raft::handle_t handle; + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters({{5, 0}, {5, 5}, {5, 10}}); + + std::tie(src_v, dst_v) = cugraph::generate_complete_graph_edgelist(handle, parameters); + std::tie(src_v, dst_v, std::ignore) = cugraph::symmetrize_edgelist( + handle, std::move(src_v), std::move(dst_v), std::nullopt); + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(src_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, CombineGraphsTest) +{ + using vertex_t = int32_t; + using weight_t = float; + + raft::handle_t handle; + + size_t num_vertices{8}; + + std::vector expected_src_v({0, 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 13, 14, 16, 17, 18, + 20, 21, 22, 0, 1, 2, 3, 8, 9, 10, 11, 16, 17, 18, 19}); + std::vector expected_dst_v({1, 2, 3, 4, 5, 6, 7, 9, 10, 11, 13, 14, 15, 17, 18, 19, + 21, 22, 23, 4, 5, 6, 7, 12, 13, 14, 15, 20, 21, 22, 23}); + + rmm::device_uvector src_graph_1_v(0, handle.get_stream()); + rmm::device_uvector dst_graph_1_v(0, 
handle.get_stream()); + rmm::device_uvector src_graph_2_v(0, handle.get_stream()); + rmm::device_uvector dst_graph_2_v(0, handle.get_stream()); + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters1({{num_vertices, 0}}); + std::vector> parameters2( + {{4, 2, 0}, {4, 2, 8}, {4, 2, 16}}); + + std::tie(src_graph_1_v, dst_graph_1_v) = + cugraph::generate_path_graph_edgelist(handle, parameters1); + std::tie(src_graph_2_v, dst_graph_2_v) = + cugraph::generate_2d_mesh_graph_edgelist(handle, parameters2); + + std::vector> sources; + sources.push_back(std::move(src_graph_1_v)); + sources.push_back(std::move(src_graph_2_v)); + + std::vector> dests; + dests.push_back(std::move(dst_graph_1_v)); + dests.push_back(std::move(dst_graph_2_v)); + + std::tie(src_v, dst_v, std::ignore) = cugraph::combine_edgelists( + handle, std::move(sources), std::move(dests), std::nullopt); + + std::vector actual_src_v; + std::vector actual_dst_v; + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(dst_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, CombineGraphsOffsetsTest) +{ + using vertex_t = int32_t; + using weight_t = float; + + raft::handle_t handle; + + 
size_t num_vertices{8}; + vertex_t offset{10}; + + std::vector expected_src_v({0, 1, 2, 3, 4, 5, 6, 10, 11, 12, 14, 15, 16, + 18, 19, 20, 22, 23, 24, 26, 27, 28, 30, 31, 32, 10, + 11, 12, 13, 18, 19, 20, 21, 26, 27, 28, 29}); + std::vector expected_dst_v({1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 15, 16, 17, + 19, 20, 21, 23, 24, 25, 27, 28, 29, 31, 32, 33, 14, + 15, 16, 17, 22, 23, 24, 25, 30, 31, 32, 33}); + + rmm::device_uvector src_graph_1_v(0, handle.get_stream()); + rmm::device_uvector dst_graph_1_v(0, handle.get_stream()); + rmm::device_uvector src_graph_2_v(0, handle.get_stream()); + rmm::device_uvector dst_graph_2_v(0, handle.get_stream()); + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::vector> parameters1({{num_vertices, 0}}); + std::vector> parameters2( + {{4, 2, 10}, {4, 2, 18}, {4, 2, 26}}); + + std::tie(src_graph_1_v, dst_graph_1_v) = + cugraph::generate_path_graph_edgelist(handle, parameters1); + std::tie(src_graph_2_v, dst_graph_2_v) = + cugraph::generate_2d_mesh_graph_edgelist(handle, parameters2); + + std::vector> sources; + sources.push_back(std::move(src_graph_1_v)); + sources.push_back(std::move(src_graph_2_v)); + + std::vector> dests; + dests.push_back(std::move(dst_graph_1_v)); + dests.push_back(std::move(dst_graph_2_v)); + + std::tie(src_v, dst_v, std::ignore) = cugraph::combine_edgelists( + handle, std::move(sources), std::move(dests), std::nullopt); + + std::vector actual_src_v; + std::vector actual_dst_v; + + actual_src_v.resize(src_v.size()); + actual_dst_v.resize(dst_v.size()); + + raft::update_host(actual_src_v.data(), src_v.data(), src_v.size(), handle.get_stream()); + raft::update_host(actual_dst_v.data(), dst_v.data(), dst_v.size(), handle.get_stream()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), expected_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(expected_src_v.begin(), 
expected_dst_v.begin())) + + expected_src_v.size()); + + thrust::sort( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(actual_src_v.begin(), actual_dst_v.begin())) + + actual_src_v.size()); + + EXPECT_EQ(expected_src_v, actual_src_v); + EXPECT_EQ(expected_dst_v, actual_dst_v); +} + +TEST_F(GeneratorsTest, ScrambleTest) +{ + using vertex_t = int32_t; + using edge_t = int32_t; + + edge_t num_vertices{30}; + edge_t num_edges{100}; + + raft::handle_t handle; + + std::vector input_src_v(num_edges); + std::vector input_dst_v(num_edges); + + std::default_random_engine generator{}; + std::uniform_int_distribution distribution{0, num_vertices - 1}; + + std::generate(input_src_v.begin(), input_src_v.end(), [&distribution, &generator]() { + return distribution(generator); + }); + std::generate(input_dst_v.begin(), input_dst_v.end(), [&distribution, &generator]() { + return distribution(generator); + }); + + rmm::device_uvector d_src_v(input_src_v.size(), handle.get_stream()); + rmm::device_uvector d_dst_v(input_src_v.size(), handle.get_stream()); + std::vector output_src_v(input_src_v.size()); + std::vector output_dst_v(input_src_v.size()); + + raft::update_device(d_src_v.data(), input_src_v.data(), input_src_v.size(), handle.get_stream()); + raft::update_device(d_dst_v.data(), input_dst_v.data(), input_dst_v.size(), handle.get_stream()); + + cugraph::scramble_vertex_ids(handle, d_src_v, d_dst_v, 5, 0); + + raft::update_host(output_src_v.data(), d_src_v.data(), d_src_v.size(), handle.get_stream()); + raft::update_host(output_dst_v.data(), d_dst_v.data(), d_dst_v.size(), handle.get_stream()); + + EXPECT_TRUE(cugraph::test::renumbered_vectors_same(handle, input_src_v, output_src_v)); + EXPECT_TRUE(cugraph::test::renumbered_vectors_same(handle, input_dst_v, output_dst_v)); +} + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/layout/force_atlas2_test.cu 
b/cpp/tests/layout/force_atlas2_test.cu index c6067407b70..f2f5561a7d8 100644 --- a/cpp/tests/layout/force_atlas2_test.cu +++ b/cpp/tests/layout/force_atlas2_test.cu @@ -17,8 +17,8 @@ #include #include -#include -#include +#include +#include #include #include @@ -111,9 +111,10 @@ class Tests_Force_Atlas2 : public ::testing::TestWithParam std::vector> adj_matrix(m, std::vector(m)); std::vector force_atlas2(m * 2); + raft::handle_t const handle; + auto stream = handle.get_stream(); // device alloc - rmm::device_vector force_atlas2_vector(m * 2); - float* d_force_atlas2 = force_atlas2_vector.data().get(); + rmm::device_uvector pos(m * 2, stream); // Read ASSERT_EQ((cugraph::test::mm_to_coo( @@ -131,13 +132,13 @@ class Tests_Force_Atlas2 : public ::testing::TestWithParam } // Allocate COO on device - rmm::device_vector srcs_v(nnz); - rmm::device_vector dests_v(nnz); - rmm::device_vector weights_v(nnz); + rmm::device_uvector srcs_v(nnz, stream); + rmm::device_uvector dests_v(nnz, stream); + rmm::device_uvector weights_v(nnz, stream); - int* srcs = srcs_v.data().get(); - int* dests = dests_v.data().get(); - T* weights = weights_v.data().get(); + int* srcs = srcs_v.data(); + int* dests = dests_v.data(); + T* weights = weights_v.data(); // FIXME: RAFT error handling mechanism should be used instead CUDA_TRY(cudaMemcpy(srcs, &cooRowInd[0], sizeof(int) * nnz, cudaMemcpyDefault)); @@ -163,8 +164,9 @@ class Tests_Force_Atlas2 : public ::testing::TestWithParam if (PERF) { hr_clock.start(); for (int i = 0; i < PERF_MULTIPLIER; ++i) { - cugraph::force_atlas2(G, - d_force_atlas2, + cugraph::force_atlas2(handle, + G, + pos.data(), max_iter, x_start, y_start, @@ -185,8 +187,9 @@ class Tests_Force_Atlas2 : public ::testing::TestWithParam force_atlas2_time.push_back(time_tmp); } else { cudaProfilerStart(); - cugraph::force_atlas2(G, - d_force_atlas2, + cugraph::force_atlas2(handle, + G, + pos.data(), max_iter, x_start, y_start, @@ -207,7 +210,7 @@ class Tests_Force_Atlas2 : public 
::testing::TestWithParam // Copy pos to host std::vector h_pos(m * 2); - CUDA_TRY(cudaMemcpy(&h_pos[0], d_force_atlas2, sizeof(float) * m * 2, cudaMemcpyDeviceToHost)); + CUDA_TRY(cudaMemcpy(&h_pos[0], pos.data(), sizeof(float) * m * 2, cudaMemcpyDeviceToHost)); // Transpose the data std::vector> C_contiguous_embedding(m, std::vector(2)); diff --git a/cpp/tests/linear_assignment/hungarian_test.cu b/cpp/tests/linear_assignment/hungarian_test.cu index 656957a85eb..282524ffe0d 100644 --- a/cpp/tests/linear_assignment/hungarian_test.cu +++ b/cpp/tests/linear_assignment/hungarian_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. All rights reserved. * * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -15,8 +15,8 @@ #include #include -#include -#include +#include +#include #include @@ -261,7 +261,7 @@ void random_test(int32_t num_rows, int32_t num_cols, int32_t upper_bound, int re int32_t *d_data = data_v.data().get(); //int64_t seed{85}; int64_t seed{time(nullptr)}; - + thrust::for_each(rmm::exec_policy(stream)->on(stream), thrust::make_counting_iterator(0), thrust::make_counting_iterator(num_rows * num_cols), @@ -287,8 +287,8 @@ void random_test(int32_t num_rows, int32_t num_cols, int32_t upper_bound, int re std::cout << "cost = " << r << std::endl; hr_timer.display(std::cout); - - for (int i = 0 ; i < num_cols ; ++i) + + for (int i = 0 ; i < num_cols ; ++i) validate[i] = 0; int32_t assignment_out_of_range{0}; @@ -303,8 +303,8 @@ void random_test(int32_t num_rows, int32_t num_cols, int32_t upper_bound, int re EXPECT_EQ(assignment_out_of_range, 0); - int32_t assignment_missed = 0; - + int32_t assignment_missed = 0; + for (int32_t i = 0 ; i < num_cols ; ++i) { if (validate[i] != 1) { ++assignment_missed; diff --git a/cpp/tests/pagerank/mg_pagerank_test.cpp 
b/cpp/tests/pagerank/mg_pagerank_test.cpp index 659a62a727c..6370c7b7758 100644 --- a/cpp/tests/pagerank/mg_pagerank_test.cpp +++ b/cpp/tests/pagerank/mg_pagerank_test.cpp @@ -16,14 +16,16 @@ #include #include +#include +#include #include #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include @@ -80,6 +82,7 @@ class Tests_MGPageRank if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); hr_clock.start(); } cugraph::experimental::graph_t mg_graph(handle); @@ -89,6 +92,7 @@ class Tests_MGPageRank if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); double elapsed_time{0.0}; hr_clock.stop(&elapsed_time); std::cout << "MG construct_graph took " << elapsed_time * 1e-6 << " s.\n"; @@ -146,6 +150,7 @@ class Tests_MGPageRank if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); hr_clock.start(); } @@ -163,6 +168,7 @@ class Tests_MGPageRank if (PERF) { CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle.get_comms().barrier(); double elapsed_time{0.0}; hr_clock.stop(&elapsed_time); std::cout << "MG PageRank took " << elapsed_time * 1e-6 << " s.\n"; @@ -171,134 +177,95 @@ class Tests_MGPageRank // 5. copmare SG & MG results if (pagerank_usecase.check_correctness) { - // 5-1. create SG graph - - cugraph::experimental::graph_t sg_graph(handle); - std::tie(sg_graph, std::ignore) = - input_usecase.template construct_graph( - handle, true, false); - - auto sg_graph_view = sg_graph.view(); - - // 5-2. 
collect personalization vertex/value pairs - - rmm::device_uvector d_sg_personalization_vertices(0, handle.get_stream()); - rmm::device_uvector d_sg_personalization_values(0, handle.get_stream()); - if (pagerank_usecase.personalization_ratio > 0.0) { - rmm::device_uvector d_unrenumbered_personalization_vertices( - d_mg_personalization_vertices.size(), handle.get_stream()); - rmm::device_uvector d_unrenumbered_personalization_values( - d_unrenumbered_personalization_vertices.size(), handle.get_stream()); - raft::copy_async(d_unrenumbered_personalization_vertices.data(), - d_mg_personalization_vertices.data(), - d_mg_personalization_vertices.size(), - handle.get_stream()); - raft::copy_async(d_unrenumbered_personalization_values.data(), - d_mg_personalization_values.data(), - d_mg_personalization_values.size(), - handle.get_stream()); - - std::vector vertex_partition_lasts(comm_size); - for (size_t i = 0; i < vertex_partition_lasts.size(); ++i) { - vertex_partition_lasts[i] = mg_graph_view.get_vertex_partition_last(i); - } - cugraph::experimental::unrenumber_int_vertices( + // 5-1. aggregate MG results + + auto d_mg_aggregate_renumber_map_labels = cugraph::test::device_gatherv( + handle, d_mg_renumber_map_labels.data(), d_mg_renumber_map_labels.size()); + auto d_mg_aggregate_personalization_vertices = cugraph::test::device_gatherv( + handle, d_mg_personalization_vertices.data(), d_mg_personalization_vertices.size()); + auto d_mg_aggregate_personalization_values = cugraph::test::device_gatherv( + handle, d_mg_personalization_values.data(), d_mg_personalization_values.size()); + auto d_mg_aggregate_pageranks = + cugraph::test::device_gatherv(handle, d_mg_pageranks.data(), d_mg_pageranks.size()); + + if (handle.get_comms().get_rank() == int{0}) { + // 5-2. 
unrenumber MG results + + cugraph::experimental::unrenumber_int_vertices( handle, - d_unrenumbered_personalization_vertices.data(), - d_unrenumbered_personalization_vertices.size(), - d_mg_renumber_map_labels.data(), - mg_graph_view.get_local_vertex_first(), - mg_graph_view.get_local_vertex_last(), - vertex_partition_lasts, - handle.get_stream()); - - rmm::device_scalar d_local_personalization_vector_size( - d_unrenumbered_personalization_vertices.size(), handle.get_stream()); - rmm::device_uvector d_recvcounts(comm_size, handle.get_stream()); - comm.allgather( - d_local_personalization_vector_size.data(), d_recvcounts.data(), 1, handle.get_stream()); - std::vector recvcounts(d_recvcounts.size()); - raft::update_host( - recvcounts.data(), d_recvcounts.data(), d_recvcounts.size(), handle.get_stream()); - auto status = comm.sync_stream(handle.get_stream()); - ASSERT_EQ(status, raft::comms::status_t::SUCCESS); - - std::vector displacements(recvcounts.size(), size_t{0}); - std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1); - - d_sg_personalization_vertices.resize(displacements.back() + recvcounts.back(), - handle.get_stream()); - d_sg_personalization_values.resize(d_sg_personalization_vertices.size(), - handle.get_stream()); - - comm.allgatherv(d_unrenumbered_personalization_vertices.data(), - d_sg_personalization_vertices.data(), - recvcounts.data(), - displacements.data(), - handle.get_stream()); - comm.allgatherv(d_unrenumbered_personalization_values.data(), - d_sg_personalization_values.data(), - recvcounts.data(), - displacements.data(), - handle.get_stream()); - - cugraph::test::sort_by_key(handle, - d_unrenumbered_personalization_vertices.data(), - d_unrenumbered_personalization_values.data(), - d_unrenumbered_personalization_vertices.size()); - } + d_mg_aggregate_personalization_vertices.data(), + d_mg_aggregate_personalization_vertices.size(), + d_mg_aggregate_renumber_map_labels.data(), + vertex_t{0}, + 
mg_graph_view.get_number_of_vertices(), + std::vector{mg_graph_view.get_number_of_vertices()}); + std::tie(d_mg_aggregate_personalization_vertices, d_mg_aggregate_personalization_values) = + cugraph::test::sort_by_key(handle, + d_mg_aggregate_personalization_vertices.data(), + d_mg_aggregate_personalization_values.data(), + d_mg_aggregate_personalization_vertices.size()); + std::tie(std::ignore, d_mg_aggregate_pageranks) = + cugraph::test::sort_by_key(handle, + d_mg_aggregate_renumber_map_labels.data(), + d_mg_aggregate_pageranks.data(), + d_mg_aggregate_renumber_map_labels.size()); + + // 5-3. create SG graph + + cugraph::experimental::graph_t sg_graph(handle); + std::tie(sg_graph, std::ignore) = + input_usecase.template construct_graph( + handle, true, false); + + auto sg_graph_view = sg_graph.view(); + + ASSERT_EQ(mg_graph_view.get_number_of_vertices(), sg_graph_view.get_number_of_vertices()); + + // 5-4. run SG PageRank + + rmm::device_uvector d_sg_pageranks(sg_graph_view.get_number_of_vertices(), + handle.get_stream()); + + cugraph::experimental::pagerank( + handle, + sg_graph_view, + static_cast(nullptr), + d_mg_aggregate_personalization_vertices.data(), + d_mg_aggregate_personalization_values.data(), + static_cast(d_mg_aggregate_personalization_vertices.size()), + d_sg_pageranks.data(), + alpha, + epsilon, + std::numeric_limits::max(), // max_iterations + false); + + // 5-5. compare + + std::vector h_mg_aggregate_pageranks(mg_graph_view.get_number_of_vertices()); + raft::update_host(h_mg_aggregate_pageranks.data(), + d_mg_aggregate_pageranks.data(), + d_mg_aggregate_pageranks.size(), + handle.get_stream()); - // 5-3. 
run SG PageRank - - rmm::device_uvector d_sg_pageranks(sg_graph_view.get_number_of_vertices(), - handle.get_stream()); - - cugraph::experimental::pagerank(handle, - sg_graph_view, - static_cast(nullptr), - d_sg_personalization_vertices.data(), - d_sg_personalization_values.data(), - static_cast(d_sg_personalization_vertices.size()), - d_sg_pageranks.data(), - alpha, - epsilon, - std::numeric_limits::max(), // max_iterations - false); - - // 5-4. compare - - std::vector h_sg_pageranks(sg_graph_view.get_number_of_vertices()); - raft::update_host( - h_sg_pageranks.data(), d_sg_pageranks.data(), d_sg_pageranks.size(), handle.get_stream()); - - std::vector h_mg_pageranks(mg_graph_view.get_number_of_local_vertices()); - raft::update_host( - h_mg_pageranks.data(), d_mg_pageranks.data(), d_mg_pageranks.size(), handle.get_stream()); - - std::vector h_mg_renumber_map_labels(d_mg_renumber_map_labels.size()); - raft::update_host(h_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.data(), - d_mg_renumber_map_labels.size(), - handle.get_stream()); - - handle.get_stream_view().synchronize(); - - auto threshold_ratio = 1e-3; - auto threshold_magnitude = - (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) * - threshold_ratio; // skip comparison for low PageRank verties (lowly ranked vertices) - auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { - return std::abs(lhs - rhs) < - std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); - }; - - for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) { - auto mapped_vertex = h_mg_renumber_map_labels[i]; - ASSERT_TRUE(nearly_equal(h_mg_pageranks[i], h_sg_pageranks[mapped_vertex])) - << "MG PageRank value for vertex: " << mapped_vertex << " in rank: " << comm_rank - << " has value: " << h_mg_pageranks[i] - << " which exceeds the error margin for comparing to SG value: " - << h_sg_pageranks[mapped_vertex]; + std::vector 
h_sg_pageranks(sg_graph_view.get_number_of_vertices()); + raft::update_host( + h_sg_pageranks.data(), d_sg_pageranks.data(), d_sg_pageranks.size(), handle.get_stream()); + + handle.get_stream_view().synchronize(); + + auto threshold_ratio = 1e-3; + auto threshold_magnitude = + (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) * + threshold_ratio; // skip comparison for low PageRank vertices (lowly ranked vertices) + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + }; + + ASSERT_TRUE(std::equal(h_mg_aggregate_pageranks.begin(), + h_mg_aggregate_pageranks.end(), + h_sg_pageranks.begin(), + nearly_equal)); } } } @@ -333,22 +300,24 @@ INSTANTIATE_TEST_SUITE_P( cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); -INSTANTIATE_TEST_SUITE_P(rmat_small_tests, - Tests_MGPageRank_Rmat, - ::testing::Combine(::testing::Values(PageRank_Usecase{0.0, false}, - PageRank_Usecase{0.5, false}, - PageRank_Usecase{0.0, true}, - PageRank_Usecase{0.5, true}), - ::testing::Values(cugraph::test::Rmat_Usecase( - 10, 16, 0.57, 0.19, 0.19, 0, false, false, true)))); - -INSTANTIATE_TEST_SUITE_P(rmat_large_tests, - Tests_MGPageRank_Rmat, - ::testing::Combine(::testing::Values(PageRank_Usecase{0.0, false, false}, - PageRank_Usecase{0.5, false, false}, - PageRank_Usecase{0.0, true, false}, - PageRank_Usecase{0.5, true, false}), - ::testing::Values(cugraph::test::Rmat_Usecase( - 20, 32, 0.57, 0.19, 0.19, 0, false, false, true)))); +INSTANTIATE_TEST_SUITE_P( + rmat_small_tests, + Tests_MGPageRank_Rmat, + ::testing::Combine(::testing::Values(PageRank_Usecase{0.0, false}, + PageRank_Usecase{0.5, false}, + PageRank_Usecase{0.0, true}, + PageRank_Usecase{0.5, true}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); + 
+INSTANTIATE_TEST_SUITE_P( + rmat_large_tests, + Tests_MGPageRank_Rmat, + ::testing::Combine(::testing::Values(PageRank_Usecase{0.0, false, false}, + PageRank_Usecase{0.5, false, false}, + PageRank_Usecase{0.0, true, false}, + PageRank_Usecase{0.5, true, false}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 20, 32, 0.57, 0.19, 0.19, 0, false, false, 0, true)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/sampling/random_walks_profiling.cu b/cpp/tests/sampling/random_walks_profiling.cu index 397196c4c78..355d62e8141 100644 --- a/cpp/tests/sampling/random_walks_profiling.cu +++ b/cpp/tests/sampling/random_walks_profiling.cu @@ -18,8 +18,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/tests/sampling/random_walks_test.cu b/cpp/tests/sampling/random_walks_test.cu index 9e4ecd0d024..186c45109e9 100644 --- a/cpp/tests/sampling/random_walks_test.cu +++ b/cpp/tests/sampling/random_walks_test.cu @@ -23,8 +23,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/tests/sampling/random_walks_utils.cuh b/cpp/tests/sampling/random_walks_utils.cuh index 44a6f8d561b..f2db29eb23b 100644 --- a/cpp/tests/sampling/random_walks_utils.cuh +++ b/cpp/tests/sampling/random_walks_utils.cuh @@ -16,7 +16,7 @@ #pragma once #include -#include +#include #include #include diff --git a/cpp/tests/sampling/rw_low_level_test.cu b/cpp/tests/sampling/rw_low_level_test.cu index 29fd01fc7e0..77c5b18499a 100644 --- a/cpp/tests/sampling/rw_low_level_test.cu +++ b/cpp/tests/sampling/rw_low_level_test.cu @@ -23,8 +23,8 @@ #include #include -#include -#include +#include +#include #include #include @@ -47,38 +47,6 @@ using vector_test_t = detail::device_vec_t; // for debug purposes namespace { // anonym. 
-template -graph_t make_graph(raft::handle_t const& handle, - std::vector const& v_src, - std::vector const& v_dst, - std::vector const& v_w, - vertex_t num_vertices, - edge_t num_edges, - bool is_weighted) -{ - vector_test_t d_src(num_edges, handle.get_stream()); - vector_test_t d_dst(num_edges, handle.get_stream()); - vector_test_t d_weights(num_edges, handle.get_stream()); - - raft::update_device(d_src.data(), v_src.data(), d_src.size(), handle.get_stream()); - raft::update_device(d_dst.data(), v_dst.data(), d_dst.size(), handle.get_stream()); - - weight_t* ptr_d_weights{nullptr}; - if (is_weighted) { - raft::update_device(d_weights.data(), v_w.data(), d_weights.size(), handle.get_stream()); - - ptr_d_weights = d_weights.data(); - } - - edgelist_t edgelist{ - d_src.data(), d_dst.data(), ptr_d_weights, num_edges}; - - graph_t graph( - handle, edgelist, num_vertices, graph_properties_t{false, false, is_weighted}, false); - - return graph; -} - template bool check_col_indices(raft::handle_t const& handle, vector_test_t const& d_crt_out_degs, @@ -126,7 +94,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphRWStart) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -206,7 +174,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphCoalesceExperiments) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -282,7 +250,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphColExtraction) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 
5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -378,7 +346,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphRndGenColIndx) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -456,7 +424,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphUpdatePathSizes) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -528,7 +496,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphScatterUpdate) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -673,7 +641,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphCoalesceDefragment) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -748,7 +716,7 @@ TEST_F(RandomWalksPrimsTest, SimpleGraphRandomWalk) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = 
make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -805,7 +773,7 @@ TEST(RandomWalksQuery, GraphRWQueryOffsets) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -866,7 +834,7 @@ TEST(RandomWalksSpecialCase, SingleRandomWalk) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); @@ -923,8 +891,8 @@ TEST(RandomWalksSpecialCase, UnweightedGraph) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w; - auto graph = - make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, false); // un-weighted + auto graph = cugraph::test::make_graph( + handle, v_src, v_dst, v_w, num_vertices, num_edges, false); // un-weighted auto graph_view = graph.view(); @@ -981,7 +949,7 @@ TEST(RandomWalksPadded, SimpleGraph) std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; - auto graph = make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); auto graph_view = graph.view(); diff --git a/cpp/tests/serialization/un_serialize_test.cpp b/cpp/tests/serialization/un_serialize_test.cpp new file mode 100644 index 00000000000..9f11a9aaa27 --- /dev/null +++ b/cpp/tests/serialization/un_serialize_test.cpp @@ -0,0 +1,178 @@ +/* + * Copyright (c) 2021, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include "cuda_profiler_api.h" + +#include +#include + +#include +#include + +#include + +TEST(SerializationTest, GraphSerUnser) +{ + using namespace cugraph::serializer; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = float; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + + auto pair_sz = serializer_t::get_device_graph_sz_bytes(graph); + auto total_ser_sz = pair_sz.first + pair_sz.second; + + serializer_t ser(handle, total_ser_sz); + serializer_t::graph_meta_t graph_meta{}; + ser.serialize(graph, graph_meta); + + pair_sz = serializer_t::get_device_graph_sz_bytes(graph_meta); + auto post_ser_sz = pair_sz.first + pair_sz.second; + + EXPECT_EQ(total_ser_sz, post_ser_sz); + + auto graph_copy = ser.unserialize(pair_sz.first, pair_sz.second); + + auto pair = cugraph::test::compare_graphs(handle, graph, graph_copy); + if (pair.first == false) std::cerr << "Test failed with " << pair.second << ".\n"; + + ASSERT_TRUE(pair.first); +} + +TEST(SerializationTest, GraphDecoupledSerUnser) +{ + using namespace cugraph::serializer; + + using vertex_t = 
int32_t; + using edge_t = vertex_t; + using weight_t = double; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{0.1, 1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1}; + + auto graph = cugraph::test::make_graph(handle, v_src, v_dst, v_w, num_vertices, num_edges, true); + + auto pair_sz = serializer_t::get_device_graph_sz_bytes(graph); + auto total_ser_sz = pair_sz.first + pair_sz.second; + + // use the following buffer to simulate communication between + // sender and receiver of the serialization: + // + rmm::device_uvector d_storage_comm(0, handle.get_stream()); + + { + serializer_t ser(handle, total_ser_sz); + serializer_t::graph_meta_t graph_meta{}; + ser.serialize(graph, graph_meta); + + pair_sz = serializer_t::get_device_graph_sz_bytes(graph_meta); + auto post_ser_sz = pair_sz.first + pair_sz.second; + + EXPECT_EQ(total_ser_sz, post_ser_sz); + + d_storage_comm.resize(total_ser_sz, handle.get_stream()); + raft::copy(d_storage_comm.data(), ser.get_storage(), total_ser_sz, handle.get_stream()); + } + + { + serializer_t ser(handle, d_storage_comm.data()); + + auto graph_copy = ser.unserialize(pair_sz.first, pair_sz.second); + + auto pair = cugraph::test::compare_graphs(handle, graph, graph_copy); + if (pair.first == false) std::cerr << "Test failed with " << pair.second << ".\n"; + + ASSERT_TRUE(pair.first); + } +} + +TEST(SerializationTest, UnweightedGraphDecoupledSerUnser) +{ + using namespace cugraph::serializer; + + using vertex_t = int32_t; + using edge_t = vertex_t; + using weight_t = double; + using index_t = vertex_t; + + raft::handle_t handle{}; + + edge_t num_edges = 8; + vertex_t num_vertices = 6; + + std::vector v_src{0, 1, 1, 2, 2, 2, 3, 4}; + std::vector v_dst{1, 3, 4, 0, 1, 3, 5, 5}; + std::vector v_w{}; + + auto graph = cugraph::test::make_graph( + handle, v_src, v_dst, v_w, num_vertices, 
num_edges, /*weighted=*/false); + + ASSERT_TRUE(graph.view().weights() == nullptr); + + auto pair_sz = serializer_t::get_device_graph_sz_bytes(graph); + auto total_ser_sz = pair_sz.first + pair_sz.second; + + // use the following buffer to simulate communication between + // sender and receiver of the serialization: + // + rmm::device_uvector d_storage_comm(0, handle.get_stream()); + + { + serializer_t ser(handle, total_ser_sz); + serializer_t::graph_meta_t graph_meta{}; + ser.serialize(graph, graph_meta); + + pair_sz = serializer_t::get_device_graph_sz_bytes(graph_meta); + auto post_ser_sz = pair_sz.first + pair_sz.second; + + EXPECT_EQ(total_ser_sz, post_ser_sz); + + d_storage_comm.resize(total_ser_sz, handle.get_stream()); + raft::copy(d_storage_comm.data(), ser.get_storage(), total_ser_sz, handle.get_stream()); + } + + { + serializer_t ser(handle, d_storage_comm.data()); + + auto graph_copy = ser.unserialize(pair_sz.first, pair_sz.second); + + ASSERT_TRUE(graph_copy.view().weights() == nullptr); + + auto pair = cugraph::test::compare_graphs(handle, graph, graph_copy); + if (pair.first == false) std::cerr << "Test failed with " << pair.second << ".\n"; + + ASSERT_TRUE(pair.first); + } +} diff --git a/cpp/tests/traversal/bfs_test.cu b/cpp/tests/traversal/bfs_test.cu index 9027d73b83e..8cbfe0081d6 100644 --- a/cpp/tests/traversal/bfs_test.cu +++ b/cpp/tests/traversal/bfs_test.cu @@ -19,7 +19,7 @@ #include #include -#include +#include #include diff --git a/cpp/tests/traversal/sssp_test.cu b/cpp/tests/traversal/sssp_test.cu index e151ab64e68..1903f9ad302 100644 --- a/cpp/tests/traversal/sssp_test.cu +++ b/cpp/tests/traversal/sssp_test.cu @@ -13,9 +13,9 @@ #include #include -#include #include -#include +#include +#include #include diff --git a/cpp/tests/traversal/tsp_test.cu b/cpp/tests/traversal/tsp_test.cu index 47a72757bd8..806d9dea51a 100644 --- a/cpp/tests/traversal/tsp_test.cu +++ b/cpp/tests/traversal/tsp_test.cu @@ -30,8 +30,8 @@ #include #include -#include 
-#include +#include +#include #include diff --git a/cpp/tests/tree/mst_test.cu b/cpp/tests/tree/mst_test.cu index e3d7b70d51e..aee88d981c1 100644 --- a/cpp/tests/tree/mst_test.cu +++ b/cpp/tests/tree/mst_test.cu @@ -21,8 +21,8 @@ #include #include -#include -#include +#include +#include #include #include diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index 770fbc99397..3121d3074d4 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ -16,8 +16,8 @@ #pragma once +#include #include -#include #include @@ -160,11 +160,6 @@ inline auto parse_test_options(int argc, char **argv) auto const cmd_opts = parse_test_options(argc, argv); \ auto const rmm_mode = cmd_opts["rmm_mode"].as(); \ auto resource = cugraph::test::create_memory_resource(rmm_mode); \ - \ - if (comm_rank != 0) { \ - auto &listeners = ::testing::UnitTest::GetInstance()->listeners(); \ - delete listeners.Release(listeners.default_result_printer()); \ - } \ rmm::mr::set_current_device_resource(resource.get()); \ auto ret = RUN_ALL_TESTS(); \ MPI_TRY(MPI_Finalize()); \ diff --git a/cpp/tests/utilities/device_comm_wrapper.cu b/cpp/tests/utilities/device_comm_wrapper.cu new file mode 100644 index 00000000000..2d66e05c59f --- /dev/null +++ b/cpp/tests/utilities/device_comm_wrapper.cu @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "device_comm_wrapper.hpp" + +#include +#include + +#include +#include + +namespace cugraph { +namespace test { + +template +rmm::device_uvector device_gatherv(raft::handle_t const &handle, T const *d_input, size_t size) +{ + bool is_root = handle.get_comms().get_rank() == int{0}; + auto rx_sizes = cugraph::experimental::host_scalar_gather( + handle.get_comms(), size, int{0}, handle.get_stream()); + std::vector rx_displs(is_root ? static_cast(handle.get_comms().get_size()) + : size_t{0}); + if (is_root) { std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1); } + + rmm::device_uvector gathered_v( + is_root ? std::reduce(rx_sizes.begin(), rx_sizes.end()) : size_t{0}, handle.get_stream()); + + cugraph::experimental::device_gatherv(handle.get_comms(), + d_input, + gathered_v.data(), + size, + rx_sizes, + rx_displs, + int{0}, + handle.get_stream()); + + return gathered_v; +} + +// explicit instantiation + +template rmm::device_uvector device_gatherv(raft::handle_t const &handle, + int32_t const *d_input, + size_t size); + +template rmm::device_uvector device_gatherv(raft::handle_t const &handle, + int64_t const *d_input, + size_t size); + +template rmm::device_uvector device_gatherv(raft::handle_t const &handle, + float const *d_input, + size_t size); + +template rmm::device_uvector device_gatherv(raft::handle_t const &handle, + double const *d_input, + size_t size); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/device_comm_wrapper.hpp b/cpp/tests/utilities/device_comm_wrapper.hpp new file mode 100644 index 00000000000..f56f24248d6 --- /dev/null +++ b/cpp/tests/utilities/device_comm_wrapper.hpp @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace cugraph { +namespace test { + +template +rmm::device_uvector device_gatherv(raft::handle_t const &handle, T const *d_input, size_t size); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/matrix_market_file_utilities.cu b/cpp/tests/utilities/matrix_market_file_utilities.cu index bf7539864be..0457cbcc918 100644 --- a/cpp/tests/utilities/matrix_market_file_utilities.cu +++ b/cpp/tests/utilities/matrix_market_file_utilities.cu @@ -16,10 +16,11 @@ #include -#include -#include -#include -#include +#include +#include +#include +#include +#include #include #include @@ -273,7 +274,7 @@ read_edgelist_from_matrix_market_file(raft::handle_t const& handle, size_t nnz{}; FILE* file = fopen(graph_file_full_path.c_str(), "r"); - CUGRAPH_EXPECTS(file != nullptr, "fopen failure."); + CUGRAPH_EXPECTS(file != nullptr, "fopen (%s) failure.", graph_file_full_path.c_str()); size_t tmp_m{}; size_t tmp_k{}; @@ -409,15 +410,16 @@ read_graph_from_matrix_market_file(raft::handle_t const& handle, } handle.get_stream_view().synchronize(); - return generate_graph_from_edgelist( - handle, - std::move(d_vertices), - std::move(d_edgelist_rows), - std::move(d_edgelist_cols), - std::move(d_edgelist_weights), - is_symmetric, - test_weighted, - renumber); + return cugraph::experimental:: + create_graph_from_edgelist( + handle, + std::optional>{ + std::make_tuple(d_vertices.data(), static_cast(d_vertices.size()))}, + std::move(d_edgelist_rows), + std::move(d_edgelist_cols), + 
std::move(d_edgelist_weights), + cugraph::experimental::graph_properties_t{is_symmetric, false, test_weighted}, + renumber); } // explicit instantiations diff --git a/cpp/tests/utilities/rmat_utilities.cu b/cpp/tests/utilities/rmat_utilities.cu index 3f0bb0b4a1f..fda72fc9054 100644 --- a/cpp/tests/utilities/rmat_utilities.cu +++ b/cpp/tests/utilities/rmat_utilities.cu @@ -16,11 +16,12 @@ #include -#include -#include -#include -#include -#include +#include +#include +#include +#include +#include +#include #include #include @@ -93,15 +94,14 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, rmm::device_uvector d_tmp_rows(0, handle.get_stream()); rmm::device_uvector d_tmp_cols(0, handle.get_stream()); std::tie(i == 0 ? d_edgelist_rows : d_tmp_rows, i == 0 ? d_edgelist_cols : d_tmp_cols) = - cugraph::experimental::generate_rmat_edgelist(handle, - scale, - partition_edge_counts[i], - a, - b, - c, - base_seed + id, - undirected ? true : false, - scramble_vertex_ids); + cugraph::generate_rmat_edgelist(handle, + scale, + partition_edge_counts[i], + a, + b, + c, + base_seed + id, + undirected ? true : false); rmm::device_uvector d_tmp_weights(0, handle.get_stream()); if (test_weighted) { @@ -142,8 +142,29 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, } if (undirected) { - // FIXME: need to symmetrize - CUGRAPH_FAIL("unimplemented."); +// FIXME: may need to undo this and handle symmetrization elsewhere once the new test graph +// generation API gets integrated +#if 1 + auto offset = d_edgelist_rows.size(); + d_edgelist_rows.resize(offset * 2, handle.get_stream()); + d_edgelist_cols.resize(d_edgelist_rows.size(), handle.get_stream()); + d_edgelist_weights.resize(test_weighted ? 
d_edgelist_rows.size() : size_t{0}, + handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_edgelist_cols.begin(), + d_edgelist_cols.begin() + offset, + d_edgelist_rows.begin() + offset); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_edgelist_rows.begin(), + d_edgelist_rows.begin() + offset, + d_edgelist_cols.begin() + offset); + if (test_weighted) { + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_edgelist_weights.begin(), + d_edgelist_weights.begin() + offset, + d_edgelist_weights.begin() + offset); + } +#endif } if (multi_gpu) { @@ -231,15 +252,16 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, d_vertices = std::move(d_rx_vertices); } - return generate_graph_from_edgelist( - handle, - std::move(d_vertices), - std::move(d_edgelist_rows), - std::move(d_edgelist_cols), - std::move(d_edgelist_weights), - false, - test_weighted, - renumber); + return cugraph::experimental:: + create_graph_from_edgelist( + handle, + std::optional>{ + std::make_tuple(d_vertices.data(), static_cast(d_vertices.size()))}, + std::move(d_edgelist_rows), + std::move(d_edgelist_cols), + std::move(d_edgelist_weights), + cugraph::experimental::graph_properties_t{undirected, true, test_weighted}, + renumber); } // explicit instantiations diff --git a/cpp/tests/utilities/test_graphs.hpp b/cpp/tests/utilities/test_graphs.hpp new file mode 100644 index 00000000000..b8ee8f024b0 --- /dev/null +++ b/cpp/tests/utilities/test_graphs.hpp @@ -0,0 +1,558 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include + +namespace cugraph { +namespace test { + +namespace detail { + +class TranslateGraph_Usecase { + public: + TranslateGraph_Usecase() = delete; + TranslateGraph_Usecase(size_t base_vertex_id = 0) : base_vertex_id_(base_vertex_id) {} + + template + void translate(raft::handle_t const& handle, + rmm::device_uvector& d_src, + rmm::device_uvector& d_dst) const + { + if (base_vertex_id_ > 0) + cugraph::test::translate_vertex_ids( + handle, d_src, d_dst, static_cast(base_vertex_id_)); + } + + size_t base_vertex_id_{}; +}; + +} // namespace detail + +class File_Usecase : public detail::TranslateGraph_Usecase { + public: + File_Usecase() = delete; + + File_Usecase(std::string const& graph_file_path, size_t base_vertex_id = 0) + : detail::TranslateGraph_Usecase(base_vertex_id) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path_ = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path_ = graph_file_path; + } + } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool> + construct_edgelist(raft::handle_t const& handle, bool test_weighted) const + { + rmm::device_uvector d_src_v(0, handle.get_stream()); + rmm::device_uvector d_dst_v(0, handle.get_stream()); + rmm::device_uvector d_weights_v(0, handle.get_stream()); + vertex_t num_vertices; + bool is_symmetric; + + std::tie(d_src_v, d_dst_v, d_weights_v, num_vertices, is_symmetric) = + 
read_edgelist_from_matrix_market_file( + handle, graph_file_full_path_, test_weighted); + + translate(handle, d_src_v, d_dst_v); + +#if 0 + if (multi_gpu) { + std::tie(d_src_v, d_dst_v) = filter_edgelist_by_gpu(handle, d_src_v, d_dst_v); + } +#endif + + return std::make_tuple( + std::move(d_src_v), + std::move(d_dst_v), + std::move(d_weights_v), + static_cast(detail::TranslateGraph_Usecase::base_vertex_id_) + num_vertices, + is_symmetric); + } + + template + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector> + construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const + { + rmm::device_uvector d_src_v(0, handle.get_stream()); + rmm::device_uvector d_dst_v(0, handle.get_stream()); + rmm::device_uvector d_weights_v(0, handle.get_stream()); + vertex_t num_vertices; + bool is_symmetric; + + std::tie(d_src_v, d_dst_v, d_weights_v, num_vertices, is_symmetric) = + this->template construct_edgelist( + handle, test_weighted); + + // TODO: Consider calling construct_edgelist and creating + // a generic test function to take the edgelist and + // do the graph construction. 
+ // + // Would be more reusable across tests + // + return read_graph_from_matrix_market_file( + handle, graph_file_full_path_, test_weighted, renumber); + } + + private: + std::string graph_file_full_path_{}; +}; + +class Rmat_Usecase : public detail::TranslateGraph_Usecase { + public: + Rmat_Usecase() = delete; + + Rmat_Usecase(size_t scale, + size_t edge_factor, + double a, + double b, + double c, + uint64_t seed, + bool undirected, + bool scramble_vertex_ids, + size_t base_vertex_id = 0, + bool multi_gpu_usecase = false) + : detail::TranslateGraph_Usecase(base_vertex_id), + scale_(scale), + edge_factor_(edge_factor), + a_(a), + b_(b), + c_(c), + seed_(seed), + undirected_(undirected), + scramble_vertex_ids_(scramble_vertex_ids), + multi_gpu_usecase_(multi_gpu_usecase) + { + } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool> + construct_edgelist(raft::handle_t const& handle, bool test_weighted) const + { + // TODO: Tease through generate_graph_from_rmat_params + // to extract the edgelist part + // Call cugraph::translate_vertex_ids(handle, d_src_v, d_dst_v, base_vertex_id_); + + CUGRAPH_FAIL("Not implemented"); + } + + template + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector> + construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const + { + std::vector partition_ids(1); + size_t comm_size; + + if (multi_gpu_usecase_) { + auto& comm = handle.get_comms(); + comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + + partition_ids.resize(multi_gpu ? size_t{1} : static_cast(comm_size)); + + std::iota(partition_ids.begin(), + partition_ids.end(), + multi_gpu ? 
static_cast(comm_rank) : size_t{0}); + } else { + comm_size = 1; + partition_ids[0] = size_t{0}; + } + + // TODO: Need to offset by base_vertex_id_ + // static_cast(base_vertex_id_)); + // Consider using construct_edgelist like other options + return generate_graph_from_rmat_params( + handle, + scale_, + edge_factor_, + a_, + b_, + c_, + seed_, + undirected_, + scramble_vertex_ids_, + test_weighted, + renumber, + partition_ids, + comm_size); + } + + private: + size_t scale_{}; + size_t edge_factor_{}; + double a_{}; + double b_{}; + double c_{}; + uint64_t seed_{}; + bool undirected_{}; + bool scramble_vertex_ids_{}; + bool multi_gpu_usecase_{}; +}; + +class PathGraph_Usecase { + public: + PathGraph_Usecase() = delete; + + PathGraph_Usecase(std::vector> parms, + bool weighted = false, + bool scramble = false) + : parms_(parms), weighted_(weighted) + { + } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool> + construct_edgelist(raft::handle_t const& handle, bool test_weighted) const + { + rmm::device_uvector weights_v(0, handle.get_stream()); + + constexpr bool symmetric{true}; + + std::vector> converted_parms(parms_.size()); + + std::transform(parms_.begin(), parms_.end(), converted_parms.begin(), [](auto p) { + return std::make_tuple(static_cast(std::get<0>(p)), + static_cast(std::get<1>(p))); + }); + + rmm::device_uvector src_v(0, handle.get_stream()); + rmm::device_uvector dst_v(0, handle.get_stream()); + + std::tie(src_v, dst_v) = + cugraph::generate_path_graph_edgelist(handle, converted_parms); + std::tie(src_v, dst_v, std::ignore) = cugraph::symmetrize_edgelist( + handle, std::move(src_v), std::move(dst_v), std::nullopt); + + if (test_weighted) { + auto length = src_v.size(); + weights_v.resize(length, handle.get_stream()); + } + + return std::make_tuple( + std::move(src_v), std::move(dst_v), std::move(weights_v), num_vertices_, symmetric); + } + + template + std::tuple< + cugraph::experimental::graph_t, + 
rmm::device_uvector> + construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const + { + CUGRAPH_FAIL("not implemented"); + } + + private: + std::vector> parms_{}; + size_t num_vertices_{0}; + bool weighted_{false}; +}; + +class Mesh2DGraph_Usecase { + public: + Mesh2DGraph_Usecase() = delete; + + Mesh2DGraph_Usecase(std::vector> const& parms, bool weighted) + : parms_(parms), weighted_(weighted) + { + } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool> + construct_edgelist(raft::handle_t const& handle, bool test_weighted) const + { + } + + template + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector> + construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const; + + private: + std::vector> parms_{}; + bool weighted_{false}; +}; + +class Mesh3DGraph_Usecase { + public: + Mesh3DGraph_Usecase() = delete; + + Mesh3DGraph_Usecase(std::vector> const& parms, + bool weighted) + : parms_(parms), weighted_(weighted) + { + } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool> + construct_edgelist(raft::handle_t const& handle, bool test_weighted) const; + + template + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector> + construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const; + + private: + std::vector> parms_{}; + bool weighted_{false}; +}; + +class CompleteGraph_Usecase { + public: + CompleteGraph_Usecase() = delete; + + CompleteGraph_Usecase(std::vector> const& parms, bool weighted) + : parms_(parms), weighted_(weighted) + { + } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool> + construct_edgelist(raft::handle_t const& handle, bool test_weighted) const; + + template + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector> + construct_graph(raft::handle_t const& handle, bool test_weighted, bool 
renumber = true) const; + + private: + std::vector> parms_{}; + bool weighted_{false}; +}; + +namespace detail { + +template +struct combined_construct_graph_tuple_impl { + template + std::vector, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool>> + construct_edges(raft::handle_t const& handle, + bool test_weighted, + generator_tuple_t const& generator_tuple) const + { + return combined_construct_graph_tuple_impl() + .construct_edges(generator_tuple) + .push_back(std::get(generator_tuple).construct_edges(handle, test_weighted)); + } +}; + +template +struct combined_construct_graph_tuple_impl { + template + std::vector, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool>> + construct_edges(raft::handle_t const& handle, + bool test_weighted, + generator_tuple_t const& generator_tuple) const + { + return std::vector, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool>>(); + } +}; + +} // namespace detail + +template +class CombinedGenerator_Usecase { + CombinedGenerator_Usecase() = delete; + + CombinedGenerator_Usecase(generator_tuple_t const& tuple) : generator_tuple_(tuple) {} + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector, + vertex_t, + bool> + construct_edgelist(raft::handle_t const& handle, bool test_weighted) const + { + size_t constexpr tuple_size{std::tuple_size::value}; + + auto edge_tuple_vector = + detail::combined_construct_graph_tuple_impl() + .construct_edges(handle, test_weighted, generator_tuple_); + + // Need to combine + CUGRAPH_FAIL("not implemented"); + } + + template + std::tuple< + cugraph::experimental::graph_t, + rmm::device_uvector> + construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const + { + // Call construct_edgelist to get tuple of edge lists + // return generate_graph_from_edgelist<...>(...) 
+ CUGRAPH_FAIL("not implemented"); + } + + private: + generator_tuple_t const& generator_tuple_; +}; + +template +std::tuple, + rmm::device_uvector> +construct_graph(raft::handle_t const& handle, + input_usecase_t const& input_usecase, + bool test_weighted, + bool renumber = true) +{ + rmm::device_uvector d_src_v(0, handle.get_stream()); + rmm::device_uvector d_dst_v(0, handle.get_stream()); + rmm::device_uvector d_weights_v(0, handle.get_stream()); + vertex_t num_vertices{0}; + bool is_symmetric{false}; + + std::tie(d_src_v, d_dst_v, d_weights_v, num_vertices, is_symmetric) = + input_usecase + .template construct_edgelist( + handle, test_weighted); + + return cugraph::experimental:: + create_graph_from_edgelist( + handle, + std::nullopt, + std::move(d_src_v), + std::move(d_dst_v), + std::move(d_weights_v), + cugraph::experimental::graph_properties_t{is_symmetric, false, test_weighted}, + renumber); +} + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 196128e37c0..09da0556e44 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -15,14 +15,17 @@ */ #pragma once -#include -#include +#include +#include +#include +#include #include #include #include #include +#include #include extern "C" { @@ -107,22 +110,6 @@ static const std::string& get_rapids_dataset_root_dir() return rdrd; } -template -std::tuple, - rmm::device_uvector> -generate_graph_from_edgelist(raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& edgelist_rows, - rmm::device_uvector&& edgelist_cols, - rmm::device_uvector&& edgelist_weights, - bool is_symmetric, - bool test_weighted, - bool renumber); - // returns a tuple of (rows, columns, weights, number_of_vertices, is_symmetric) template std::tuple, @@ -168,121 +155,171 @@ generate_graph_from_rmat_params(raft::handle_t const& handle, std::vector const& partition_ids, size_t 
num_partitions); -class File_Usecase { - public: - File_Usecase() = delete; +// alias for easy customization for debug purposes: +// +template +using vector_test_t = rmm::device_uvector; - File_Usecase(std::string const& graph_file_path) - { - if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { - graph_file_full_path_ = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; - } else { - graph_file_full_path_ = graph_file_path; - } - } +template +decltype(auto) make_graph(raft::handle_t const& handle, + std::vector const& v_src, + std::vector const& v_dst, + std::vector const& v_w, + vertex_t num_vertices, + edge_t num_edges, + bool is_weighted) +{ + using namespace cugraph::experimental; - template - std::tuple< - cugraph::experimental::graph_t, - rmm::device_uvector> - construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const - { - return read_graph_from_matrix_market_file( - handle, graph_file_full_path_, test_weighted, renumber); - } + vector_test_t d_src(num_edges, handle.get_stream()); + vector_test_t d_dst(num_edges, handle.get_stream()); + vector_test_t d_weights(num_edges, handle.get_stream()); + + raft::update_device(d_src.data(), v_src.data(), d_src.size(), handle.get_stream()); + raft::update_device(d_dst.data(), v_dst.data(), d_dst.size(), handle.get_stream()); - private: - std::string graph_file_full_path_{}; -}; - -class Rmat_Usecase { - public: - Rmat_Usecase() = delete; - - Rmat_Usecase(size_t scale, - size_t edge_factor, - double a, - double b, - double c, - uint64_t seed, - bool undirected, - bool scramble_vertex_ids, - bool multi_gpu_usecase = false) - : scale_(scale), - edge_factor_(edge_factor), - a_(a), - b_(b), - c_(c), - seed_(seed), - undirected_(undirected), - scramble_vertex_ids_(scramble_vertex_ids), - multi_gpu_usecase_(multi_gpu_usecase) - { + weight_t* ptr_d_weights{nullptr}; + if (is_weighted) { + raft::update_device(d_weights.data(), v_w.data(), d_weights.size(), 
handle.get_stream()); + + ptr_d_weights = d_weights.data(); } - template - std::tuple< - cugraph::experimental::graph_t, - rmm::device_uvector> - construct_graph(raft::handle_t const& handle, bool test_weighted, bool renumber = true) const - { - std::vector partition_ids(1); - size_t comm_size; - - if (multi_gpu_usecase_) { - auto& comm = handle.get_comms(); - comm_size = comm.get_size(); - auto const comm_rank = comm.get_rank(); - - partition_ids.resize(multi_gpu ? size_t{1} : static_cast(comm_size)); - - std::iota(partition_ids.begin(), - partition_ids.end(), - multi_gpu ? static_cast(comm_rank) : size_t{0}); - } else { - comm_size = 1; - partition_ids[0] = size_t{0}; + edgelist_t edgelist{ + d_src.data(), d_dst.data(), ptr_d_weights, num_edges}; + + graph_t graph( + handle, edgelist, num_vertices, graph_properties_t{false, false, is_weighted}, false); + + return graph; +} + +// compares single GPU CSR graph data: +// (for testing / debugging); +// on first == false, second == brief description of what is different; +// +template +std::pair compare_graphs(raft::handle_t const& handle, + left_graph_t const& lgraph, + right_graph_t const& rgraph) +{ + if constexpr (left_graph_t::is_multi_gpu && right_graph_t::is_multi_gpu) { + // no support for comparing distributed graphs, yet: + // + CUGRAPH_FAIL("Unsupported graph type for comparison."); + return std::make_pair(false, std::string("unsupported")); + } else if constexpr (!std::is_same_v) { + return std::make_pair(false, std::string("type")); + } else { + // both graphs are single GPU: + // + using graph_t = left_graph_t; + + using vertex_t = typename graph_t::vertex_type; + using edge_t = typename graph_t::edge_type; + using weight_t = typename graph_t::weight_type; + + size_t num_vertices = lgraph.get_number_of_vertices(); + size_t num_edges = lgraph.get_number_of_edges(); + + { + size_t r_num_vertices = rgraph.get_number_of_vertices(); + size_t r_num_edges = rgraph.get_number_of_edges(); + + if (num_vertices != 
r_num_vertices) return std::make_pair(false, std::string("num_vertices")); + + if (num_edges != r_num_edges) return std::make_pair(false, std::string("num_edges")); + } + + if (lgraph.is_symmetric() != rgraph.is_symmetric()) + return std::make_pair(false, std::string("symmetric")); + + if (lgraph.is_multigraph() != rgraph.is_multigraph()) + return std::make_pair(false, std::string("multigraph")); + + bool is_weighted = lgraph.is_weighted(); + if (is_weighted != rgraph.is_weighted()) return std::make_pair(false, std::string("weighted")); + + auto lgraph_view = lgraph.view(); + auto rgraph_view = rgraph.view(); + + std::vector lv_ro(num_vertices + 1); + std::vector lv_ci(num_edges); + + raft::update_host(lv_ro.data(), lgraph_view.offsets(), num_vertices + 1, handle.get_stream()); + raft::update_host(lv_ci.data(), lgraph_view.indices(), num_edges, handle.get_stream()); + + std::vector rv_ro(num_vertices + 1); + std::vector rv_ci(num_edges); + + raft::update_host(rv_ro.data(), rgraph_view.offsets(), num_vertices + 1, handle.get_stream()); + raft::update_host(rv_ci.data(), rgraph_view.indices(), num_edges, handle.get_stream()); + + if (lv_ro != rv_ro) return std::make_pair(false, std::string("offsets")); + + if (lv_ci != rv_ci) return std::make_pair(false, std::string("indices")); + + if (is_weighted) { + std::vector lv_vs(num_edges); + raft::update_host(lv_vs.data(), lgraph_view.weights(), num_edges, handle.get_stream()); + + std::vector rv_vs(num_edges); + raft::update_host(rv_vs.data(), rgraph_view.weights(), num_edges, handle.get_stream()); + + if (lv_vs != rv_vs) return std::make_pair(false, std::string("values")); } - return generate_graph_from_rmat_params( - handle, - scale_, - edge_factor_, - a_, - b_, - c_, - seed_, - undirected_, - scramble_vertex_ids_, - test_weighted, - renumber, - partition_ids, - comm_size); + if (lgraph_view.get_local_adj_matrix_partition_segment_offsets(0) != + rgraph_view.get_local_adj_matrix_partition_segment_offsets(0)) + return 
std::make_pair(false, std::string("segment offsets")); + + return std::make_pair(true, std::string{}); } +} + +template +bool renumbered_vectors_same(raft::handle_t const& handle, + std::vector const& v1, + std::vector const& v2) +{ + if (v1.size() != v2.size()) return false; + + std::map map; + + auto iter = thrust::make_zip_iterator(thrust::make_tuple(v1.begin(), v2.begin())); + + std::for_each(iter, iter + v1.size(), [&map](auto pair) { + vertex_t e1 = thrust::get<0>(pair); + vertex_t e2 = thrust::get<1>(pair); + + map[e1] = e2; + }); + + auto error_count = std::count_if(iter, iter + v1.size(), [&map](auto pair) { + vertex_t e1 = thrust::get<0>(pair); + vertex_t e2 = thrust::get<1>(pair); + + return (map[e1] != e2); + }); - private: - size_t scale_{}; - size_t edge_factor_{}; - double a_{}; - double b_{}; - double c_{}; - uint64_t seed_{}; - bool undirected_{}; - bool scramble_vertex_ids_{}; - bool multi_gpu_usecase_{}; -}; + return (error_count == 0); +} + +template +bool renumbered_vectors_same(raft::handle_t const& handle, + rmm::device_uvector const& v1, + rmm::device_uvector const& v2) +{ + if (v1.size() != v2.size()) return false; + + std::vector h_v1(v1.size()); + std::vector h_v2(v1.size()); + + raft::update_host(h_v1.data(), v1.data(), v1.size(), handle.get_stream()); + raft::update_host(h_v2.data(), v2.data(), v2.size(), handle.get_stream()); + + return renumbered_vectors_same(handle, h_v1, h_v2); +} } // namespace test } // namespace cugraph diff --git a/cpp/tests/utilities/thrust_wrapper.cu b/cpp/tests/utilities/thrust_wrapper.cu index 5d32fb8a5d1..ae36582d18d 100644 --- a/cpp/tests/utilities/thrust_wrapper.cu +++ b/cpp/tests/utilities/thrust_wrapper.cu @@ -26,10 +26,8 @@ namespace cugraph { namespace test { template -rmm::device_uvector sort_by_key(raft::handle_t const& handle, - vertex_t const* keys, - value_t const* values, - size_t num_pairs) +std::tuple, rmm::device_uvector> sort_by_key( + raft::handle_t const& handle, vertex_t const* keys, 
value_t const* values, size_t num_pairs) { rmm::device_uvector sorted_keys(num_pairs, handle.get_stream_view()); rmm::device_uvector sorted_values(num_pairs, handle.get_stream_view()); @@ -44,38 +42,92 @@ rmm::device_uvector sort_by_key(raft::handle_t const& handle, sorted_keys.end(), sorted_values.begin()); - return sorted_values; + return std::make_tuple(std::move(sorted_keys), std::move(sorted_values)); } -template rmm::device_uvector sort_by_key(raft::handle_t const& handle, - int32_t const* keys, - float const* values, - size_t num_pairs); - -template rmm::device_uvector sort_by_key(raft::handle_t const& handle, - int32_t const* keys, - double const* values, - size_t num_pairs); - -template rmm::device_uvector sort_by_key(raft::handle_t const& handle, - int32_t const* keys, - int32_t const* values, - size_t num_pairs); - -template rmm::device_uvector sort_by_key(raft::handle_t const& handle, - int64_t const* keys, - float const* values, - size_t num_pairs); - -template rmm::device_uvector sort_by_key(raft::handle_t const& handle, - int64_t const* keys, - double const* values, - size_t num_pairs); - -template rmm::device_uvector sort_by_key(raft::handle_t const& handle, - int64_t const* keys, - int64_t const* values, - size_t num_pairs); +template std::tuple, rmm::device_uvector> +sort_by_key(raft::handle_t const& handle, + int32_t const* keys, + float const* values, + size_t num_pairs); + +template std::tuple, rmm::device_uvector> +sort_by_key(raft::handle_t const& handle, + int32_t const* keys, + double const* values, + size_t num_pairs); + +template std::tuple, rmm::device_uvector> +sort_by_key(raft::handle_t const& handle, + int32_t const* keys, + int32_t const* values, + size_t num_pairs); + +template std::tuple, rmm::device_uvector> +sort_by_key(raft::handle_t const& handle, + int64_t const* keys, + float const* values, + size_t num_pairs); + +template std::tuple, rmm::device_uvector> +sort_by_key(raft::handle_t const& handle, + int64_t const* keys, + 
double const* values, + size_t num_pairs); + +template std::tuple, rmm::device_uvector> +sort_by_key(raft::handle_t const& handle, + int64_t const* keys, + int64_t const* values, + size_t num_pairs); + +template +void translate_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_src_v, + rmm::device_uvector& d_dst_v, + vertex_t vertex_id_offset) +{ + thrust::transform(rmm::exec_policy(handle.get_stream()), + d_src_v.begin(), + d_src_v.end(), + d_src_v.begin(), + [offset = vertex_id_offset] __device__(vertex_t v) { return offset + v; }); + + thrust::transform(rmm::exec_policy(handle.get_stream()), + d_dst_v.begin(), + d_dst_v.end(), + d_dst_v.begin(), + [offset = vertex_id_offset] __device__(vertex_t v) { return offset + v; }); +} + +template +void populate_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_vertices_v, + vertex_t vertex_id_offset) +{ + thrust::sequence(rmm::exec_policy(handle.get_stream()), + d_vertices_v.begin(), + d_vertices_v.end(), + vertex_id_offset); +} + +template void translate_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_src_v, + rmm::device_uvector& d_dst_v, + int32_t vertex_id_offset); + +template void translate_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_src_v, + rmm::device_uvector& d_dst_v, + int64_t vertex_id_offset); + +template void populate_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_vertices_v, + int32_t vertex_id_offset); + +template void populate_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_vertices_v, + int64_t vertex_id_offset); } // namespace test } // namespace cugraph diff --git a/cpp/tests/utilities/thrust_wrapper.hpp b/cpp/tests/utilities/thrust_wrapper.hpp index 579dc3c550f..45208a6b921 100644 --- a/cpp/tests/utilities/thrust_wrapper.hpp +++ b/cpp/tests/utilities/thrust_wrapper.hpp @@ -17,14 +17,25 @@ #include #include +#include + namespace cugraph { namespace test { template -rmm::device_uvector 
sort_by_key(raft::handle_t const& handle, - vertex_t const* keys, - value_t const* values, - size_t num_pairs); +std::tuple, rmm::device_uvector> sort_by_key( + raft::handle_t const& handle, vertex_t const* keys, value_t const* values, size_t num_pairs); + +template +void translate_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_src_v, + rmm::device_uvector& d_dst_v, + vertex_t vertex_id_offset); + +template +void populate_vertex_ids(raft::handle_t const& handle, + rmm::device_uvector& d_vertices_v, + vertex_t vertex_id_offset); } // namespace test } // namespace cugraph diff --git a/docs/cugraph/source/api.rst b/docs/cugraph/source/api.rst index e2c2c19cf02..adcf33d1191 100644 --- a/docs/cugraph/source/api.rst +++ b/docs/cugraph/source/api.rst @@ -10,7 +10,7 @@ Structure Graph ----- -.. autoclass:: cugraph.structure.graph.Graph +.. autoclass:: cugraph.structure.graph_classes.Graph :members: :undoc-members: @@ -143,7 +143,7 @@ Core Number .. automodule:: cugraph.cores.core_number :members: :undoc-members: - + K-Core ------ @@ -196,7 +196,7 @@ Pagerank (MG) .. 
automodule:: cugraph.dask.link_analysis.pagerank :members: pagerank - :undoc-members: + :undoc-members: Link Prediction diff --git a/notebooks/cugraph_benchmarks/random_walk_benchmark.ipynb b/notebooks/cugraph_benchmarks/random_walk_benchmark.ipynb index be50c075455..65cf9fb59eb 100644 --- a/notebooks/cugraph_benchmarks/random_walk_benchmark.ipynb +++ b/notebooks/cugraph_benchmarks/random_walk_benchmark.ipynb @@ -124,7 +124,8 @@ "source": [ "def run_rw(_G, _seeds, _depth):\n", " t1 = time.time()\n", - " _, _ = cugraph.random_walks(_G, _seeds, _depth)\n", + " # cugraph.random_walks() returns a 3-tuple, which is being ignored here.\n", + " cugraph.random_walks(_G, _seeds, _depth)\n", " t2 = time.time() - t1\n", " return t2" ] @@ -450,7 +451,7 @@ "metadata": {}, "outputs": [], "source": [ - "# sequenctial = so also get a single random seed\n", + "# sequential = so also get a single random seed\n", "for i in range (max_seeds) :\n", " for j in range(i):\n", " seeds = random.sample(nodes, 1)\n", diff --git a/notebooks/demo/batch_betweenness.ipynb b/notebooks/demo/batch_betweenness.ipynb index e2ad83ff1c4..885d26c9523 100644 --- a/notebooks/demo/batch_betweenness.ipynb +++ b/notebooks/demo/batch_betweenness.ipynb @@ -138,7 +138,7 @@ "outputs": [], "source": [ "t_start_read_sg = time.perf_counter()\n", - "e_list = cudf.read_csv(input_data_path, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'])\n", + "e_list = cudf.read_csv(input_data_path, delimiter='\\t', names=['src', 'dst'], dtype=['int32', 'int32'], comment='#')\n", "t_stop_read_sg = time.perf_counter()" ] }, diff --git a/notebooks/link_prediction/Jaccard-Similarity.ipynb b/notebooks/link_prediction/Jaccard-Similarity.ipynb index 21835da1cce..7003bdbc98e 100755 --- a/notebooks/link_prediction/Jaccard-Similarity.ipynb +++ b/notebooks/link_prediction/Jaccard-Similarity.ipynb @@ -451,8 +451,9 @@ "metadata": {}, "outputs": [], "source": [ + "pr_df.rename(columns={'pagerank': 'weight'}, inplace=True)", "# 
Call weighted Jaccard using the Pagerank scores as weights:\n", - "wdf = cugraph.jaccard_w(G, pr_df['pagerank'])" + "wdf = cugraph.jaccard_w(G, pr_df)" ] }, { diff --git a/notebooks/sampling/RandomWalk.ipynb b/notebooks/sampling/RandomWalk.ipynb index 31a521db1c1..84f8e1db07f 100644 --- a/notebooks/sampling/RandomWalk.ipynb +++ b/notebooks/sampling/RandomWalk.ipynb @@ -34,7 +34,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -58,7 +58,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -67,7 +67,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -78,20 +78,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(34, 78)" - ] - }, - "execution_count": 5, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "# some stats on the graph\n", "(G.number_of_nodes(), G.number_of_edges() )" @@ -99,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -109,11 +98,21 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "rw, so = cugraph.random_walks(G, seeds, 4)" + "# random walk path length\n", + "path_length = 4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "rw, so, sz = cugraph.random_walks(G, seeds, path_length, use_padding=True)" ] }, { @@ -131,144 +130,27 @@ }, { "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - 
"text/plain": [ - "0 0\n", - "1 3\n", - "2 6\n", - "dtype: int64" - ] - }, - "execution_count": 8, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "so" - ] - }, - { - "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
srcdstweight
01761.0
16171.0
21761.0
319331.0
433311.0
53121.0
\n", - "
" - ], - "text/plain": [ - " src dst weight\n", - "0 17 6 1.0\n", - "1 6 17 1.0\n", - "2 17 6 1.0\n", - "3 19 33 1.0\n", - "4 33 31 1.0\n", - "5 31 2 1.0" - ] - }, - "execution_count": 9, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ - "rw" + "rw.head(10)" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "seed 17 starts at index 0 and is 3 rows\n", - "seed 19 starts at index 3 and is 3 rows\n" - ] - } - ], + "outputs": [], "source": [ + "idx = 0\n", "for i in range(len(seeds)):\n", - " print(f\"seed {seeds[i]} starts at index {so[i]} and is {so[1 + 1] - so[1]} rows\")" + " for j in range(path_length):\n", + " print(f\"{rw[idx]}\", end=\" \")\n", + " idx += 1\n", + " print(\" \")" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - }, { "cell_type": "markdown", "metadata": {}, @@ -305,7 +187,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.8" + "version": "3.8.10" } }, "nbformat": 4, diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index 1a113b93d8d..55c35fa7b4b 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -107,7 +107,7 @@ from cugraph.raft import raft_include_test from cugraph.comms import comms -from cugraph.sampling import random_walks +from cugraph.sampling import random_walks, rw_path # Versioneer from ._version import get_versions diff --git a/python/cugraph/centrality/betweenness_centrality.pxd b/python/cugraph/centrality/betweenness_centrality.pxd index 829d7be37d9..7abc9009cc8 100644 --- a/python/cugraph/centrality/betweenness_centrality.pxd +++ b/python/cugraph/centrality/betweenness_centrality.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,7 +19,7 @@ from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef void betweenness_centrality[VT, ET, WT, result_t]( const handle_t &handle, diff --git a/python/cugraph/centrality/katz_centrality.pxd b/python/cugraph/centrality/katz_centrality.pxd index ce9ab5291f6..c48a90904da 100644 --- a/python/cugraph/centrality/katz_centrality.pxd +++ b/python/cugraph/centrality/katz_centrality.pxd @@ -19,7 +19,7 @@ from cugraph.structure.graph_utilities cimport * from libcpp cimport bool -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef void call_katz_centrality[VT,WT]( const handle_t &handle, diff --git a/python/cugraph/comms/comms.pxd b/python/cugraph/comms/comms.pxd index 3984ade9a9c..5bc24c0d639 100644 --- a/python/cugraph/comms/comms.pxd +++ b/python/cugraph/comms/comms.pxd @@ -19,7 +19,7 @@ from cugraph.raft.common.handle cimport * -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef void init_subcomms(handle_t &handle, size_t row_comm_size) diff --git a/python/cugraph/community/ecg.pxd b/python/cugraph/community/ecg.pxd index 9f1dc269b6f..4f13237eac7 100644 --- a/python/cugraph/community/ecg.pxd +++ b/python/cugraph/community/ecg.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -19,7 +19,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef void ecg[VT,ET,WT]( const handle_t &handle, diff --git a/python/cugraph/community/egonet.pxd b/python/cugraph/community/egonet.pxd index cf1c84fb5f7..acf93330447 100644 --- a/python/cugraph/community/egonet.pxd +++ b/python/cugraph/community/egonet.pxd @@ -14,7 +14,7 @@ from cugraph.structure.graph_utilities cimport * -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef unique_ptr[cy_multi_edgelists_t] call_egonet[vertex_t, weight_t]( const handle_t &handle, const graph_container_t &g, diff --git a/python/cugraph/community/egonet_wrapper.pyx b/python/cugraph/community/egonet_wrapper.pyx index 23aa159314f..eb62c2aa56c 100644 --- a/python/cugraph/community/egonet_wrapper.pyx +++ b/python/cugraph/community/egonet_wrapper.pyx @@ -55,6 +55,8 @@ def egonet(input_graph, vertices, radius=1): weight_t = np.dtype("float32") is_weighted = False + is_symmetric = not input_graph.is_directed() + # Pointers for egonet vertices = vertices.astype('int32') cdef uintptr_t c_source_vertex_ptr = vertices.__cuda_array_interface__['data'][0] @@ -79,6 +81,7 @@ def egonet(input_graph, vertices, radius=1): num_edges, False, is_weighted, + is_symmetric, False, False) if(weight_t==np.dtype("float32")): diff --git a/python/cugraph/community/ktruss_subgraph.pxd b/python/cugraph/community/ktruss_subgraph.pxd index ab3a5189414..d993c31c375 100644 --- a/python/cugraph/community/ktruss_subgraph.pxd +++ b/python/cugraph/community/ktruss_subgraph.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,7 +19,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef unique_ptr[GraphCOO[VT,ET,WT]] k_truss_subgraph[VT,ET,WT]( const GraphCOOView[VT,ET,WT] &graph, diff --git a/python/cugraph/community/leiden.pxd b/python/cugraph/community/leiden.pxd index 80e0e12f65a..871dc826c06 100644 --- a/python/cugraph/community/leiden.pxd +++ b/python/cugraph/community/leiden.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -21,7 +21,7 @@ from libcpp.utility cimport pair from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef pair[size_t, weight_t] leiden[vertex_t,edge_t,weight_t]( const handle_t &handle, diff --git a/python/cugraph/community/louvain.pxd b/python/cugraph/community/louvain.pxd index 1f75c13dbaf..08625047285 100644 --- a/python/cugraph/community/louvain.pxd +++ b/python/cugraph/community/louvain.pxd @@ -21,7 +21,7 @@ from libcpp.utility cimport pair from cugraph.structure.graph_utilities cimport * -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef pair[size_t, weight_t] call_louvain[weight_t]( const handle_t &handle, diff --git a/python/cugraph/community/spectral_clustering.pxd b/python/cugraph/community/spectral_clustering.pxd index 27ce6130b05..346eb50a157 100644 --- 
a/python/cugraph/community/spectral_clustering.pxd +++ b/python/cugraph/community/spectral_clustering.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,7 +19,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph::ext_raft": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph::ext_raft": cdef void balancedCutClustering[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph, diff --git a/python/cugraph/community/spectral_clustering.py b/python/cugraph/community/spectral_clustering.py index 443e2169711..06294af00c9 100644 --- a/python/cugraph/community/spectral_clustering.py +++ b/python/cugraph/community/spectral_clustering.py @@ -190,8 +190,8 @@ def analyzeClustering_modularity(G, n_clusters, clustering, Specifies the number of clusters in the given clustering clustering : cudf.DataFrame The cluster assignment to analyze. 
- vertex_col_name : str - The name of the column in the clustering dataframe identifying + vertex_col_name : str or list of str + The names of the column in the clustering dataframe identifying the external vertex id cluster_col_name : str The name of the column in the clustering dataframe identifying @@ -213,8 +213,10 @@ def analyzeClustering_modularity(G, n_clusters, clustering, >>> df = cugraph.spectralBalancedCutClustering(G, 5) >>> score = cugraph.analyzeClustering_modularity(G, 5, df) """ - - if type(vertex_col_name) is not str: + if type(vertex_col_name) is list: + if not all(isinstance(name, str) for name in vertex_col_name): + raise Exception("vertex_col_name must be list of string") + elif type(vertex_col_name) is not str: raise Exception("vertex_col_name must be a string") if type(cluster_col_name) is not str: @@ -224,11 +226,11 @@ def analyzeClustering_modularity(G, n_clusters, clustering, if G.renumbered: clustering = G.add_internal_vertex_id(clustering, - vertex_col_name, + 'vertex', vertex_col_name, drop=True) - clustering = clustering.sort_values(vertex_col_name) + clustering = clustering.sort_values('vertex') score = spectral_clustering_wrapper.analyzeClustering_modularity( G, n_clusters, clustering[cluster_col_name] @@ -277,8 +279,10 @@ def analyzeClustering_edge_cut(G, n_clusters, clustering, >>> df = cugraph.spectralBalancedCutClustering(G, 5) >>> score = cugraph.analyzeClustering_edge_cut(G, 5, df) """ - - if type(vertex_col_name) is not str: + if type(vertex_col_name) is list: + if not all(isinstance(name, str) for name in vertex_col_name): + raise Exception("vertex_col_name must be list of string") + elif type(vertex_col_name) is not str: raise Exception("vertex_col_name must be a string") if type(cluster_col_name) is not str: @@ -288,11 +292,11 @@ def analyzeClustering_edge_cut(G, n_clusters, clustering, if G.renumbered: clustering = G.add_internal_vertex_id(clustering, - vertex_col_name, + 'vertex', vertex_col_name, drop=True) - clustering 
= clustering.sort_values(vertex_col_name).reset_index(drop=True) + clustering = clustering.sort_values('vertex').reset_index(drop=True) score = spectral_clustering_wrapper.analyzeClustering_edge_cut( G, n_clusters, clustering[cluster_col_name] @@ -339,8 +343,10 @@ def analyzeClustering_ratio_cut(G, n_clusters, clustering, >>> score = cugraph.analyzeClustering_ratio_cut(G, 5, df, >>> 'vertex', 'cluster') """ - - if type(vertex_col_name) is not str: + if type(vertex_col_name) is list: + if not all(isinstance(name, str) for name in vertex_col_name): + raise Exception("vertex_col_name must be list of string") + elif type(vertex_col_name) is not str: raise Exception("vertex_col_name must be a string") if type(cluster_col_name) is not str: @@ -348,11 +354,11 @@ def analyzeClustering_ratio_cut(G, n_clusters, clustering, if G.renumbered: clustering = G.add_internal_vertex_id(clustering, - vertex_col_name, + 'vertex', vertex_col_name, drop=True) - clustering = clustering.sort_values(vertex_col_name) + clustering = clustering.sort_values('vertex') score = spectral_clustering_wrapper.analyzeClustering_ratio_cut( G, n_clusters, clustering[cluster_col_name] diff --git a/python/cugraph/community/subgraph_extraction.pxd b/python/cugraph/community/subgraph_extraction.pxd index 97a71056006..583e220327d 100644 --- a/python/cugraph/community/subgraph_extraction.pxd +++ b/python/cugraph/community/subgraph_extraction.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -20,7 +20,7 @@ from cugraph.structure.graph_primtypes cimport * from libcpp.memory cimport unique_ptr -cdef extern from "algorithms.hpp" namespace "cugraph::subgraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph::subgraph": cdef unique_ptr[GraphCOO[VT,ET,WT]] extract_subgraph_vertex[VT,ET,WT]( const GraphCOOView[VT,ET,WT] &graph, diff --git a/python/cugraph/community/triangle_count.pxd b/python/cugraph/community/triangle_count.pxd index 70795a3f43a..55e8114ccbf 100644 --- a/python/cugraph/community/triangle_count.pxd +++ b/python/cugraph/community/triangle_count.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -20,7 +20,7 @@ from cugraph.structure.graph_primtypes cimport * from libc.stdint cimport uint64_t -cdef extern from "algorithms.hpp" namespace "cugraph::triangle": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph::triangle": cdef uint64_t triangle_count[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph) except + diff --git a/python/cugraph/components/connectivity.pxd b/python/cugraph/components/connectivity.pxd index 94fa165969d..678836216b9 100644 --- a/python/cugraph/components/connectivity.pxd +++ b/python/cugraph/components/connectivity.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -17,9 +17,10 @@ # cython: language_level = 3 from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": ctypedef enum cugraph_cc_t: CUGRAPH_WEAK "cugraph::cugraph_cc_t::CUGRAPH_WEAK" @@ -30,3 +31,9 @@ cdef extern from "algorithms.hpp" namespace "cugraph": const GraphCSRView[VT,ET,WT] &graph, cugraph_cc_t connect_type, VT *labels) except + + +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": + cdef void call_wcc[vertex_t, weight_t]( + const handle_t &handle, + const graph_container_t &g, + vertex_t *identifiers) except + diff --git a/python/cugraph/components/connectivity_wrapper.pyx b/python/cugraph/components/connectivity_wrapper.pyx index ac173de3564..588595644ed 100644 --- a/python/cugraph/components/connectivity_wrapper.pyx +++ b/python/cugraph/components/connectivity_wrapper.pyx @@ -18,6 +18,7 @@ from cugraph.components.connectivity cimport * from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_utilities cimport * from cugraph.structure import utils_wrapper from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t @@ -30,47 +31,59 @@ def weakly_connected_components(input_graph): """ Call connected_components """ - offsets = None - indices = None + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get() + + numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, + np.dtype("int64") : numberTypeEnum.int64Type, + np.dtype("float32") : numberTypeEnum.floatType, + np.dtype("double") : numberTypeEnum.doubleType} + + [src, dst] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], + input_graph.edgelist.edgelist_df['dst']], + [np.int32]) if type(input_graph) is not type_Graph: # - # Need to create a 
symmetrized CSR for this local - # computation, don't want to keep it. - # - [src, dst] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], - input_graph.edgelist.edgelist_df['dst']], - [np.int32]) - src, dst = symmetrize(src, dst) - [offsets, indices] = utils_wrapper.coo2csr(src, dst)[0:2] - else: - if not input_graph.adjlist: - input_graph.view_adj_list() - - [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, - input_graph.adjlist.indices], - [np.int32]) + # Need to create a symmetrized COO for this local + # computation + src, dst = symmetrize(src, dst) + weight_t = np.dtype("float32") + weights = None + num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) df = cudf.DataFrame() - df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) + df['vertex'] = cudf.Series(np.arange(num_verts, dtype=np.int32)) df['labels'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - cdef uintptr_t c_offsets = offsets.__cuda_array_interface__['data'][0] - cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] - cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0]; + cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] + cdef uintptr_t c_edge_weights = NULL cdef uintptr_t c_labels_val = df['labels'].__cuda_array_interface__['data'][0]; - cdef GraphCSRView[int,int,float] g - - g = GraphCSRView[int,int,float](c_offsets, c_indices, NULL, num_verts, num_edges) - - cdef cugraph_cc_t connect_type=CUGRAPH_WEAK - connected_components(g, connect_type, c_labels_val) - - g.get_vertex_identifiers(c_identifier) + cdef graph_container_t graph_container + populate_graph_container(graph_container, + handle_[0], + c_src_vertices, c_dst_vertices, c_edge_weights, + NULL, + ((numberTypeEnum.int32Type)), + ((numberTypeEnum.int32Type)), + 
((numberTypeMap[weight_t])), + num_edges, + num_verts, num_edges, + False, + False, + True, + False, + False) + + call_wcc[int, float](handle_ptr.get()[0], + graph_container, + c_labels_val) return df diff --git a/python/cugraph/cores/core_number.pxd b/python/cugraph/cores/core_number.pxd index cf28720a3e8..17dc1118a5e 100644 --- a/python/cugraph/cores/core_number.pxd +++ b/python/cugraph/cores/core_number.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,7 +18,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef void core_number[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph, diff --git a/python/cugraph/cores/k_core.pxd b/python/cugraph/cores/k_core.pxd index 556dbc95ed9..1d22e7ac4d2 100644 --- a/python/cugraph/cores/k_core.pxd +++ b/python/cugraph/cores/k_core.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,7 +18,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef unique_ptr[GraphCOO[VT,ET,WT]] k_core[VT,ET,WT]( const GraphCOOView[VT,ET,WT] &in_graph, diff --git a/python/cugraph/dask/__init__.py b/python/cugraph/dask/__init__.py index 830de45c50b..60aebaf19b0 100644 --- a/python/cugraph/dask/__init__.py +++ b/python/cugraph/dask/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. 
+# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,3 +17,4 @@ from .common.read_utils import get_chunksize from .community.louvain import louvain from .centrality.katz_centrality import katz_centrality +from .components.connectivity import weakly_connected_components diff --git a/python/cugraph/dask/centrality/mg_katz_centrality.pxd b/python/cugraph/dask/centrality/mg_katz_centrality.pxd index fb1730da13b..5e30530e92b 100644 --- a/python/cugraph/dask/centrality/mg_katz_centrality.pxd +++ b/python/cugraph/dask/centrality/mg_katz_centrality.pxd @@ -18,7 +18,7 @@ from cugraph.structure.graph_utilities cimport * from libcpp cimport bool -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef void call_katz_centrality[vertex_t, weight_t]( const handle_t &handle, diff --git a/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx b/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx index 5fb9de788cf..c072d5ec143 100644 --- a/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx +++ b/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx @@ -95,6 +95,7 @@ def mg_katz_centrality(input_df, num_global_verts, num_global_edges, True, is_weighted, + False, True, True) df = cudf.DataFrame() diff --git a/python/cugraph/dask/community/louvain.pxd b/python/cugraph/dask/community/louvain.pxd index 738309dac8a..ab990330028 100644 --- a/python/cugraph/dask/community/louvain.pxd +++ b/python/cugraph/dask/community/louvain.pxd @@ -20,7 +20,7 @@ from libcpp.utility cimport pair from cugraph.structure.graph_utilities cimport * -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef 
pair[size_t, weight_t] call_louvain[weight_t]( const handle_t &handle, diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index a3cebeac272..4585270c879 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -98,6 +98,7 @@ def louvain(input_df, num_global_verts, num_global_edges, sorted_by_degree, True, + False, False, True) # store_transposed, multi_gpu # Create the output dataframe, column lengths must be equal to the number of diff --git a/python/cugraph/dask/components/__init__.py b/python/cugraph/dask/components/__init__.py new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cugraph/dask/components/connectivity.py b/python/cugraph/dask/components/connectivity.py new file mode 100644 index 00000000000..7f3a647a0d9 --- /dev/null +++ b/python/cugraph/dask/components/connectivity.py @@ -0,0 +1,63 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from dask.distributed import wait, default_client +from cugraph.dask.common.input_utils import (get_distributed_data, + get_vertex_partition_offsets) +from cugraph.dask.components import mg_connectivity_wrapper as mg_connectivity +import cugraph.comms.comms as Comms +import dask_cudf + + +def call_wcc(sID, + data, + num_verts, + num_edges, + vertex_partition_offsets): + wid = Comms.get_worker_id(sID) + handle = Comms.get_handle(sID) + return mg_connectivity.mg_wcc(data[0], + num_verts, + num_edges, + vertex_partition_offsets, + wid, + handle) + + +def weakly_connected_components(input_graph): + + client = default_client() + + input_graph.compute_renumber_edge_list() + + ddf = input_graph.edgelist.edgelist_df + vertex_partition_offsets = get_vertex_partition_offsets(input_graph) + num_verts = vertex_partition_offsets.iloc[-1] + num_edges = len(ddf) + data = get_distributed_data(ddf) + + result = [client.submit(call_wcc, + Comms.get_session_id(), + wf[1], + num_verts, + num_edges, + vertex_partition_offsets, + workers=[wf[0]]) + for idx, wf in enumerate(data.worker_to_parts.items())] + wait(result) + ddf = dask_cudf.from_delayed(result) + + if input_graph.renumbered: + return input_graph.unrenumber(ddf, 'vertex') + + return ddf diff --git a/python/cugraph/dask/components/mg_connectivity.pxd b/python/cugraph/dask/components/mg_connectivity.pxd new file mode 100644 index 00000000000..04f04a9665e --- /dev/null +++ b/python/cugraph/dask/components/mg_connectivity.pxd @@ -0,0 +1,26 @@ +# +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from cugraph.structure.graph_utilities cimport *
+from libcpp cimport bool
+
+
+# Cython declaration of the C++ function call_wcc from
+# cugraph/utilities/cython.hpp (namespace cugraph::cython). It is templated
+# on vertex_t and weight_t and takes a handle, a graph container and a
+# vertex_t pointer named components.
+# NOTE(review): components is presumably the output buffer that receives one
+# component label per local vertex - confirm against the C++ header.
+cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython":
+
+    cdef void call_wcc[vertex_t, weight_t](
+        const handle_t &handle,
+        const graph_container_t &g,
+        vertex_t * components)
diff --git a/python/cugraph/dask/components/mg_connectivity_wrapper.pyx b/python/cugraph/dask/components/mg_connectivity_wrapper.pyx
new file mode 100644
index 00000000000..156d29a9794
--- /dev/null
+++ b/python/cugraph/dask/components/mg_connectivity_wrapper.pyx
@@ -0,0 +1,99 @@
+#
+# Copyright (c) 2021, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from cugraph.structure.utils_wrapper import *
+from cugraph.dask.components cimport mg_connectivity as c_connectivity
+import cudf
+from cugraph.structure.graph_utilities cimport *
+import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper
+from libc.stdint cimport uintptr_t
+from cython.operator cimport dereference as deref
+import numpy as np
+
+
+# Run weakly connected components on one worker's part of the edge list.
+#
+# Parameters
+#   input_df                 : frame whose 'src' and 'dst' columns are read
+#   num_global_verts         : forwarded to populate_graph_container
+#   num_global_edges         : forwarded to populate_graph_container; also
+#                              selects int32 vs int64 for edge_t
+#   vertex_partition_offsets : series read via .values_host and .iloc[rank]
+#   rank                     : index into vertex_partition_offsets
+#   handle                   : object providing getHandle()
+#
+# Returns a cudf.DataFrame with a 'vertex' column covering
+# [offsets[rank], offsets[rank+1]) and a 'labels' column whose device buffer
+# is handed to call_wcc.
+def mg_wcc(input_df,
+           num_global_verts,
+           num_global_edges,
+           vertex_partition_offsets,
+           rank,
+           handle):
+
+    cdef size_t handle_size_t = handle.getHandle()
+    # NOTE(review): handle_ is indexed as handle_[0] below, so a pointer cast
+    # on this assignment appears to have been lost from this text - confirm
+    # against the upstream file.
+    handle_ = handle_size_t
+
+    src = input_df['src']
+    dst = input_df['dst']
+    vertex_t = src.dtype
+    # Use 64-bit edge counts only when the global edge count exceeds int32.
+    if num_global_edges > (2**31 - 1):
+        edge_t = np.dtype("int64")
+    else:
+        edge_t = np.dtype("int32")
+
+    # This wrapper always builds an unweighted graph.
+    weights = None
+    weight_t = np.dtype("float32")
+    is_weighted = False
+
+    # FIXME: Offsets and indices are currently hardcoded to int, but this may
+    #        not be acceptable in the future.
+    numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type,
+                     np.dtype("int64") : numberTypeEnum.int64Type,
+                     np.dtype("float32") : numberTypeEnum.floatType,
+                     np.dtype("double") : numberTypeEnum.doubleType}
+
+    # FIXME: needs to be edge_t type not int
+    cdef int num_local_edges = len(src)
+
+    # Raw device addresses of the edge list columns.
+    cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0]
+    cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0]
+    cdef uintptr_t c_edge_weights = NULL
+
+    # FIXME: data is on device, move to host (to_pandas()), convert to np array and access pointer to pass to C
+    vertex_partition_offsets_host = vertex_partition_offsets.values_host
+    cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets_host.__array_interface__['data'][0]
+
+    cdef graph_container_t graph_container
+
+    # NOTE(review): the five trailing positional flags are unlabeled here;
+    # the louvain wrapper call labels its last two "store_transposed,
+    # multi_gpu", presumably the same order applies - confirm against the
+    # populate_graph_container declaration.
+    populate_graph_container(graph_container,
+                             handle_[0],
+                             c_src_vertices, c_dst_vertices, c_edge_weights,
+                             c_vertex_partition_offsets,
+                             ((numberTypeMap[vertex_t])),
+                             ((numberTypeMap[edge_t])),
+                             ((numberTypeMap[weight_t])),
+                             num_local_edges,
+                             num_global_verts, num_global_edges,
+                             True,
+                             is_weighted,
+                             True,
+                             False,
+                             True)
+
+    # Output frame: one row per vertex in this rank's partition range, with
+    # zero-initialized labels.
+    df = cudf.DataFrame()
+    df['vertex'] = cudf.Series(np.arange(vertex_partition_offsets.iloc[rank], vertex_partition_offsets.iloc[rank+1]), dtype=vertex_t)
+    df['labels'] = cudf.Series(np.zeros(len(df['vertex']), dtype=vertex_t))
+
+    cdef uintptr_t c_labels_val = df['labels'].__cuda_array_interface__['data'][0];
+
+    # Dispatch on the vertex dtype: int for int32, long otherwise.
+    if vertex_t == np.int32:
+        c_connectivity.call_wcc[int, float](handle_[0],
+                                            graph_container,
+                                            c_labels_val)
+    else:
+        c_connectivity.call_wcc[long, float](handle_[0],
+                                             graph_container,
+                                             c_labels_val)
+
+    return df
diff --git a/python/cugraph/dask/link_analysis/mg_pagerank.pxd b/python/cugraph/dask/link_analysis/mg_pagerank.pxd
index 55bbc0dba7e..4b47f43dd87 100644
--- a/python/cugraph/dask/link_analysis/mg_pagerank.pxd
+++ b/python/cugraph/dask/link_analysis/mg_pagerank.pxd
@@ -18,7 +18,7 @@ from cugraph.structure.graph_utilities cimport *
 from libcpp cimport bool
 
 
-cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
+cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython":
 
     cdef void call_pagerank[vertex_t, weight_t](
         const handle_t &handle,
diff --git a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx
index c2f92f0f33b..6b8e18c119c 100644
--- a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx
+++ b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx
@@ -91,6 +91,7 @@ def mg_pagerank(input_df,
                              num_global_verts, num_global_edges,
                              True,
                              is_weighted,
+                             False,
                              True, True)
 
     df = cudf.DataFrame()
diff --git a/python/cugraph/dask/traversal/mg_bfs.pxd b/python/cugraph/dask/traversal/mg_bfs.pxd
index 6a0277f8713..d4f399bf689 100644
--- a/python/cugraph/dask/traversal/mg_bfs.pxd
+++ b/python/cugraph/dask/traversal/mg_bfs.pxd
@@ -21,7 +21,7 @@ cdef extern from "limits.h":
     cdef int INT_MAX
     cdef long LONG_MAX
 
-cdef extern from "utilities/cython.hpp" namespace
"cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef void call_bfs[vertex_t, weight_t]( const handle_t &handle, diff --git a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx index e2f44ada32c..f0a9f2a81db 100644 --- a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx +++ b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx @@ -50,6 +50,7 @@ def mg_bfs(input_df, else: weight_t = np.dtype("float32") + # FIXME: Offsets and indices are currently hardcoded to int, but this may # not be acceptable in the future. numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, @@ -81,6 +82,7 @@ def mg_bfs(input_df, num_global_verts, num_global_edges, True, False, # BFS runs on unweighted graphs + False, False, True) # Generate the cudf.DataFrame result diff --git a/python/cugraph/dask/traversal/mg_sssp.pxd b/python/cugraph/dask/traversal/mg_sssp.pxd index d56575da567..937b42147e6 100644 --- a/python/cugraph/dask/traversal/mg_sssp.pxd +++ b/python/cugraph/dask/traversal/mg_sssp.pxd @@ -17,7 +17,7 @@ from cugraph.structure.graph_utilities cimport * from libcpp cimport bool -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef void call_sssp[vertex_t, weight_t]( const handle_t &handle, diff --git a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx index 82a4ebe04d6..c11ec967e05 100644 --- a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx +++ b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx @@ -85,6 +85,7 @@ def mg_sssp(input_df, num_global_verts, num_global_edges, True, is_weighted, + False, False, True) # Generate the cudf.DataFrame result diff --git a/python/cugraph/generators/__init__.py b/python/cugraph/generators/__init__.py new file mode 100644 index 00000000000..74ecc2384bd --- /dev/null +++ 
b/python/cugraph/generators/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .rmat import rmat, multi_rmat diff --git a/python/cugraph/generators/rmat.pxd b/python/cugraph/generators/rmat.pxd new file mode 100644 index 00000000000..3c51108c778 --- /dev/null +++ b/python/cugraph/generators/rmat.pxd @@ -0,0 +1,45 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from libcpp cimport bool
+from cugraph.structure.graph_utilities cimport *
+from libcpp.vector cimport vector
+
+# Mirrors the C++ enum cugraph::generator_distribution_t declared in
+# cugraph/graph_generators.hpp; the quoted strings are the C++ names emitted
+# for each enumerator.
+cdef extern from "cugraph/graph_generators.hpp" namespace "cugraph":
+    ctypedef enum generator_distribution_t:
+        POWER_LAW "cugraph::generator_distribution_t::POWER_LAW"
+        UNIFORM "cugraph::generator_distribution_t::UNIFORM"
+
+
+# Declarations of the R-MAT generator entry points from
+# cugraph/utilities/cython.hpp (namespace cugraph::cython). Both are
+# templated on vertex_t and declared "except +" so C++ exceptions are
+# translated into Python exceptions.
+cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython":
+    # Single edge list: returns a unique_ptr to a graph_generator_t.
+    cdef unique_ptr[graph_generator_t] call_generate_rmat_edgelist[vertex_t] (
+        const handle_t &handle,
+        size_t scale,
+        size_t num_edges,
+        double a,
+        double b,
+        double c,
+        int seed,
+        bool clip_and_flip,
+        bool scramble_vertex_ids) except +
+
+    # Multiple edge lists: returns a vector of device_buffer pairs.
+    # NOTE(review): each pair is presumably (sources, destinations) - the
+    # wrapper reads .first as 'src' and .second as 'dst'; confirm in the
+    # C++ header.
+    cdef vector[pair[unique_ptr[device_buffer], unique_ptr[device_buffer]]] call_generate_rmat_edgelists[vertex_t](
+        const handle_t &handle,
+        size_t n_edgelists,
+        size_t min_scale,
+        size_t max_scale,
+        size_t edge_factor,
+        generator_distribution_t size_distribution,
+        generator_distribution_t edge_distribution,
+        int seed,
+        bool clip_and_flip,
+        bool scramble_vertex_ids) except +
diff --git a/python/cugraph/generators/rmat.py b/python/cugraph/generators/rmat.py
new file mode 100644
index 00000000000..46859ccd42d
--- /dev/null
+++ b/python/cugraph/generators/rmat.py
@@ -0,0 +1,377 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dask.distributed import default_client
+import dask_cudf
+
+from cugraph.generators import rmat_wrapper
+from cugraph.comms import comms as Comms
+import cugraph
+
+
+def _ensure_args_rmat(
+    scale,
+    num_edges,
+    a,
+    b,
+    c,
+    seed,
+    clip_and_flip,
+    scramble_vertex_ids,
+    create_using,
+    mg
+):
+    """
+    Ensures the args passed in are usable for the rmat() API, raises the
+    appropriate exception if incorrect, else returns None.
+    """
+    if mg and create_using not in [None, cugraph.DiGraph]:
+        raise TypeError("Only cugraph.DiGraph and None are supported types "
+                        "for `create_using` for multi-GPU R-MAT")
+    if create_using not in [None, cugraph.Graph, cugraph.DiGraph]:
+        raise TypeError("Only cugraph.Graph, cugraph.DiGraph, and None are "
+                        "supported types for 'create_using'")
+    if not isinstance(scale, int):
+        raise TypeError("'scale' must be an int")
+    if not isinstance(num_edges, int):
+        raise TypeError("'num_edges' must be an int")
+    if (a+b+c > 1):
+        raise ValueError(
+            "a + b + c should be non-negative and no larger than 1.0")
+    if (clip_and_flip not in [True, False]):
+        raise ValueError("'clip_and_flip' must be a bool")
+    if (scramble_vertex_ids not in [True, False]):
+        # Name the argument that actually failed validation.
+        raise ValueError("'scramble_vertex_ids' must be a bool")
+    if not isinstance(seed, int):
+        raise TypeError("'seed' must be an int")
+
+
+def _ensure_args_multi_rmat(
+    n_edgelists,
+    min_scale,
+    max_scale,
+    edge_factor,
+    size_distribution,
+    edge_distribution,
+    seed,
+    clip_and_flip,
+    scramble_vertex_ids
+):
+    """
+    Ensures the args passed in are usable for the multi_rmat() API, raises the
+    appropriate exception if incorrect, else returns None.
+    """
+    if not isinstance(n_edgelists, int):
+        raise TypeError("'n_edgelists' must be an int")
+    if not isinstance(min_scale, int):
+        raise TypeError("'min_scale' must be an int")
+    if not isinstance(max_scale, int):
+        raise TypeError("'max_scale' must be an int")
+    if not isinstance(edge_factor, int):
+        raise TypeError("'edge_factor' must be an int")
+    if (size_distribution not in [0, 1]):
+        raise TypeError("'size_distribution' must be either 0 or 1")
+    if (edge_distribution not in [0, 1]):
+        raise TypeError("'edge_distribution' must be either 0 or 1")
+    if (clip_and_flip not in [True, False]):
+        raise ValueError("'clip_and_flip' must be a bool")
+    if (scramble_vertex_ids not in [True, False]):
+        # Name the argument that actually failed validation.
+        raise ValueError("'scramble_vertex_ids' must be a bool")
+    if not isinstance(seed, int):
+        raise TypeError("'seed' must be an int")
+
+
+def _sg_rmat(
+    scale,
+    num_edges,
+    a,
+    b,
+    c,
+    seed,
+    clip_and_flip,
+    scramble_vertex_ids,
+    create_using=cugraph.DiGraph
+):
+    """
+    Calls RMAT on a single GPU and uses the resulting cuDF DataFrame
+    to initialize and return a cugraph Graph object specified with
+    create_using. If create_using is None, returns the edgelist df as-is.
+    """
+    df = rmat_wrapper.generate_rmat_edgelist(scale,
+                                             num_edges,
+                                             a,
+                                             b,
+                                             c,
+                                             seed,
+                                             clip_and_flip,
+                                             scramble_vertex_ids)
+    if create_using is None:
+        return df
+
+    G = create_using()
+    G.from_cudf_edgelist(df, source='src', destination='dst', renumber=False)
+
+    return G
+
+
+def _mg_rmat(
+    scale,
+    num_edges,
+    a,
+    b,
+    c,
+    seed,
+    clip_and_flip,
+    scramble_vertex_ids,
+    create_using=cugraph.DiGraph
+):
+    """
+    Calls RMAT on multiple GPUs and uses the resulting Dask cuDF DataFrame to
+    initialize and return a cugraph Graph object specified with create_using.
+    If create_using is None, returns the Dask DataFrame edgelist as-is.
+
+    seed is used as the initial seed for the first worker used (worker 0), then
+    each subsequent worker will receive seed+<worker num> as the seed value.
+    """
+    client = default_client()
+    worker_list = list(client.scheduler_info()['workers'].keys())
+    num_workers = len(worker_list)
+    num_edges_list = _calc_num_edges_per_worker(num_workers, num_edges)
+    futures = []
+    for (i, worker_num_edges) in enumerate(num_edges_list):
+        # Offset the seed by the worker index so every worker gets its own.
+        unique_worker_seed = seed + i
+        future = client.submit(
+            _call_rmat,
+            Comms.get_session_id(),
+            scale,
+            worker_num_edges,
+            a,
+            b,
+            c,
+            unique_worker_seed,
+            clip_and_flip,
+            scramble_vertex_ids,
+            workers=worker_list[i]
+        )
+        futures.append(future)
+
+    ddf = dask_cudf.from_delayed(futures)
+
+    if create_using is None:
+        return ddf
+
+    G = create_using()
+    G.from_dask_cudf_edgelist(ddf, source="src", destination="dst")
+
+    return G
+
+
+def _call_rmat(
+    sID,
+    scale,
+    num_edges_for_worker,
+    a,
+    b,
+    c,
+    unique_worker_seed,
+    clip_and_flip,
+    scramble_vertex_ids
+):
+    """
+    Callable passed to dask client.submit calls that extracts the individual
+    worker handle based on the dask session ID
+    """
+    handle = Comms.get_handle(sID)
+
+    return rmat_wrapper.generate_rmat_edgelist(
+        scale,
+        num_edges_for_worker,
+        a,
+        b,
+        c,
+        unique_worker_seed,
+        clip_and_flip,
+        scramble_vertex_ids,
+        handle=handle
+    )
+
+
+def _calc_num_edges_per_worker(num_workers, num_edges):
+    """
+    Returns a list of length num_workers with the individual number of edges
+    each worker should generate. The sum of all edges in the list is num_edges.
+    """
+    L = []
+    w = num_edges // num_workers
+    r = num_edges % num_workers
+    for i in range(num_workers):
+        # The first r workers each take one of the leftover edges.
+        if (i < r):
+            L.append(w+1)
+        else:
+            L.append(w)
+    return L
+
+
+###############################################################################
+
+def rmat(
+    scale,
+    num_edges,
+    a,
+    b,
+    c,
+    seed,
+    clip_and_flip,
+    scramble_vertex_ids,
+    create_using=cugraph.DiGraph,
+    mg=False
+):
+    """
+    Generate a Graph object using a Recursive MATrix (R-MAT) graph generation
+    algorithm.
+
+    Parameters
+    ----------
+    scale : int
+        Scale factor to set the number of vertices in the graph. Vertex IDs
+        have values in [0, V), where V = 1 << 'scale'
+
+    num_edges : int
+        Number of edges to generate
+
+    a : float
+        Probability of the first partition
+
+    b : float
+        Probability of the second partition
+
+    c : float
+        Probability of the third partition
+
+    seed : int
+        Seed value for the random number generator
+
+    clip_and_flip : bool
+        Flag controlling whether to generate edges only in the lower triangular
+        part (including the diagonal) of the graph adjacency matrix
+        (if set to 'true') or not (if set to 'false').
+
+    scramble_vertex_ids : bool
+        Flag controlling whether to scramble vertex ID bits (if set to `true`) or
+        not (if set to `false`); scrambling vertex ID bits breaks correlation
+        between vertex ID values and vertex degrees
+
+    create_using : cugraph Graph type or None
+        The graph type to construct containing the generated edges and
+        vertices. If None is specified, the edgelist cuDF DataFrame (or
+        dask_cudf DataFrame for MG) is returned as-is. This is useful for
+        benchmarking Graph construction steps that require raw data that
+        includes potential self-loops, isolated vertices, and duplicated
+        edges. Default is cugraph.DiGraph. NOTE: only the cugraph.DiGraph
+        type is supported for multi-GPU
+
+    mg : bool
+        If True, R-MAT generation occurs across multiple GPUs. If False, only a
+        single GPU is used. Default is False (single-GPU)
+
+    Returns
+    -------
+    instance of cugraph.Graph, or the edgelist DataFrame if create_using is
+    None
+    """
+    _ensure_args_rmat(scale, num_edges, a, b, c, seed, clip_and_flip,
+                      scramble_vertex_ids, create_using, mg)
+
+    if mg:
+        return _mg_rmat(scale, num_edges, a, b, c, seed, clip_and_flip,
+                        scramble_vertex_ids, create_using)
+    else:
+        return _sg_rmat(scale, num_edges, a, b, c, seed, clip_and_flip,
+                        scramble_vertex_ids, create_using)
+
+
+def multi_rmat(
+    n_edgelists,
+    min_scale,
+    max_scale,
+    edge_factor,
+    size_distribution,
+    edge_distribution,
+    seed,
+    clip_and_flip,
+    scramble_vertex_ids
+):
+    """
+    Generate multiple Graph objects using a Recursive MATrix (R-MAT) graph
+    generation algorithm.
+
+    Parameters
+    ----------
+    n_edgelists : int
+        Number of edge lists (graphs) to generate
+
+    min_scale : int
+        Scale factor to set the minimum number of vertices in the graph
+
+    max_scale : int
+        Scale factor to set the maximum number of vertices in the graph
+
+    edge_factor : int
+        Average number of edges per vertex to generate
+
+    size_distribution : int
+        Distribution of the graph sizes, impacts the scale parameter of the R-MAT
+        generator.
+        '0' for POWER_LAW distribution and '1' for UNIFORM distribution
+
+    edge_distribution : int
+        Edges distribution for each graph, impacts how R-MAT parameters a,b,c,d,
+        are set.
+        '0' for POWER_LAW distribution and '1' for UNIFORM distribution
+
+    seed : int
+        Seed value for the random number generator
+
+    clip_and_flip : bool
+        Flag controlling whether to generate edges only in the lower triangular
+        part (including the diagonal) of the graph adjacency matrix
+        (if set to 'true') or not (if set to 'false')
+
+    scramble_vertex_ids : bool
+        Flag controlling whether to scramble vertex ID bits (if set to 'true') or
+        not (if set to 'false'); scrambling vertex ID bits breaks correlation
+        between vertex ID values and vertex degrees
+
+    Returns
+    -------
+    list of cugraph.Graph instances
+    """
+    _ensure_args_multi_rmat(n_edgelists, min_scale, max_scale, edge_factor,
+                            size_distribution, edge_distribution, seed,
+                            clip_and_flip, scramble_vertex_ids)
+
+    dfs = rmat_wrapper.generate_rmat_edgelists(
+        n_edgelists, min_scale,
+        max_scale,
+        edge_factor,
+        size_distribution,
+        edge_distribution,
+        seed,
+        clip_and_flip,
+        scramble_vertex_ids)
+    list_G = []
+
+    for df in dfs:
+        G = cugraph.Graph()
+        G.from_cudf_edgelist(df, source='src', destination='dst')
+        list_G.append(G)
+
+    return list_G
diff --git a/python/cugraph/generators/rmat_wrapper.pyx b/python/cugraph/generators/rmat_wrapper.pyx
new file mode 100644
index 00000000000..26f3772ad32
--- /dev/null
+++ b/python/cugraph/generators/rmat_wrapper.pyx
@@ -0,0 +1,171 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from libcpp cimport bool
+from libc.stdint cimport uintptr_t
+import numpy as np
+import numpy.ctypeslib as ctypeslib
+from cython.operator cimport dereference as deref
+
+import rmm
+from rmm._lib.device_buffer cimport DeviceBuffer
+import cudf
+from cudf.core.buffer import Buffer
+
+from cugraph.structure.graph_utilities cimport *
+from cugraph.generators.rmat cimport *
+from libcpp.utility cimport move  # This must be imported after graph_utilities
+                                  # since graph_utilities also defines move
+
+
+# Generate one R-MAT edge list and return it as a cudf.DataFrame with 'src'
+# and 'dst' columns. If handle is None a new handle_t is created for the
+# call; otherwise handle.getHandle() supplies the handle to use.
+def generate_rmat_edgelist(
+    scale,
+    num_edges,
+    a,
+    b,
+    c,
+    seed,
+    clip_and_flip,
+    scramble_vertex_ids,
+    handle=None
+):
+
+    # Vertex ids are int32 unless 2**scale does not fit in int32.
+    vertex_t = np.dtype("int32")
+    if (2**scale) > (2**31 - 1):
+        vertex_t = np.dtype("int64")
+
+    cdef unique_ptr[handle_t] handle_ptr
+    cdef size_t handle_size_t
+
+    if handle is None:
+        # handle_ptr owns the newly created handle.
+        handle_ptr.reset(new handle_t())
+        handle_ = handle_ptr.get()
+    else:
+        handle_size_t = handle.getHandle()
+        # NOTE(review): handle_ is dereferenced with deref() below, so a
+        # pointer cast on this assignment appears to have been lost from
+        # this text - confirm against the upstream file.
+        handle_ = handle_size_t
+
+    cdef unique_ptr[graph_generator_t] gg_ret_ptr
+
+    # Dispatch on the vertex dtype: int for int32, long for int64.
+    if (vertex_t==np.dtype("int32")):
+        gg_ret_ptr = move(call_generate_rmat_edgelist[int]( deref(handle_),
+                                                            scale,
+                                                            num_edges,
+                                                            a,
+                                                            b,
+                                                            c,
+                                                            seed,
+                                                            clip_and_flip,
+                                                            scramble_vertex_ids))
+    else: # (vertex_t == np.dtype("int64"))
+        gg_ret_ptr = move(call_generate_rmat_edgelist[long]( deref(handle_),
+                                                             scale,
+                                                             num_edges,
+                                                             a,
+                                                             b,
+                                                             c,
+                                                             seed,
+                                                             clip_and_flip,
+                                                             scramble_vertex_ids))
+
+    # Move the two device buffers out of the result and wrap them as cudf
+    # Buffers so they can back the output Series.
+    gg_ret = move(gg_ret_ptr.get()[0])
+    source_set = DeviceBuffer.c_from_unique_ptr(move(gg_ret.d_source))
+    destination_set = DeviceBuffer.c_from_unique_ptr(move(gg_ret.d_destination))
+    source_set = Buffer(source_set)
+    destination_set = Buffer(destination_set)
+
+    set_source = cudf.Series(data=source_set, dtype=vertex_t)
+    set_destination = cudf.Series(data=destination_set, dtype=vertex_t)
+
+    df = cudf.DataFrame()
+    df['src'] = set_source
+    df['dst'] = set_destination
+
+    return df
+
+
+# Generate n_edgelists R-MAT edge lists and return them as a list of
+# cudf.DataFrames, each with 'src' and 'dst' columns. A new handle_t is
+# always created for the call.
+def generate_rmat_edgelists(
+    n_edgelists,
+    min_scale,
+    max_scale,
+    edge_factor,
+    size_distribution,
+    edge_distribution,
+    seed,
+    clip_and_flip,
+    scramble_vertex_ids
+    ):
+
+    # Vertex ids are int32 unless 2**max_scale does not fit in int32.
+    vertex_t = np.dtype("int32")
+    if (2**max_scale) > (2**31 - 1):
+        vertex_t = np.dtype("int64")
+
+    cdef unique_ptr[handle_t] handle_ptr
+    handle_ptr.reset(new handle_t())
+    handle_ = handle_ptr.get()
+
+    # Map the integer selectors onto the C++ enum: 0 selects POWER_LAW, any
+    # other value selects UNIFORM.
+    cdef generator_distribution_t s_distribution
+    cdef generator_distribution_t e_distribution
+    if size_distribution == 0:
+        s_distribution= POWER_LAW
+    else :
+        s_distribution= UNIFORM
+    if edge_distribution == 0:
+        e_distribution= POWER_LAW
+    else :
+        e_distribution= UNIFORM
+    #cdef unique_ptr[graph_generator_t*] gg_ret_ptr
+    cdef vector[pair[unique_ptr[device_buffer], unique_ptr[device_buffer]]] gg_ret_ptr
+
+    # Dispatch on the vertex dtype: int for int32, long for int64.
+    if (vertex_t==np.dtype("int32")):
+        #gg_ret_ptr = move(call_generate_rmat_edgelists[int]( deref(handle_),
+        gg_ret_ptr = move(call_generate_rmat_edgelists[int]( deref(handle_),
+                                                             n_edgelists,
+                                                             min_scale,
+                                                             max_scale,
+                                                             edge_factor,
+                                                             s_distribution,
+                                                             e_distribution,
+                                                             seed,
+                                                             clip_and_flip,
+                                                             scramble_vertex_ids))
+    else: # (vertex_t == np.dtype("int64"))
+        #gg_ret_ptr = move(call_generate_rmat_edgelists[long]( deref(handle_),
+        gg_ret_ptr = move(call_generate_rmat_edgelists[long]( deref(handle_),
+                                                              n_edgelists,
+                                                              min_scale,
+                                                              max_scale,
+                                                              edge_factor,
+                                                              s_distribution,
+                                                              e_distribution,
+                                                              seed,
+                                                              clip_and_flip,
+                                                              scramble_vertex_ids))
+    list_df = []
+
+    # Each vector element is a pair of device buffers: .first becomes the
+    # 'src' column and .second the 'dst' column of one DataFrame.
+    for i in range(n_edgelists):
+        source_set = DeviceBuffer.c_from_unique_ptr(move(gg_ret_ptr[i].first))
+        destination_set = DeviceBuffer.c_from_unique_ptr(move(gg_ret_ptr[i].second))
+        source_set = Buffer(source_set)
+        destination_set = Buffer(destination_set)
+
+        set_source = cudf.Series(data=source_set, dtype=vertex_t)
+        set_destination = cudf.Series(data=destination_set, dtype=vertex_t)
+
+        df = cudf.DataFrame()
+        df['src'] = set_source
+        df['dst'] = set_destination
+
+        list_df.append(df)
+
+    #Return a list of dataframes
+    return list_df
diff --git a/python/cugraph/internals/callbacks_implems.hpp
b/python/cugraph/internals/callbacks_implems.hpp index 7b3a27f6bff..79fab937965 100644 --- a/python/cugraph/internals/callbacks_implems.hpp +++ b/python/cugraph/internals/callbacks_implems.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,7 @@ #pragma once #include -#include +#include #include diff --git a/python/cugraph/layout/force_atlas2.pxd b/python/cugraph/layout/force_atlas2.pxd index cda55cda5c5..5496d1b655e 100644 --- a/python/cugraph/layout/force_atlas2.pxd +++ b/python/cugraph/layout/force_atlas2.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,12 +19,13 @@ from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool -cdef extern from "internals.hpp" namespace "cugraph::internals": +cdef extern from "cugraph/internals.hpp" namespace "cugraph::internals": cdef cppclass GraphBasedDimRedCallback -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef void force_atlas2[vertex_t, edge_t, weight_t]( + const handle_t &handle, GraphCOOView[vertex_t, edge_t, weight_t] &graph, float *pos, const int max_iter, diff --git a/python/cugraph/layout/force_atlas2.py b/python/cugraph/layout/force_atlas2.py index 0b745d8ca15..d15109249e5 100644 --- a/python/cugraph/layout/force_atlas2.py +++ b/python/cugraph/layout/force_atlas2.py @@ -12,7 +12,6 @@ # limitations under the License. 
from cugraph.layout import force_atlas2_wrapper -from cugraph.structure.graph_classes import null_check def force_atlas2( @@ -109,13 +108,14 @@ def on_train_end(self, positions): """ if pos_list is not None: - null_check(pos_list["vertex"]) - null_check(pos_list["x"]) - null_check(pos_list["y"]) if input_graph.renumbered is True: + if input_graph.vertex_column_size() > 1: + cols = pos_list.columns[:-2].to_list() + else: + cols = 'vertex' pos_list = input_graph.add_internal_vertex_id(pos_list, "vertex", - "vertex") + cols) if prevent_overlapping: raise Exception("Feature not supported") diff --git a/python/cugraph/layout/force_atlas2_wrapper.pyx b/python/cugraph/layout/force_atlas2_wrapper.pyx index 4515c577f78..1644875f034 100644 --- a/python/cugraph/layout/force_atlas2_wrapper.pyx +++ b/python/cugraph/layout/force_atlas2_wrapper.pyx @@ -25,7 +25,7 @@ import cudf from numba import cuda import numpy as np -cdef extern from "internals.hpp" namespace "cugraph::internals": +cdef extern from "cugraph/internals.hpp" namespace "cugraph::internals": cdef cppclass GraphBasedDimRedCallback @@ -49,6 +49,10 @@ def force_atlas2(input_graph, Call force_atlas2 """ + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get(); + if not input_graph.edgelist: input_graph.view_edge_list() @@ -61,12 +65,19 @@ def force_atlas2(input_graph, df = cudf.DataFrame() df['vertex'] = cudf.Series(np.arange(num_verts, dtype=np.int32)) - cdef uintptr_t c_src_indices = input_graph.edgelist.edgelist_df['src'].__cuda_array_interface__['data'][0] - cdef uintptr_t c_dst_indices = input_graph.edgelist.edgelist_df['dst'].__cuda_array_interface__['data'][0] + src = input_graph.edgelist.edgelist_df['src'] + dst = input_graph.edgelist.edgelist_df['dst'] + + [src, dst] = graph_primtypes_wrapper.datatype_cast([src, dst], [np.int32]) + + cdef uintptr_t c_src_indices = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst_indices = 
dst.__cuda_array_interface__['data'][0] cdef uintptr_t c_weights = NULL if input_graph.edgelist.weights: - c_weights = input_graph.edgelist.edgelist_df['weights'].__cuda_array_interface__['data'][0] + weights = input_graph.edgelist.edgelist_df["weights"] + [weights] = graph_primtypes_wrapper.datatype_cast([weights], [np.float32, np.float64]) + c_weights = weights.__cuda_array_interface__['data'][0] cdef uintptr_t x_start = NULL cdef uintptr_t y_start = NULL @@ -100,7 +111,8 @@ def force_atlas2(input_graph, graph_double = GraphCOOView[int,int, double](c_src_indices, c_dst_indices, c_weights, num_verts, num_edges) - c_force_atlas2[int, int, double](graph_double, + c_force_atlas2[int, int, double](handle_[0], + graph_double, pos_ptr, max_iter, x_start, @@ -121,7 +133,8 @@ def force_atlas2(input_graph, graph_float = GraphCOOView[int,int,float](c_src_indices, c_dst_indices, c_weights, num_verts, num_edges) - c_force_atlas2[int, int, float](graph_float, + c_force_atlas2[int, int, float](handle_[0], + graph_float, pos_ptr, max_iter, x_start, diff --git a/python/cugraph/linear_assignment/lap.pxd b/python/cugraph/linear_assignment/lap.pxd index 782d5cfef60..84f5050744d 100644 --- a/python/cugraph/linear_assignment/lap.pxd +++ b/python/cugraph/linear_assignment/lap.pxd @@ -18,7 +18,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef weight_t hungarian[vertex_t,edge_t,weight_t]( const handle_t &handle, @@ -28,7 +28,7 @@ cdef extern from "algorithms.hpp" namespace "cugraph": vertex_t *assignment) except + -cdef extern from "algorithms.hpp": +cdef extern from "cugraph/algorithms.hpp": cdef weight_t dense_hungarian "cugraph::dense::hungarian" [vertex_t,weight_t]( const handle_t &handle, diff --git a/python/cugraph/link_analysis/hits.pxd b/python/cugraph/link_analysis/hits.pxd index 60d25fd3cdb..9e40f7444f9 100644 --- 
a/python/cugraph/link_analysis/hits.pxd +++ b/python/cugraph/link_analysis/hits.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -20,7 +20,7 @@ from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool -cdef extern from "algorithms.hpp" namespace "cugraph::gunrock": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph::gunrock": cdef void hits[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph, diff --git a/python/cugraph/link_analysis/pagerank.pxd b/python/cugraph/link_analysis/pagerank.pxd index 2c8bea12016..ed8f763b3ca 100644 --- a/python/cugraph/link_analysis/pagerank.pxd +++ b/python/cugraph/link_analysis/pagerank.pxd @@ -20,7 +20,7 @@ from cugraph.structure.graph_utilities cimport * from libcpp cimport bool -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef void call_pagerank[VT,WT]( const handle_t &handle, diff --git a/python/cugraph/link_analysis/pagerank.py b/python/cugraph/link_analysis/pagerank.py index 4f5f8f6aae0..94b1491e944 100644 --- a/python/cugraph/link_analysis/pagerank.py +++ b/python/cugraph/link_analysis/pagerank.py @@ -12,7 +12,6 @@ # limitations under the License. from cugraph.link_analysis import pagerank_wrapper -from cugraph.structure.graph_classes import null_check import cugraph @@ -67,6 +66,10 @@ def pagerank( Subset of vertices of graph for initial guess for pagerank values nstart['values'] : cudf.Series Pagerank values for vertices + weight: str + The attribute column to be used as edge weights if Graph is a NetworkX + Graph. 
This parameter is here for NetworkX compatibility and is ignored + in case of a cugraph.Graph dangling : dict This parameter is here for NetworkX compatibility and ignored @@ -94,17 +97,23 @@ def pagerank( G, isNx = cugraph.utilities.check_nx_graph(G, weight) if personalization is not None: - null_check(personalization["vertex"]) - null_check(personalization["values"]) if G.renumbered is True: + if len(G.renumber_map.implementation.col_names) > 1: + cols = personalization.columns[:-1].to_list() + else: + cols = 'vertex' personalization = G.add_internal_vertex_id( - personalization, "vertex", "vertex" + personalization, "vertex", cols ) if nstart is not None: if G.renumbered is True: + if len(G.renumber_map.implementation.col_names) > 1: + cols = nstart.columns[:-1].to_list() + else: + cols = 'vertex' nstart = G.add_internal_vertex_id( - nstart, "vertex", "vertex" + nstart, "vertex", cols ) df = pagerank_wrapper.pagerank( diff --git a/python/cugraph/link_analysis/pagerank_wrapper.pyx b/python/cugraph/link_analysis/pagerank_wrapper.pyx index 2c619a052ec..5a312f4f633 100644 --- a/python/cugraph/link_analysis/pagerank_wrapper.pyx +++ b/python/cugraph/link_analysis/pagerank_wrapper.pyx @@ -76,6 +76,8 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. weight_t = np.dtype("float32") is_weighted = False + is_symmetric = not input_graph.is_directed() + # FIXME: Offsets and indices are currently hardcoded to int, but this may # not be acceptable in the future. numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, @@ -102,6 +104,7 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. 
num_verts, num_edges, False, is_weighted, + is_symmetric, True, False) diff --git a/python/cugraph/link_prediction/jaccard.pxd b/python/cugraph/link_prediction/jaccard.pxd index bc55bb2cdf0..9e8c82ec3d8 100644 --- a/python/cugraph/link_prediction/jaccard.pxd +++ b/python/cugraph/link_prediction/jaccard.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,7 +19,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef void jaccard[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph, diff --git a/python/cugraph/link_prediction/jaccard.py b/python/cugraph/link_prediction/jaccard.py index 2a9e9625050..e69308ac595 100644 --- a/python/cugraph/link_prediction/jaccard.py +++ b/python/cugraph/link_prediction/jaccard.py @@ -13,10 +13,11 @@ import pandas as pd import cudf -from cugraph.structure.graph_classes import Graph, null_check +from cugraph.structure.graph_classes import Graph from cugraph.link_prediction import jaccard_wrapper from cugraph.utilities import check_nx_graph from cugraph.utilities import df_edge_score_to_dictionary +from cugraph.utilities import renumber_vertex_pair def jaccard(input_graph, vertex_pair=None): @@ -108,15 +109,8 @@ def jaccard(input_graph, vertex_pair=None): if type(input_graph) is not Graph: raise Exception("input graph must be undirected") - # FIXME: Add support for multi-column vertices if type(vertex_pair) == cudf.DataFrame: - for col in vertex_pair.columns: - null_check(vertex_pair[col]) - if input_graph.renumbered: - vertex_pair = input_graph.add_internal_vertex_id( - vertex_pair, col, col - ) - + vertex_pair = renumber_vertex_pair(input_graph, vertex_pair) elif vertex_pair is 
None: pass else: diff --git a/python/cugraph/link_prediction/overlap.pxd b/python/cugraph/link_prediction/overlap.pxd index 970032b56eb..f0654472587 100644 --- a/python/cugraph/link_prediction/overlap.pxd +++ b/python/cugraph/link_prediction/overlap.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -19,7 +19,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef void overlap[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph, diff --git a/python/cugraph/link_prediction/overlap.py b/python/cugraph/link_prediction/overlap.py index 077080bda1d..4650f24f181 100644 --- a/python/cugraph/link_prediction/overlap.py +++ b/python/cugraph/link_prediction/overlap.py @@ -13,10 +13,10 @@ import pandas as pd from cugraph.link_prediction import overlap_wrapper -from cugraph.structure.graph_classes import null_check import cudf from cugraph.utilities import check_nx_graph from cugraph.utilities import df_edge_score_to_dictionary +from cugraph.utilities import renumber_vertex_pair def overlap_coefficient(G, ebunch=None): @@ -91,14 +91,8 @@ def overlap(input_graph, vertex_pair=None): >>> df = cugraph.overlap(G) """ - # FIXME: Add support for multi-column vertices if type(vertex_pair) == cudf.DataFrame: - for col in vertex_pair.columns: - null_check(vertex_pair[col]) - if input_graph.renumbered: - vertex_pair = input_graph.add_internal_vertex_id( - vertex_pair, col, col, - ) + vertex_pair = renumber_vertex_pair(input_graph, vertex_pair) elif vertex_pair is None: pass else: diff --git a/python/cugraph/link_prediction/overlap_wrapper.pyx b/python/cugraph/link_prediction/overlap_wrapper.pyx index 4cb17aa21a6..ec0274716fb 100644 
--- a/python/cugraph/link_prediction/overlap_wrapper.pyx +++ b/python/cugraph/link_prediction/overlap_wrapper.pyx @@ -68,8 +68,9 @@ def overlap(input_graph, weights_arr=None, vertex_pair=None): df = cudf.DataFrame() df['overlap_coeff'] = result - first = vertex_pair['first'] - second = vertex_pair['second'] + cols = vertex_pair.columns.to_list() + first = vertex_pair[cols[0]] + second = vertex_pair[cols[1]] # FIXME: multi column support df['source'] = first diff --git a/python/cugraph/link_prediction/wjaccard.py b/python/cugraph/link_prediction/wjaccard.py index 9679d1ba9cf..9616bfd49a8 100644 --- a/python/cugraph/link_prediction/wjaccard.py +++ b/python/cugraph/link_prediction/wjaccard.py @@ -11,9 +11,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.structure.graph_classes import Graph, null_check +from cugraph.structure.graph_classes import Graph from cugraph.link_prediction import jaccard_wrapper import cudf +import numpy as np +from cugraph.utilities import renumber_vertex_pair def jaccard_w(input_graph, weights, vertex_pair=None): @@ -35,8 +37,15 @@ def jaccard_w(input_graph, weights, vertex_pair=None): as an edge list (edge weights are not used for this algorithm). The adjacency list will be computed if not already present. - weights : cudf.Series + weights : cudf.DataFrame Specifies the weights to be used for each vertex. + Vertex should be represented by multiple columns for multi-column + vertices. 
+ + weights['vertex'] : cudf.Series + Contains the vertex identifiers + weights['weight'] : cudf.Series + Contains the weights of vertices vertex_pair : cudf.DataFrame A GPU dataframe consisting of two columns representing pairs of @@ -70,20 +79,28 @@ def jaccard_w(input_graph, weights, vertex_pair=None): if type(input_graph) is not Graph: raise Exception("input graph must be undirected") - # FIXME: Add support for multi-column vertices if type(vertex_pair) == cudf.DataFrame: - for col in vertex_pair.columns: - null_check(vertex_pair[col]) - if input_graph.renumbered: - vertex_pair = input_graph.add_internal_vertex_id( - vertex_pair, col, col, - ) + vertex_pair = renumber_vertex_pair(input_graph, vertex_pair) elif vertex_pair is None: pass else: raise ValueError("vertex_pair must be a cudf dataframe") - df = jaccard_wrapper.jaccard(input_graph, weights, vertex_pair) + if input_graph.renumbered: + vertex_size = input_graph.vertex_column_size() + if vertex_size == 1: + weights = input_graph.add_internal_vertex_id( + weights, 'vertex', 'vertex' + ) + else: + cols = weights.columns[:vertex_size].to_list() + weights = input_graph.add_internal_vertex_id( + weights, 'vertex', cols + ) + jaccard_weights = cudf.Series(np.ones(len(weights))) + for i in range(len(weights)): + jaccard_weights[weights['vertex'].iloc[i]] = weights['weight'].iloc[i] + df = jaccard_wrapper.jaccard(input_graph, jaccard_weights, vertex_pair) if input_graph.renumbered: df = input_graph.unrenumber(df, "source") diff --git a/python/cugraph/link_prediction/woverlap.py b/python/cugraph/link_prediction/woverlap.py index fe64f812957..920d3e3f80d 100644 --- a/python/cugraph/link_prediction/woverlap.py +++ b/python/cugraph/link_prediction/woverlap.py @@ -12,8 +12,9 @@ # limitations under the License. 
from cugraph.link_prediction import overlap_wrapper -from cugraph.structure.graph_classes import null_check import cudf +import numpy as np +from cugraph.utilities import renumber_vertex_pair def overlap_w(input_graph, weights, vertex_pair=None): @@ -67,20 +68,33 @@ def overlap_w(input_graph, weights, vertex_pair=None): >>> G.from_cudf_edgelist(M, source='0', destination='1') >>> df = cugraph.overlap_w(G, M[2]) """ - # FIXME: Add support for multi-column vertices + if type(vertex_pair) == cudf.DataFrame: - for col in vertex_pair.columns: - null_check(vertex_pair[col]) - if input_graph.renumbered: - vertex_pair = input_graph.add_internal_vertex_id( - vertex_pair, col, col - ) + vertex_pair = renumber_vertex_pair(input_graph, vertex_pair) elif vertex_pair is None: pass else: raise ValueError("vertex_pair must be a cudf dataframe") - df = overlap_wrapper.overlap(input_graph, weights, vertex_pair) + if input_graph.renumbered: + vertex_size = input_graph.vertex_column_size() + if vertex_size == 1: + weights = input_graph.add_internal_vertex_id( + weights, 'vertex', 'vertex' + ) + else: + cols = weights.columns[:vertex_size].to_list() + weights = input_graph.add_internal_vertex_id( + weights, 'vertex', cols + ) + + overlap_weights = cudf.Series(np.ones(len(weights))) + for i in range(len(weights)): + overlap_weights[weights['vertex'].iloc[i]] = weights['weight'].iloc[i] + + overlap_weights = overlap_weights.astype('float32') + + df = overlap_wrapper.overlap(input_graph, overlap_weights, vertex_pair) if input_graph.renumbered: df = input_graph.unrenumber(df, "source") diff --git a/python/cugraph/sampling/__init__.py b/python/cugraph/sampling/__init__.py index fd9d072d4f8..ab0bfab0c66 100644 --- a/python/cugraph/sampling/__init__.py +++ b/python/cugraph/sampling/__init__.py @@ -11,4 +11,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from cugraph.sampling.random_walks import random_walks +from cugraph.sampling.random_walks import random_walks, rw_path diff --git a/python/cugraph/sampling/random_walks.pxd b/python/cugraph/sampling/random_walks.pxd index 3e0e24b4e98..f86d6396c98 100644 --- a/python/cugraph/sampling/random_walks.pxd +++ b/python/cugraph/sampling/random_walks.pxd @@ -13,10 +13,16 @@ #from cugraph.structure.graph_primtypes cimport * from cugraph.structure.graph_utilities cimport * -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef unique_ptr[random_walk_ret_t] call_random_walks[vertex_t, edge_t]( const handle_t &handle, const graph_container_t &g, const vertex_t *ptr_d_start, edge_t num_paths, - edge_t max_depth) except + + edge_t max_depth, + bool use_padding) except + + + cdef unique_ptr[random_walk_path_t] call_rw_paths[index_t]( + const handle_t &handle, + index_t num_paths, + const index_t* sizes) except + diff --git a/python/cugraph/sampling/random_walks.py b/python/cugraph/sampling/random_walks.py index 84fde262010..fc21abd3bc4 100644 --- a/python/cugraph/sampling/random_walks.py +++ b/python/cugraph/sampling/random_walks.py @@ -14,16 +14,12 @@ import cudf from cugraph.sampling import random_walks_wrapper import cugraph -from collections import defaultdict -# FIXME might be more efficient to return either (df + offset) or 3 cudf.Series - -def random_walks( - G, - start_vertices, - max_depth=None -): +def random_walks(G, + start_vertices, + max_depth=None, + use_padding=False): """ compute random walks for each nodes in 'start_vertices' @@ -43,16 +39,20 @@ def random_walks( max_depth : int The maximum depth of the random walks + use_padding : bool + If True, padded paths are returned else coalesced paths are returned. 
Returns ------- - random_walks_edge_lists : cudf.DataFrame - GPU data frame containing all random walks sources identifiers, - destination identifiers, edge weights + vertex_paths : cudf.Series or cudf.DataFrame + Series containing the vertices of edges/paths in the random walk. + + edge_weight_paths: cudf.Series + Series containing the edge weights of edges represented by the + returned vertex_paths - seeds_offsets: cudf.Series - Series containing the starting offset in the returned edge list - for each vertex in start_vertices. + sizes: int + The path size in case of coalesced paths. """ if max_depth is None: raise TypeError("must specify a 'max_depth'") @@ -74,7 +74,7 @@ def random_walks( start_vertices = G.lookup_internal_vertex_id(start_vertices) vertex_set, edge_set, sizes = random_walks_wrapper.random_walks( - G, start_vertices, max_depth) + G, start_vertices, max_depth, use_padding) if G.renumbered: df_ = cudf.DataFrame() @@ -82,21 +82,32 @@ def random_walks( df_ = G.unrenumber(df_, 'vertex_set', preserve_order=True) vertex_set = cudf.Series(df_['vertex_set']) - edge_list = defaultdict(list) - next_path_idx = 0 - offsets = [0] - - df = cudf.DataFrame() - for s in sizes.values_host: - for i in range(next_path_idx, s+next_path_idx-1): - edge_list['src'].append(vertex_set.values_host[i]) - edge_list['dst'].append(vertex_set.values_host[i+1]) - next_path_idx += s - df = df.append(edge_list, ignore_index=True) - offsets.append(df.index[-1]+1) - edge_list['src'].clear() - edge_list['dst'].clear() - df['weight'] = edge_set - offsets = cudf.Series(offsets) - - return df, offsets + if use_padding: + edge_set_sz = (max_depth-1)*len(start_vertices) + return vertex_set, edge_set[:edge_set_sz], sizes + + vertex_set_sz = sizes.sum() + edge_set_sz = vertex_set_sz - len(start_vertices) + return vertex_set[:vertex_set_sz], edge_set[:edge_set_sz], sizes + + +def rw_path(num_paths, sizes): + """ + Retrieve more information on the obtained paths in case use_padding + is False. 
+ + Parameters + ---------- + num_paths: int + Number of paths in the random walk output. + + sizes: int + Path size returned in random walk output. + + Returns + ------- + path_data : cudf.DataFrame + Dataframe containing vertex path offsets, edge weight offsets and + edge weight sizes for each path. + """ + return random_walks_wrapper.rw_path_retrieval(num_paths, sizes) diff --git a/python/cugraph/sampling/random_walks_wrapper.pyx b/python/cugraph/sampling/random_walks_wrapper.pyx index 7b16ff14018..64194976e87 100644 --- a/python/cugraph/sampling/random_walks_wrapper.pyx +++ b/python/cugraph/sampling/random_walks_wrapper.pyx @@ -10,7 +10,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.sampling.random_walks cimport call_random_walks +from cugraph.sampling.random_walks cimport call_random_walks, call_rw_paths #from cugraph.structure.graph_primtypes cimport * from cugraph.structure.graph_utilities cimport * from libcpp cimport bool @@ -24,7 +24,9 @@ import numpy.ctypeslib as ctypeslib from rmm._lib.device_buffer cimport DeviceBuffer from cudf.core.buffer import Buffer from cython.operator cimport dereference as deref -def random_walks(input_graph, start_vertices, max_depth): + + +def random_walks(input_graph, start_vertices, max_depth, use_padding): """ Call random_walks """ @@ -58,6 +60,9 @@ def random_walks(input_graph, start_vertices, max_depth): else: weight_t = np.dtype("float32") is_weighted = False + + is_symmetric = not input_graph.is_directed() + # Pointers for random_walks start_vertices = start_vertices.astype('int32') cdef uintptr_t c_start_vertex_ptr = start_vertices.__cuda_array_interface__['data'][0] @@ -78,6 +83,7 @@ def random_walks(input_graph, start_vertices, max_depth): num_edges, False, is_weighted, + is_symmetric, False, False) if(vertex_t == np.dtype("int32")): if(edge_t == np.dtype("int32")): @@
-85,32 +91,71 @@ def random_walks(input_graph, start_vertices, max_depth): graph_container, c_start_vertex_ptr, num_paths, - max_depth)) + max_depth, + use_padding)) else: # (edge_t == np.dtype("int64")): rw_ret_ptr = move(call_random_walks[int, long]( deref(handle_), graph_container, c_start_vertex_ptr, num_paths, - max_depth)) + max_depth, + use_padding)) else: # (vertex_t == edge_t == np.dtype("int64")): rw_ret_ptr = move(call_random_walks[long, long]( deref(handle_), graph_container, c_start_vertex_ptr, num_paths, - max_depth)) + max_depth, + use_padding)) rw_ret= move(rw_ret_ptr.get()[0]) vertex_set = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_coalesced_v_)) edge_set = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_coalesced_w_)) - sizes = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_sizes_)) vertex_set = Buffer(vertex_set) edge_set = Buffer(edge_set) - sizes = Buffer(sizes) set_vertex = cudf.Series(data=vertex_set, dtype=vertex_t) set_edge = cudf.Series(data=edge_set, dtype=weight_t) - set_sizes = cudf.Series(data=sizes, dtype=edge_t) + + if not use_padding: + sizes = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_sizes_)) + sizes = Buffer(sizes) + set_sizes = cudf.Series(data=sizes, dtype=edge_t) + else: + set_sizes = None return set_vertex, set_edge, set_sizes - \ No newline at end of file + + +def rw_path_retrieval(num_paths, sizes): + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get() + index_t = sizes.dtype + + cdef unique_ptr[random_walk_path_t] rw_path_ptr + cdef uintptr_t c_sizes = sizes.__cuda_array_interface__['data'][0] + + if index_t == np.dtype("int32"): + rw_path_ptr = move(call_rw_paths[int](deref(handle_), + num_paths, + c_sizes)) + else: # index_t == np.dtype("int64"): + rw_path_ptr = move(call_rw_paths[long](deref(handle_), + num_paths, + c_sizes)) + + rw_path = move(rw_path_ptr.get()[0]) + vertex_offsets = DeviceBuffer.c_from_unique_ptr(move(rw_path.d_v_offsets)) + weight_sizes = 
DeviceBuffer.c_from_unique_ptr(move(rw_path.d_w_sizes)) + weight_offsets = DeviceBuffer.c_from_unique_ptr(move(rw_path.d_w_offsets)) + vertex_offsets = Buffer(vertex_offsets) + weight_sizes = Buffer(weight_sizes) + weight_offsets = Buffer(weight_offsets) + + df = cudf.DataFrame() + df['vertex_offsets'] = cudf.Series(data=vertex_offsets, dtype=index_t) + df['weight_sizes'] = cudf.Series(data=weight_sizes, dtype=index_t) + df['weight_offsets'] = cudf.Series(data=weight_offsets, dtype=index_t) + return df diff --git a/python/cugraph/structure/graph_classes.py b/python/cugraph/structure/graph_classes.py index 52fcb2ffba4..0fc8b454138 100644 --- a/python/cugraph/structure/graph_classes.py +++ b/python/cugraph/structure/graph_classes.py @@ -86,22 +86,27 @@ def from_cudf_edgelist( in the range [0, V), renumbering can be disabled and the original external vertex ids will be used. If weights are present, edge_attr argument is the weights column name. + Parameters ---------- input_df : cudf.DataFrame or dask_cudf.DataFrame - A DataFrame that contains edge information - If a dask_cudf.DataFrame is passed it will be reinterpreted as - a cudf.DataFrame. For the distributed path please use - from_dask_cudf_edgelist. + A DataFrame that contains edge information. If a dask_cudf.DataFrame is + passed it will be reinterpreted as a cudf.DataFrame. For the + distributed path please use from_dask_cudf_edgelist. + source : str or array-like - source column name or array of column names + source column name or array of column names + destination : str or array-like - destination column name or array of column names + destination column name or array of column names + edge_attr : str or None - the weights column name. Default is None + the weights column name. Default is None + renumber : bool - Indicate whether or not to renumber the source and destination - vertex IDs. Default is True. + Indicate whether or not to renumber the source and destination vertex + IDs. Default is True.
+ Examples -------- >>> df = cudf.read_csv('datasets/karate.csv', delimiter=' ', @@ -135,22 +140,22 @@ def from_cudf_adjlist(self, offset_col, index_col, value_col=None): Parameters ---------- offset_col : cudf.Series - This cudf.Series wraps a gdf_column of size V + 1 (V: number of - vertices). - The gdf column contains the offsets for the vertices in this graph. - Offsets must be in the range [0, E] (E: number of edges). + This cudf.Series wraps a gdf_column of size V + 1 (V: number of + vertices). The gdf column contains the offsets for the vertices in + this graph. Offsets must be in the range [0, E] (E: number of edges). + index_col : cudf.Series - This cudf.Series wraps a gdf_column of size E (E: number of edges). - The gdf column contains the destination index for each edge. - Destination indices must be in the range [0, V) (V: number of - vertices). + This cudf.Series wraps a gdf_column of size E (E: number of edges). + The gdf column contains the destination index for each edge. + Destination indices must be in the range [0, V) + (V: number of vertices). + value_col : cudf.Series, optional - This pointer can be ``None``. - If not, this cudf.Series wraps a gdf_column of size E (E: number of - edges). - The gdf column contains the weight value for each edge. - The expected type of the gdf_column element is floating point - number. + This pointer can be ``None``. If not, this cudf.Series wraps a + gdf_column of size E (E: number of edges). The gdf column contains the + weight value for each edge. The expected type of the gdf_column + element is floating point number. + Examples -------- >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ', @@ -192,19 +197,24 @@ def from_dask_cudf_edgelist( external vertex ids will be used. Note that the graph object will store a reference to the dask_cudf.DataFrame provided. 
+ Parameters ---------- input_ddf : dask_cudf.DataFrame - The edgelist as a dask_cudf.DataFrame + The edgelist as a dask_cudf.DataFrame + source : str or array-like - source column name or array of column names + source column name or array of column names + destination : str - destination column name or array of column names + destination column name or array of column names + edge_attr : str - weights column name. + weights column name. + renumber : bool - If source and destination indices are not in range 0 to V where V - is number of vertices, renumber argument should be True. + If source and destination indices are not in range 0 to V where V is + number of vertices, renumber argument should be True. """ if self._Impl is None: self._Impl = simpleDistributedGraphImpl(self.graph_properties) @@ -237,19 +247,25 @@ def from_pandas_edgelist( in the range [0, V), renumbering can be disabled and the original external vertex ids will be used. If weights are present, edge_attr argument is the weights column name. + Parameters ---------- input_df : pandas.DataFrame - A DataFrame that contains edge information + A DataFrame that contains edge information + source : str or array-like - source column name or array of column names + source column name or array of column names + destination : str or array-like - destination column name or array of column names + destination column name or array of column names + edge_attr : str or None - the weights column name. Default is None + the weights column name. Default is None + renumber : bool - Indicate whether or not to renumber the source and destination - vertex IDs. Default is True. + Indicate whether or not to renumber the source and destination vertex + IDs. Default is True. + Examples -------- >>> df = pandas.read_csv('datasets/karate.csv', delimiter=' ', @@ -306,22 +322,26 @@ def unrenumber(self, df, column_name, preserve_order=False, and does not guarantee order or partitioning in multi-GPU mode. 
If you wish to preserve ordering, add an index column to df and sort the return by that index column. + Parameters ---------- df: cudf.DataFrame or dask_cudf.DataFrame - A DataFrame containing internal vertex identifiers that will be - converted into external vertex identifiers. + A DataFrame containing internal vertex identifiers that will be + converted into external vertex identifiers. + column_name: string - Name of the column containing the internal vertex id. + Name of the column containing the internal vertex id. + preserve_order: (optional) bool - If True, preserve the order of the rows in the output - DataFrame to match the input DataFrame + If True, preserve the order of the rows in the output DataFrame to + match the input DataFrame + Returns --------- df : cudf.DataFrame or dask_cudf.DataFrame - The original DataFrame columns exist unmodified. The external - vertex identifiers are added to the DataFrame, the internal - vertex identifier column is removed from the dataframe. + The original DataFrame columns exist unmodified. The external vertex + identifiers are added to the DataFrame, the internal vertex identifier + column is removed from the dataframe. """ return self.renumber_map.unrenumber(df, column_name, preserve_order, get_column_names) @@ -333,13 +353,16 @@ def lookup_internal_vertex_id(self, df, column_name=None): Series with the internal vertex ids. Note that this function does not guarantee order in single GPU mode, and does not guarantee order or partitioning in multi-GPU mode. + Parameters ---------- df: cudf.DataFrame, cudf.Series, dask_cudf.DataFrame, dask_cudf.Series - A DataFrame containing external vertex identifiers that will be - converted into internal vertex identifiers. + A DataFrame containing external vertex identifiers that will be + converted into internal vertex identifiers. 
+ column_name: (optional) string - Name of the column containing the external vertex ids + Name of the column containing the external vertex ids + Returns --------- series : cudf.Series or dask_cudf.Series @@ -363,21 +386,27 @@ def add_internal_vertex_id( Parameters ---------- df: cudf.DataFrame or dask_cudf.DataFrame - A DataFrame containing external vertex identifiers that will be - converted into internal vertex identifiers. + A DataFrame containing external vertex identifiers that will be + converted into internal vertex identifiers. + internal_column_name: string - Name of column to contain the internal vertex id + Name of column to contain the internal vertex id + external_column_name: string or list of strings - Name of the column(s) containing the external vertex ids + Name of the column(s) containing the external vertex ids + drop: (optional) bool, defaults to True - Drop the external columns from the returned DataFrame + Drop the external columns from the returned DataFrame + preserve_order: (optional) bool, defaults to False - Preserve the order of the data frame (requires an extra sort) + Preserve the order of the data frame (requires an extra sort) + Returns --------- df : cudf.DataFrame or dask_cudf.DataFrame Original DataFrame with new column containing internal vertex id + """ return self.renumber_map.add_internal_vertex_id( df, @@ -448,11 +477,13 @@ def to_directed(self): Return a directed representation of the graph. This function sets the type of graph as DiGraph() and returns the directed view. + Returns ------- G : DiGraph - A directed graph with the same nodes, and each edge (u,v,weights) - replaced by two directed edges (u,v,weights) and (v,u,weights). + A directed graph with the same nodes, and each edge (u,v,weights) + replaced by two directed edges (u,v,weights) and (v,u,weights). 
+ Examples -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', @@ -461,6 +492,7 @@ def to_directed(self): >>> G.from_cudf_edgelist(M, '0', '1') >>> DiG = G.to_directed() """ + directed_graph = type(self)() directed_graph.graph_properties.directed = True directed_graph._Impl = type(self._Impl)(directed_graph. @@ -471,11 +503,13 @@ def to_directed(self): def to_undirected(self): """ Return an undirected copy of the graph. + Returns ------- G : Graph - A undirected graph with the same nodes, and each directed edge - (u,v,weights) replaced by an undirected edge (u,v,weights). + A undirected graph with the same nodes, and each directed edge + (u,v,weights) replaced by an undirected edge (u,v,weights). + Examples -------- >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', @@ -502,7 +536,7 @@ def add_nodes_from(self, nodes): Parameters ---------- nodes : list or cudf.Series - The nodes of the graph to be stored. + The nodes of the graph to be stored. """ self._Impl._nodes["all_nodes"] = cudf.Series(nodes) @@ -574,22 +608,27 @@ def from_cudf_edgelist( in the range [0, V), renumbering can be disabled and the original external vertex ids will be used. If weights are present, edge_attr argument is the weights column name. + Parameters ---------- input_df : cudf.DataFrame or dask_cudf.DataFrame - A DataFrame that contains edge information - If a dask_cudf.DataFrame is passed it will be reinterpreted as - a cudf.DataFrame. For the distributed path please use - from_dask_cudf_edgelist. + A DataFrame that contains edge information. If a dask_cudf.DataFrame is + passed it will be reinterpreted as a cudf.DataFrame. For the + distributed path please use from_dask_cudf_edgelist. 
+ source : str or array-like - source column name or array of column names + source column name or array of column names + destination : str or array-like - destination column name or array of column names + destination column name or array of column names + edge_attr : str or None - the weights column name. Default is None + the weights column name. Default is None + renumber : bool - Indicate whether or not to renumber the source and destination - vertex IDs. Default is True. + Indicate whether or not to renumber the source and destination vertex + IDs. Default is True. + Examples -------- >>> df = cudf.read_csv('datasets/karate.csv', delimiter=' ', @@ -625,19 +664,24 @@ def from_dask_cudf_edgelist( external vertex ids will be used. Note that the graph object will store a reference to the dask_cudf.DataFrame provided. + Parameters ---------- input_ddf : dask_cudf.DataFrame - The edgelist as a dask_cudf.DataFrame + The edgelist as a dask_cudf.DataFrame + source : str or array-like - source column name or array of column names + source column name or array of column names + destination : str - destination column name or array of column names + destination column name or array of column names + edge_attr : str - weights column name. + weights column name. + renumber : bool - If source and destination indices are not in range 0 to V where V - is number of vertices, renumber argument should be True. + If source and destination indices are not in range 0 to V where V is + number of vertices, renumber argument should be True. """ raise Exception("Distributed N-partite graph not supported") @@ -656,6 +700,7 @@ def add_nodes_from(self, nodes, bipartite=None, multipartite=None): multipartite : str Sets the Graph as multipartite. The nodes are stored as a set of nodes of the partition named as multipartite argument. 
+ """ if self._Impl is None: self._Impl = npartiteGraphImpl(self.graph_properties) diff --git a/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py index e85f3b6ab6c..951ea8add5b 100644 --- a/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -12,6 +12,7 @@ # limitations under the License. from cugraph.structure import graph_primtypes_wrapper +from cugraph.structure.graph_primtypes_wrapper import Direction from cugraph.structure.number_map import NumberMap import cudf import dask_cudf @@ -211,7 +212,7 @@ def in_degree(self, vertex_subset=None): >>> G.from_cudf_edgelist(M, '0', '1') >>> df = G.in_degree([0,9,12]) """ - return self._degree(vertex_subset, x=1) + return self._degree(vertex_subset, direction=Direction.IN) def out_degree(self, vertex_subset=None): """ @@ -245,8 +246,7 @@ def out_degree(self, vertex_subset=None): >>> G.from_cudf_edgelist(M, '0', '1') >>> df = G.out_degree([0,9,12]) """ - # TODO: Add support - raise Exception("Not supported for distributed graph") + return self._degree(vertex_subset, direction=Direction.OUT) def degree(self, vertex_subset=None): """ @@ -319,14 +319,15 @@ def degrees(self, vertex_subset=None): """ raise Exception("Not supported for distributed graph") - def _degree(self, vertex_subset, x=0): - vertex_col, degree_col = graph_primtypes_wrapper._degree(self, x) + def _degree(self, vertex_subset, direction=Direction.ALL): + vertex_col, degree_col = graph_primtypes_wrapper._mg_degree(self, + direction) df = cudf.DataFrame() df["vertex"] = vertex_col df["degree"] = degree_col - if self.renumbered is True: - df = self.unrenumber(df, "vertex") + if self.properties.renumbered is True: + df = self.renumber_map.unrenumber(df, "vertex") if vertex_subset is not None: df = df[df['vertex'].isin(vertex_subset)] @@ -471,3 +472,9 @@ def 
compute_renumber_edge_list(self, transposed=False): self.edgelist = self.EdgeList(renumbered_ddf) self.renumber_map = number_map self.properties.store_transposed = transposed + + def vertex_column_size(self): + if self.properties.renumbered: + return self.renumber_map.vertex_column_size() + else: + return 1 diff --git a/python/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/structure/graph_implementation/simpleGraph.py index 4e632a72231..e74b04c00b5 100644 --- a/python/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/structure/graph_implementation/simpleGraph.py @@ -12,6 +12,7 @@ # limitations under the License. from cugraph.structure import graph_primtypes_wrapper +from cugraph.structure.graph_primtypes_wrapper import Direction from cugraph.structure.symmetrize import symmetrize from cugraph.structure.number_map import NumberMap import cugraph.dask.common.mg_utils as mg_utils @@ -566,7 +567,7 @@ def in_degree(self, vertex_subset=None): >>> G.from_cudf_edgelist(M, '0', '1') >>> df = G.in_degree([0,9,12]) """ - return self._degree(vertex_subset, x=1) + return self._degree(vertex_subset, direction=Direction.IN) def out_degree(self, vertex_subset=None): """ @@ -600,7 +601,7 @@ def out_degree(self, vertex_subset=None): >>> G.from_cudf_edgelist(M, '0', '1') >>> df = G.out_degree([0,9,12]) """ - return self._degree(vertex_subset, x=2) + return self._degree(vertex_subset, direction=Direction.OUT) def degree(self, vertex_subset=None): """ @@ -690,8 +691,9 @@ def degrees(self, vertex_subset=None): return df - def _degree(self, vertex_subset, x=0): - vertex_col, degree_col = graph_primtypes_wrapper._degree(self, x) + def _degree(self, vertex_subset, direction=Direction.ALL): + vertex_col, degree_col = graph_primtypes_wrapper._degree(self, + direction) df = cudf.DataFrame() df["vertex"] = vertex_col df["degree"] = degree_col @@ -821,3 +823,9 @@ def neighbors(self, n): return 
self.renumber_map.from_internal_vertex_id(neighbors)["0"] else: return neighbors + + def vertex_column_size(self): + if self.properties.renumbered: + return self.renumber_map.vertex_column_size() + else: + return 1 diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index 1e0d9626727..e0db6c31fca 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -23,7 +23,7 @@ from libcpp.vector cimport vector from cugraph.raft.common.handle cimport * from rmm._lib.device_buffer cimport device_buffer -cdef extern from "graph.hpp" namespace "cugraph": +cdef extern from "cugraph/graph.hpp" namespace "cugraph": ctypedef enum PropType: PROP_UNDEF "cugraph::PROP_UNDEF" @@ -123,12 +123,12 @@ cdef extern from "graph.hpp" namespace "cugraph": GraphCSRView[VT,ET,WT] view() -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef unique_ptr[GraphCOO[VT, ET, WT]] get_two_hop_neighbors[VT,ET,WT]( const GraphCSRView[VT, ET, WT] &graph) except + -cdef extern from "functions.hpp" namespace "cugraph": +cdef extern from "cugraph/functions.hpp" namespace "cugraph": cdef unique_ptr[device_buffer] renumber_vertices[VT_IN,VT_OUT,ET]( ET number_of_edges, diff --git a/python/cugraph/structure/graph_primtypes_wrapper.pyx b/python/cugraph/structure/graph_primtypes_wrapper.pyx index 7bc62b9a1af..91af28380c3 100644 --- a/python/cugraph/structure/graph_primtypes_wrapper.pyx +++ b/python/cugraph/structure/graph_primtypes_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -21,6 +21,7 @@ from cugraph.structure.graph_primtypes cimport get_two_hop_neighbors as c_get_tw from cugraph.structure.graph_primtypes cimport renumber_vertices as c_renumber_vertices from cugraph.structure.utils_wrapper import * from libcpp cimport bool +import enum from libc.stdint cimport uintptr_t from rmm._lib.device_buffer cimport device_buffer, DeviceBuffer @@ -45,6 +46,12 @@ def datatype_cast(cols, dtypes): return cols_out +class Direction(enum.Enum): + ALL = 0 + IN = 1 + OUT = 2 + + def renumber(source_col, dest_col): num_edges = len(source_col) @@ -137,7 +144,7 @@ def view_edge_list(input_graph): return src_indices, indices, weights -def _degree_coo(edgelist_df, src_name, dst_name, x=0, num_verts=None, sID=None): +def _degree_coo(edgelist_df, src_name, dst_name, direction=Direction.ALL, num_verts=None, sID=None): # # Computing the degree of the input graph from COO # @@ -146,11 +153,11 @@ def _degree_coo(edgelist_df, src_name, dst_name, x=0, num_verts=None, sID=None): src = edgelist_df[src_name] dst = edgelist_df[dst_name] - if x == 0: + if direction == Direction.ALL: dir = DIRECTION_IN_PLUS_OUT - elif x == 1: + elif direction == Direction.IN: dir = DIRECTION_IN - elif x == 2: + elif direction == Direction.OUT: dir = DIRECTION_OUT else: raise Exception("x should be 0, 1 or 2") @@ -185,17 +192,17 @@ def _degree_coo(edgelist_df, src_name, dst_name, x=0, num_verts=None, sID=None): return vertex_col, degree_col -def _degree_csr(offsets, indices, x=0): +def _degree_csr(offsets, indices, direction=Direction.ALL): cdef DegreeDirection dir - if x == 0: + if direction == Direction.ALL: dir = DIRECTION_IN_PLUS_OUT - elif x == 1: + elif direction == Direction.IN: dir = DIRECTION_IN - elif x == 2: + elif direction == Direction.OUT: dir = DIRECTION_OUT else: - raise Exception("x should be 0, 1 or 2") + raise Exception("direction should be 0, 1 or 2") [offsets, indices] = datatype_cast([offsets, indices], [np.int32]) @@ 
-220,44 +227,48 @@ def _degree_csr(offsets, indices, x=0): return vertex_col, degree_col -def _degree(input_graph, x=0): - transpose_x = { 0: 0, - 2: 1, - 1: 2 } +def _mg_degree(input_graph, direction=Direction.ALL): + if input_graph.edgelist is None: + input_graph.compute_renumber_edge_list(transposed=False) + input_ddf = input_graph.edgelist.edgelist_df + num_verts = input_ddf[['src', 'dst']].max().max().compute() + 1 + data = DistributedDataHandler.create(data=input_ddf) + comms = Comms.get_comms() + client = default_client() + data.calculate_parts_to_sizes(comms) + if direction==Direction.IN: + degree_ddf = [client.submit(_degree_coo, wf[1][0], 'src', 'dst', Direction.IN, num_verts, comms.sessionId, workers=[wf[0]]) for idx, wf in enumerate(data.worker_to_parts.items())] + if direction==Direction.OUT: + degree_ddf = [client.submit(_degree_coo, wf[1][0], 'dst', 'src', Direction.IN, num_verts, comms.sessionId, workers=[wf[0]]) for idx, wf in enumerate(data.worker_to_parts.items())] + wait(degree_ddf) + return degree_ddf[0].result() + + +def _degree(input_graph, direction=Direction.ALL): + transpose_direction = { Direction.ALL: Direction.ALL, + Direction.IN: Direction.OUT, + Direction.OUT: Direction.IN } if input_graph.adjlist is not None: return _degree_csr(input_graph.adjlist.offsets, input_graph.adjlist.indices, - x) + direction) if input_graph.transposedadjlist is not None: return _degree_csr(input_graph.transposedadjlist.offsets, input_graph.transposedadjlist.indices, - transpose_x[x]) - - if input_graph.edgelist is None and input_graph.distributed: - input_graph.compute_renumber_edge_list(transposed=False) + transpose_direction[direction]) if input_graph.edgelist is not None: - if isinstance(input_graph.edgelist.edgelist_df, dc.DataFrame): - input_ddf = input_graph.edgelist.edgelist_df - num_verts = input_ddf[['src', 'dst']].max().max().compute() + 1 - data = DistributedDataHandler.create(data=input_ddf) - comms = Comms.get_comms() - client = 
default_client() - data.calculate_parts_to_sizes(comms) - degree_ddf = [client.submit(_degree_coo, wf[1][0], 'src', 'dst', x, num_verts, comms.sessionId, workers=[wf[0]]) for idx, wf in enumerate(data.worker_to_parts.items())] - wait(degree_ddf) - return degree_ddf[0].result() return _degree_coo(input_graph.edgelist.edgelist_df, - 'src', 'dst', x) + 'src', 'dst', direction) raise Exception("input_graph not COO, CSR or CSC") def _degrees(input_graph): - verts, indegrees = _degree(input_graph,1) - verts, outdegrees = _degree(input_graph, 2) + verts, indegrees = _degree(input_graph, Direction.IN) + verts, outdegrees = _degree(input_graph, Direction.OUT) return verts, indegrees, outdegrees diff --git a/python/cugraph/structure/graph_utilities.pxd b/python/cugraph/structure/graph_utilities.pxd index c9cf1748bfe..2d5b081dd0c 100644 --- a/python/cugraph/structure/graph_utilities.pxd +++ b/python/cugraph/structure/graph_utilities.pxd @@ -25,7 +25,7 @@ from libcpp.vector cimport vector from rmm._lib.device_buffer cimport device_buffer # C++ graph utilities -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": ctypedef enum numberTypeEnum: int32Type "cugraph::cython::numberTypeEnum::int32Type" @@ -51,6 +51,7 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": size_t num_global_edges, bool sorted_by_degree, bool is_weighted, + bool is_symmetric, bool transposed, bool multi_gpu) except + @@ -83,7 +84,7 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": unique_ptr[device_buffer] dst_indices unique_ptr[device_buffer] edge_data unique_ptr[device_buffer] subgraph_offsets - + cdef cppclass random_walk_ret_t: size_t coalesced_sz_v_ size_t coalesced_sz_w_ @@ -93,20 +94,27 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": unique_ptr[device_buffer] d_coalesced_w_ unique_ptr[device_buffer] d_sizes_ + cdef cppclass 
random_walk_path_t: + unique_ptr[device_buffer] d_v_offsets + unique_ptr[device_buffer] d_w_sizes + unique_ptr[device_buffer] d_w_offsets + + cdef cppclass graph_generator_t: + unique_ptr[device_buffer] d_source + unique_ptr[device_buffer] d_destination + cdef extern from "" namespace "std" nogil: cdef device_buffer move(device_buffer) - cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer]) + cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer]) cdef cy_multi_edgelists_t move(cy_multi_edgelists_t) cdef unique_ptr[cy_multi_edgelists_t] move(unique_ptr[cy_multi_edgelists_t]) - #cdef device_buffer move(device_buffer) - #cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer]) # renumber_edgelist() interface utilities: # # # 1. `cdef extern partition_t`: # -cdef extern from "experimental/graph_view.hpp" namespace "cugraph::experimental": +cdef extern from "cugraph/experimental/graph_view.hpp" namespace "cugraph::experimental": cdef cppclass partition_t[vertex_t]: pass @@ -114,7 +122,7 @@ cdef extern from "experimental/graph_view.hpp" namespace "cugraph::experimental" # 2. return type for shuffle: # -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef cppclass major_minor_weights_t[vertex_t, edge_t, weight_t]: major_minor_weights_t(const handle_t &handle) @@ -131,10 +139,10 @@ ctypedef fused shuffled_vertices_t: major_minor_weights_t[int, long, double] major_minor_weights_t[long, long, float] major_minor_weights_t[long, long, double] - + # 3. 
return type for renumber: # -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef cppclass renum_quad_t[vertex_t, edge_t]: renum_quad_t(const handle_t &handle) @@ -158,11 +166,11 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": vertex_t get_part_matrix_partition_major_value_start_offset(size_t partition_idx) pair[vertex_t, vertex_t] get_part_matrix_partition_minor_range() vertex_t get_part_matrix_partition_minor_first() - vertex_t get_part_matrix_partition_minor_last() + vertex_t get_part_matrix_partition_minor_last() # 4. `sort_and_shuffle_values()` wrapper: # -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef unique_ptr[major_minor_weights_t[vertex_t, edge_t, weight_t]] call_shuffle[vertex_t, edge_t, weight_t]( const handle_t &handle, @@ -173,7 +181,7 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": # 5. 
`renumber_edgelist()` wrapper # -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef unique_ptr[renum_quad_t[vertex_t, edge_t]] call_renumber[vertex_t, edge_t]( const handle_t &handle, diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index d90d7a1fda9..a1ff2f47abf 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -671,3 +671,6 @@ def unrenumber(self, df, column_name, preserve_order=False, return df, col_names else: return df + + def vertex_column_size(self): + return len(self.implementation.col_names) diff --git a/python/cugraph/structure/renumber_wrapper.pyx b/python/cugraph/structure/renumber_wrapper.pyx index 99626cdee08..1c302d21220 100644 --- a/python/cugraph/structure/renumber_wrapper.pyx +++ b/python/cugraph/structure/renumber_wrapper.pyx @@ -112,6 +112,7 @@ def renumber(input_df, # maybe use cpdef ? cdef uintptr_t shuffled_major = NULL cdef uintptr_t shuffled_minor = NULL + # FIXME: Fix fails when do_check = True cdef bool do_check = False # ? for now... cdef bool mg_flag = is_multi_gpu # run Single-GPU or MNMG @@ -175,7 +176,7 @@ def renumber(input_df, # maybe use cpdef ? 
shuffled_major, shuffled_minor, deref(edge_counts_32.get()), - 1, + do_check, mg_flag).release()) pair_original = ptr_renum_quad_32_32.get().get_dv_wrap() # original vertices: see helper diff --git a/python/cugraph/structure/utils.pxd b/python/cugraph/structure/utils.pxd index c22e64841af..350b5890149 100644 --- a/python/cugraph/structure/utils.pxd +++ b/python/cugraph/structure/utils.pxd @@ -20,7 +20,7 @@ from cugraph.structure.graph_primtypes cimport * from libcpp.memory cimport unique_ptr -cdef extern from "functions.hpp" namespace "cugraph": +cdef extern from "cugraph/functions.hpp" namespace "cugraph": cdef unique_ptr[GraphCSR[VT,ET,WT]] coo_to_csr[VT,ET,WT]( const GraphCOOView[VT,ET,WT] &graph) except + diff --git a/python/cugraph/tests/dask/test_mg_connectivity.py b/python/cugraph/tests/dask/test_mg_connectivity.py new file mode 100644 index 00000000000..2f946789471 --- /dev/null +++ b/python/cugraph/tests/dask/test_mg_connectivity.py @@ -0,0 +1,78 @@ +# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import cugraph.dask as dcg +import gc +import pytest +import cugraph +import dask_cudf +import cudf +from cugraph.dask.common.mg_utils import (is_single_gpu, + setup_local_dask_cluster, + teardown_local_dask_cluster) + + +@pytest.fixture(scope="module") +def client_connection(): + (cluster, client) = setup_local_dask_cluster(p2p=True) + yield client + teardown_local_dask_cluster(cluster, client) + + +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) +def test_dask_wcc(client_connection): + gc.collect() + + # FIXME: update this to allow dataset to be parameterized and have dataset + # part of test param id (see other tests) + input_data_path = r"../datasets/netscience.csv" + print(f"dataset={input_data_path}") + chunksize = dcg.get_chunksize(input_data_path) + + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) + + df = cudf.read_csv( + input_data_path, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) + + g = cugraph.DiGraph() + g.from_cudf_edgelist(df, "src", "dst", renumber=True) + + dg = cugraph.DiGraph() + dg.from_dask_cudf_edgelist(ddf, "src", "dst") + + expected_dist = cugraph.weakly_connected_components(g) + result_dist = dcg.weakly_connected_components(dg) + + result_dist = result_dist.compute() + compare_dist = expected_dist.merge( + result_dist, on="vertex", suffixes=["_local", "_dask"] + ) + + unique_local_labels = compare_dist['labels_local'].unique() + + for label in unique_local_labels.values.tolist(): + dask_labels_df = compare_dist[compare_dist['labels_local'] == label] + dask_labels = dask_labels_df['labels_dask'] + assert (dask_labels.iloc[0] == dask_labels).all() diff --git a/python/cugraph/tests/dask/test_mg_degree.py b/python/cugraph/tests/dask/test_mg_degree.py index 93e8a365dea..bad55df1ca9 100644 --- a/python/cugraph/tests/dask/test_mg_degree.py 
+++ b/python/cugraph/tests/dask/test_mg_degree.py @@ -36,7 +36,7 @@ def test_dask_mg_degree(client_connection): # FIXME: update this to allow dataset to be parameterized and have dataset # part of test param id (see other tests) - input_data_path = r"../datasets/karate.csv" + input_data_path = r"../datasets/karate-asymmetric.csv" print(f"dataset={input_data_path}") chunksize = cugraph.dask.get_chunksize(input_data_path) @@ -62,10 +62,18 @@ def test_dask_mg_degree(client_connection): g = cugraph.DiGraph() g.from_cudf_edgelist(df, "src", "dst") - merge_df = ( + merge_df_in = ( dg.in_degree() .merge(g.in_degree(), on="vertex", suffixes=["_dg", "_g"]) .compute() ) - assert merge_df["degree_dg"].equals(merge_df["degree_g"]) + merge_df_out = ( + dg.out_degree() + .merge(g.out_degree(), on="vertex", suffixes=["_dg", "_g"]) + .compute() + ) + + assert merge_df_in["degree_dg"].equals(merge_df_in["degree_g"]) + assert merge_df_out["degree_dg"].equals( + merge_df_out["degree_g"]) diff --git a/python/cugraph/tests/dask/test_mg_katz_centrality.py b/python/cugraph/tests/dask/test_mg_katz_centrality.py index eadf0f662d4..8ed604954f4 100644 --- a/python/cugraph/tests/dask/test_mg_katz_centrality.py +++ b/python/cugraph/tests/dask/test_mg_katz_centrality.py @@ -50,21 +50,12 @@ def test_dask_katz_centrality(client_connection): dtype=["int32", "int32", "float32"], ) - df = cudf.read_csv( - input_data_path, - delimiter=" ", - names=["src", "dst", "value"], - dtype=["int32", "int32", "float32"], - ) - - g = cugraph.DiGraph() - g.from_cudf_edgelist(df, "src", "dst") - dg = cugraph.DiGraph() dg.from_dask_cudf_edgelist(ddf, "src", "dst") - largest_out_degree = g.degrees().nlargest(n=1, columns="out_degree") - largest_out_degree = largest_out_degree["out_degree"].iloc[0] + largest_out_degree = dg.out_degree().compute().\ + nlargest(n=1, columns="degree") + largest_out_degree = largest_out_degree["degree"].iloc[0] katz_alpha = 1 / (largest_out_degree + 1) mg_res = dcg.katz_centrality(dg, 
alpha=katz_alpha, tol=1e-6) diff --git a/python/cugraph/tests/dask/test_mg_louvain.py b/python/cugraph/tests/dask/test_mg_louvain.py index bd7374fb75e..c67d8fcb1f9 100644 --- a/python/cugraph/tests/dask/test_mg_louvain.py +++ b/python/cugraph/tests/dask/test_mg_louvain.py @@ -17,7 +17,6 @@ import cugraph import dask_cudf from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than from cugraph.dask.common.mg_utils import (is_single_gpu, setup_local_dask_cluster, teardown_local_dask_cluster) @@ -88,15 +87,11 @@ def test_mg_louvain_with_edgevals(daskGraphFromDataset): # FIXME: daskGraphFromDataset returns a DiGraph, which Louvain is currently # accepting. In the future, an MNMG symmeterize will need to be called to # create a Graph for Louvain. - if is_device_version_less_than((7, 0)): - with pytest.raises(RuntimeError): - parts, mod = dcg.louvain(daskGraphFromDataset) - else: - parts, mod = dcg.louvain(daskGraphFromDataset) + parts, mod = dcg.louvain(daskGraphFromDataset) - # FIXME: either call Nx with the same dataset and compare results, or - # hardcode golden results to compare to. - print() - print(parts.compute()) - print(mod) - print() + # FIXME: either call Nx with the same dataset and compare results, or + # hardcode golden results to compare to. + print() + print(parts.compute()) + print(mod) + print() diff --git a/python/cugraph/tests/generators/test_rmat.py b/python/cugraph/tests/generators/test_rmat.py new file mode 100644 index 00000000000..a7c8701095e --- /dev/null +++ b/python/cugraph/tests/generators/test_rmat.py @@ -0,0 +1,122 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import pytest + +import cudf +import dask_cudf + +from cugraph.dask.common.mg_utils import (is_single_gpu, + get_visible_devices, + setup_local_dask_cluster, + teardown_local_dask_cluster) +from cugraph.generators import rmat +import cugraph + + +############################################################################## +_cluster = None +_client = None +_is_single_gpu = is_single_gpu() +_visible_devices = get_visible_devices() +_scale_values = [2, 4, 16] +_scale_test_ids = [f"scale={x}" for x in _scale_values] +_mg_values = [False, True] +_mg_test_ids = [f"mg={x}" for x in _mg_values] +_graph_types = [cugraph.Graph, cugraph.DiGraph, None, int] +_graph_test_ids = [f"create_using={getattr(x,'__name__',str(x))}" + for x in _graph_types] + + +def _call_rmat(scale, num_edges, create_using, mg): + """ + Simplifies calling RMAT by requiring only specific args that are varied by + these tests and hard-coding all others. 
+ """ + return rmat(scale=scale, + num_edges=num_edges, + a=0.1, + b=0.2, + c=0.3, + seed=24, + clip_and_flip=False, + scramble_vertex_ids=True, + create_using=create_using, + mg=mg) + + +############################################################################### +def setup_module(): + global _cluster + global _client + if not _is_single_gpu: + (_cluster, _client) = setup_local_dask_cluster(p2p=True) + + +def teardown_module(): + if not _is_single_gpu: + teardown_local_dask_cluster(_cluster, _client) + + +############################################################################### +@pytest.mark.parametrize("scale", _scale_values, ids=_scale_test_ids) +@pytest.mark.parametrize("mg", _mg_values, ids=_mg_test_ids) +def test_rmat_edgelist(scale, mg): + """ + Verifies that the edgelist returned by rmat() is valid based on inputs. + """ + if mg and _is_single_gpu: + pytest.skip("skipping MG testing on Single GPU system") + + num_edges = (2**scale)*4 + create_using = None # Returns the edgelist from RMAT + + df = _call_rmat(scale, num_edges, create_using, mg) + + if mg: + assert df.npartitions == len(_visible_devices) + df_to_check = df.compute() + else: + df_to_check = df + + assert len(df_to_check) == num_edges + + +@pytest.mark.parametrize("graph_type", _graph_types, ids=_graph_test_ids) +@pytest.mark.parametrize("mg", _mg_values, ids=_mg_test_ids) +def test_rmat_return_type(graph_type, mg): + """ + Verifies that the return type returned by rmat() is valid (or the proper + exception is raised) based on inputs. 
+ """ + if mg and _is_single_gpu: + pytest.skip("skipping MG testing on Single GPU system") + + scale = 2 + num_edges = (2**scale)*4 + + if (mg and (graph_type not in [cugraph.DiGraph, None])) or \ + (graph_type not in [cugraph.Graph, cugraph.DiGraph, None]): + + with pytest.raises(TypeError): + _call_rmat(scale, num_edges, graph_type, mg) + + else: + G_or_df = _call_rmat(scale, num_edges, graph_type, mg) + + if graph_type is None: + assert type(G_or_df) is dask_cudf.DataFrame if mg \ + else cudf.DataFrame + else: + assert type(G_or_df) is graph_type diff --git a/python/cugraph/tests/test_ecg.py b/python/cugraph/tests/test_ecg.py index ba705a787ee..e51ef9b7a98 100644 --- a/python/cugraph/tests/test_ecg.py +++ b/python/cugraph/tests/test_ecg.py @@ -18,7 +18,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than from pathlib import PurePath @@ -61,32 +60,20 @@ def golden_call(graph_file): def test_ecg_clustering(graph_file, min_weight, ensemble_size): gc.collect() - if is_device_version_less_than((7, 0)): - cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False) - G = cugraph.Graph() - G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") + # Read in the graph and get a cugraph object + cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False) + G = cugraph.Graph() + G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") - # Get the modularity score for partitioning versus random assignment - with pytest.raises(RuntimeError): - cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size) - else: - # Read in the graph and get a cugraph object - cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False) - G = cugraph.Graph() - G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") - - # Get the modularity score for partitioning versus random assignment - cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size) - golden_score = 
golden_call(graph_file) + # Get the modularity score for partitioning versus random assignment + cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size) + golden_score = golden_call(graph_file) - # Assert that the partitioning has better modularity than the random - # assignment - assert cu_score > (0.95 * golden_score) + # Assert that the partitioning has better modularity than the random + # assignment + assert cu_score > (0.95 * golden_score) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", DATASETS) @pytest.mark.parametrize("min_weight", MIN_WEIGHTS) @pytest.mark.parametrize("ensemble_size", ENSEMBLE_SIZES) diff --git a/python/cugraph/tests/test_force_atlas2.py b/python/cugraph/tests/test_force_atlas2.py index f399460e5e6..1128f52904a 100644 --- a/python/cugraph/tests/test_force_atlas2.py +++ b/python/cugraph/tests/test_force_atlas2.py @@ -14,6 +14,7 @@ import time import pytest +import cudf import cugraph from cugraph.internals import GraphBasedDimRedCallback from cugraph.tests import utils @@ -135,3 +136,70 @@ def test_force_atlas2(graph_file, score, max_iter, assert test_callback.on_epoch_end_called_count == max_iter # verify `on_train_end` was only called once assert test_callback.on_train_end_called_count == 1 + + +# FIXME: this test occasionally fails - skipping to prevent CI failures but +# need to revisit ASAP +@pytest.mark.skip(reason="non-deterministric - needs fixing!") +@pytest.mark.parametrize('graph_file, score', DATASETS[:-1]) +@pytest.mark.parametrize('max_iter', MAX_ITERATIONS) +@pytest.mark.parametrize('barnes_hut_optimize', BARNES_HUT_OPTIMIZE) +def test_force_atlas2_multi_column_pos_list(graph_file, score, max_iter, + barnes_hut_optimize): + cu_M = utils.read_csv_file(graph_file) + test_callback = TestCallback() + pos = cugraph_call(cu_M, + max_iter=max_iter, + pos_list=None, + outbound_attraction_distribution=True, + lin_log_mode=False, + 
prevent_overlapping=False, + edge_weight_influence=1.0, + jitter_tolerance=1.0, + barnes_hut_optimize=False, + barnes_hut_theta=0.5, + scaling_ratio=2.0, + strong_gravity_mode=False, + gravity=1.0, + callback=test_callback) + + cu_M.rename(columns={'0': 'src_0', '1': 'dst_0'}, inplace=True) + cu_M['src_1'] = cu_M['src_0'] + 1000 + cu_M['dst_1'] = cu_M['dst_0'] + 1000 + + G = cugraph.Graph() + G.from_cudf_edgelist( + cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"], + edge_attr="2" + ) + + pos_list = cudf.DataFrame() + pos_list['vertex_0'] = pos['vertex'] + pos_list['vertex_1'] = pos_list['vertex_0'] + 1000 + pos_list['x'] = pos['x'] + pos_list['y'] = pos['y'] + + cu_pos = cugraph.force_atlas2( + G, + max_iter=max_iter, + pos_list=pos_list, + outbound_attraction_distribution=True, + lin_log_mode=False, + prevent_overlapping=False, + edge_weight_influence=1.0, + jitter_tolerance=1.0, + barnes_hut_optimize=False, + barnes_hut_theta=0.5, + scaling_ratio=2.0, + strong_gravity_mode=False, + gravity=1.0, + callback=test_callback) + + cu_pos = cu_pos.sort_values('0_vertex') + matrix_file = graph_file.with_suffix(".mtx") + M = scipy.io.mmread(matrix_file) + M = M.todense() + cu_trust = trustworthiness(M, cu_pos[["x", "y"]].to_pandas()) + print(cu_trust, score) + assert cu_trust > score diff --git a/python/cugraph/tests/test_jaccard.py b/python/cugraph/tests/test_jaccard.py index b61101ef1d0..cc2795cb464 100644 --- a/python/cugraph/tests/test_jaccard.py +++ b/python/cugraph/tests/test_jaccard.py @@ -15,6 +15,7 @@ import time import pytest +import cudf import cugraph from cugraph.tests import utils from pathlib import PurePath @@ -222,3 +223,32 @@ def test_jaccard_nx(graph_file): # FIXME: Nx does a full all-pair Jaccard. 
# cuGraph does a limited 1-hop Jaccard # assert nx_j == cg_j + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_jaccard_multi_column(graph_file): + gc.collect() + + M = utils.read_csv_for_nx(graph_file) + + cu_M = cudf.DataFrame() + cu_M["src_0"] = cudf.Series(M["0"]) + cu_M["dst_0"] = cudf.Series(M["1"]) + cu_M["src_1"] = cu_M["src_0"] + 1000 + cu_M["dst_1"] = cu_M["dst_0"] + 1000 + G1 = cugraph.Graph() + G1.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"]) + + vertex_pair = cu_M[["src_0", "src_1", "dst_0", "dst_1"]] + vertex_pair = vertex_pair[:5] + + df_res = cugraph.jaccard(G1, vertex_pair) + + G2 = cugraph.Graph() + G2.from_cudf_edgelist(cu_M, source="src_0", + destination="dst_0") + df_exp = cugraph.jaccard(G2, vertex_pair[["src_0", "dst_0"]]) + + # Calculating mismatch + assert df_res["jaccard_coeff"].equals(df_exp["jaccard_coeff"]) diff --git a/python/cugraph/tests/test_leiden.py b/python/cugraph/tests/test_leiden.py index 89203d5014c..b6c23dad6f2 100644 --- a/python/cugraph/tests/test_leiden.py +++ b/python/cugraph/tests/test_leiden.py @@ -20,8 +20,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than - # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from # 'collections.abc' is deprecated, and in 3.8 it will stop working) for @@ -55,9 +53,6 @@ def cugraph_louvain(G, edgevals=False): return parts, mod -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_leiden(graph_file): gc.collect() @@ -78,9 +73,6 @@ def test_leiden(graph_file): assert leiden_mod >= (0.99 * louvain_mod) -@pytest.mark.skipif( - is_device_version_less_than((7, 0)), reason="Not supported on Pascal" -) @pytest.mark.parametrize("graph_file", utils.DATASETS) def 
test_leiden_nx(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_louvain.py b/python/cugraph/tests/test_louvain.py index 50e9ccaa4c5..fc112b8d657 100644 --- a/python/cugraph/tests/test_louvain.py +++ b/python/cugraph/tests/test_louvain.py @@ -18,7 +18,6 @@ import cugraph from cugraph.tests import utils -from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -72,63 +71,53 @@ def networkx_call(M): def test_louvain_with_edgevals(graph_file): gc.collect() - if is_device_version_less_than((7, 0)): - cu_M = utils.read_csv_file(graph_file) - with pytest.raises(RuntimeError): - cu_parts, cu_mod = cugraph_call(cu_M) - else: - M = utils.read_csv_for_nx(graph_file) - cu_M = utils.read_csv_file(graph_file) - cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True) + M = utils.read_csv_for_nx(graph_file) + cu_M = utils.read_csv_file(graph_file) + cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True) - nx_parts = networkx_call(M) - # Calculating modularity scores for comparison - Gnx = nx.from_pandas_edgelist( - M, source="0", target="1", - edge_attr="weight", create_using=nx.Graph() - ) + nx_parts = networkx_call(M) + # Calculating modularity scores for comparison + Gnx = nx.from_pandas_edgelist( + M, source="0", target="1", + edge_attr="weight", create_using=nx.Graph() + ) - cu_parts = cu_parts.to_pandas() - cu_map = dict(zip(cu_parts["vertex"], cu_parts["partition"])) + cu_parts = cu_parts.to_pandas() + cu_map = dict(zip(cu_parts["vertex"], cu_parts["partition"])) - assert set(nx_parts.keys()) == set(cu_map.keys()) + assert set(nx_parts.keys()) == set(cu_map.keys()) - cu_mod_nx = community.modularity(cu_map, Gnx) - nx_mod = community.modularity(nx_parts, Gnx) + cu_mod_nx = community.modularity(cu_map, Gnx) + nx_mod = community.modularity(nx_parts, Gnx) - assert len(cu_parts) == len(nx_parts) - assert cu_mod > (0.82 * 
nx_mod) - assert abs(cu_mod - cu_mod_nx) < 0.0001 + assert len(cu_parts) == len(nx_parts) + assert cu_mod > (0.82 * nx_mod) + assert abs(cu_mod - cu_mod_nx) < 0.0001 @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_louvain(graph_file): gc.collect() - if is_device_version_less_than((7, 0)): - cu_M = utils.read_csv_file(graph_file) - with pytest.raises(RuntimeError): - cu_parts, cu_mod = cugraph_call(cu_M) - else: - M = utils.read_csv_for_nx(graph_file) - cu_M = utils.read_csv_file(graph_file) - cu_parts, cu_mod = cugraph_call(cu_M) - nx_parts = networkx_call(M) + M = utils.read_csv_for_nx(graph_file) + cu_M = utils.read_csv_file(graph_file) + cu_parts, cu_mod = cugraph_call(cu_M) + nx_parts = networkx_call(M) - # Calculating modularity scores for comparison - Gnx = nx.from_pandas_edgelist( - M, source="0", target="1", - edge_attr="weight", create_using=nx.Graph() - ) + # Calculating modularity scores for comparison + Gnx = nx.from_pandas_edgelist( + M, source="0", target="1", + edge_attr="weight", create_using=nx.Graph() + ) - cu_parts = cu_parts.to_pandas() - cu_map = dict(zip(cu_parts["vertex"], cu_parts["partition"])) + cu_parts = cu_parts.to_pandas() + cu_map = dict(zip(cu_parts["vertex"], cu_parts["partition"])) - assert set(nx_parts.keys()) == set(cu_map.keys()) + assert set(nx_parts.keys()) == set(cu_map.keys()) - cu_mod_nx = community.modularity(cu_map, Gnx) - nx_mod = community.modularity(nx_parts, Gnx) + cu_mod_nx = community.modularity(cu_map, Gnx) + nx_mod = community.modularity(nx_parts, Gnx) - assert len(cu_parts) == len(nx_parts) - assert cu_mod > (0.82 * nx_mod) - assert abs(cu_mod - cu_mod_nx) < 0.0001 + assert len(cu_parts) == len(nx_parts) + assert cu_mod > (0.82 * nx_mod) + assert abs(cu_mod - cu_mod_nx) < 0.0001 diff --git a/python/cugraph/tests/test_modularity.py b/python/cugraph/tests/test_modularity.py index c1ff95042ed..21b8adae6e6 100644 --- a/python/cugraph/tests/test_modularity.py +++ 
b/python/cugraph/tests/test_modularity.py @@ -71,6 +71,43 @@ def test_modularity_clustering(graph_file, partitions): assert cu_score > rand_score +# Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.parametrize("partitions", PARTITIONS) +def test_modularity_clustering_multi_column(graph_file, partitions): + gc.collect() + + # Read in the graph and get a cugraph object + cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False) + cu_M.rename(columns={'0': 'src_0', '1': 'dst_0'}, inplace=True) + cu_M['src_1'] = cu_M['src_0'] + 1000 + cu_M['dst_1'] = cu_M['dst_0'] + 1000 + + G1 = cugraph.Graph() + G1.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"], + edge_attr="2") + + df1 = cugraph.spectralModularityMaximizationClustering( + G1, partitions, num_eigen_vects=(partitions - 1) + ) + + cu_score = cugraph.analyzeClustering_modularity(G1, partitions, df1, + ['0_vertex', + '1_vertex'], + 'cluster') + + G2 = cugraph.Graph() + G2.from_cudf_edgelist(cu_M, source="src_0", + destination="dst_0", + edge_attr="2") + + rand_score = random_call(G2, partitions) + # Assert that the partitioning has better modularity than the random + # assignment + assert cu_score > rand_score + + # Test to ensure DiGraph objs are not accepted # Test all combinations of default/managed and pooled/non-pooled allocation diff --git a/python/cugraph/tests/test_overlap.py b/python/cugraph/tests/test_overlap.py index a0c336c3f16..42bc3ea9808 100644 --- a/python/cugraph/tests/test_overlap.py +++ b/python/cugraph/tests/test_overlap.py @@ -18,6 +18,7 @@ import numpy as np import scipy +import cudf import cugraph from cugraph.tests import utils @@ -148,3 +149,32 @@ def test_overlap_edge_vals(graph_file): else: diff = abs(cpu_coeff[i] - cu_coeff[i]) assert diff < 1.0e-6 + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def 
test_overlap_multi_column(graph_file): + gc.collect() + + M = utils.read_csv_for_nx(graph_file) + + cu_M = cudf.DataFrame() + cu_M["src_0"] = cudf.Series(M["0"]) + cu_M["dst_0"] = cudf.Series(M["1"]) + cu_M["src_1"] = cu_M["src_0"] + 1000 + cu_M["dst_1"] = cu_M["dst_0"] + 1000 + G1 = cugraph.Graph() + G1.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"]) + + vertex_pair = cu_M[["src_0", "src_1", "dst_0", "dst_1"]] + vertex_pair = vertex_pair[:5] + + df_res = cugraph.overlap(G1, vertex_pair) + + G2 = cugraph.Graph() + G2.from_cudf_edgelist(cu_M, source="src_0", + destination="dst_0") + df_exp = cugraph.overlap(G2, vertex_pair[["src_0", "dst_0"]]) + + # Calculating mismatch + assert df_res["overlap_coeff"].equals(df_exp["overlap_coeff"]) diff --git a/python/cugraph/tests/test_pagerank.py b/python/cugraph/tests/test_pagerank.py index 163b2adb967..50be1cd5230 100644 --- a/python/cugraph/tests/test_pagerank.py +++ b/python/cugraph/tests/test_pagerank.py @@ -241,3 +241,88 @@ def test_pagerank_nx( print(f"{cugraph_pr[i][1]} and {cugraph_pr[i][1]}") print("Mismatches:", err) assert err < (0.01 * len(cugraph_pr)) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.parametrize("max_iter", MAX_ITERATIONS) +@pytest.mark.parametrize("tol", TOLERANCE) +@pytest.mark.parametrize("alpha", ALPHA) +@pytest.mark.parametrize("personalization_perc", PERSONALIZATION_PERC) +@pytest.mark.parametrize("has_guess", HAS_GUESS) +def test_pagerank_multi_column( + graph_file, max_iter, tol, alpha, personalization_perc, has_guess +): + gc.collect() + + # NetworkX PageRank + M = utils.read_csv_for_nx(graph_file) + nnz_vtx = np.unique(M[['0', '1']]) + + Gnx = nx.from_pandas_edgelist( + M, source="0", target="1", edge_attr="weight", + create_using=nx.DiGraph() + ) + + networkx_pr, networkx_prsn = networkx_call( + Gnx, max_iter, tol, alpha, personalization_perc, nnz_vtx + ) + + cu_nstart = None + if has_guess == 1: + cu_nstart_temp = 
cudify(networkx_pr) + max_iter = 100 + cu_nstart = cudf.DataFrame() + cu_nstart["vertex_0"] = cu_nstart_temp["vertex"] + cu_nstart["vertex_1"] = cu_nstart["vertex_0"] + 1000 + cu_nstart["values"] = cu_nstart_temp["values"] + + cu_prsn_temp = cudify(networkx_prsn) + if cu_prsn_temp is not None: + cu_prsn = cudf.DataFrame() + cu_prsn["vertex_0"] = cu_prsn_temp["vertex"] + cu_prsn["vertex_1"] = cu_prsn["vertex_0"] + 1000 + cu_prsn["values"] = cu_prsn_temp["values"] + else: + cu_prsn = cu_prsn_temp + + cu_M = cudf.DataFrame() + cu_M["src_0"] = cudf.Series(M["0"]) + cu_M["dst_0"] = cudf.Series(M["1"]) + cu_M["src_1"] = cu_M["src_0"] + 1000 + cu_M["dst_1"] = cu_M["dst_0"] + 1000 + cu_M["weights"] = cudf.Series(M["weight"]) + + cu_G = cugraph.DiGraph() + cu_G.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"], + edge_attr="weights") + + df = cugraph.pagerank( + cu_G, + alpha=alpha, + max_iter=max_iter, + tol=tol, + personalization=cu_prsn, + nstart=cu_nstart, + ) + + cugraph_pr = [] + + df = df.sort_values("0_vertex").reset_index(drop=True) + + pr_scores = df["pagerank"].to_array() + for i, rank in enumerate(pr_scores): + cugraph_pr.append((i, rank)) + + # Calculating mismatch + networkx_pr = sorted(networkx_pr.items(), key=lambda x: x[0]) + err = 0 + assert len(cugraph_pr) == len(networkx_pr) + for i in range(len(cugraph_pr)): + if ( + abs(cugraph_pr[i][1] - networkx_pr[i][1]) > tol * 1.1 + and cugraph_pr[i][0] == networkx_pr[i][0] + ): + err = err + 1 + print("Mismatches:", err) + assert err < (0.01 * len(cugraph_pr)) diff --git a/python/cugraph/tests/test_random_walks.py b/python/cugraph/tests/test_random_walks.py index ba0cd6eadc9..302a93cd02a 100644 --- a/python/cugraph/tests/test_random_walks.py +++ b/python/cugraph/tests/test_random_walks.py @@ -29,11 +29,10 @@ DATASETS_SMALL = [pytest.param(d) for d in utils.DATASETS_SMALL] -def calc_random_walks( - graph_file, - directed=False, - max_depth=None -): +def 
calc_random_walks(graph_file, + directed=False, + max_depth=None, + use_padding=False): """ compute random walks for each nodes in 'start_vertices' @@ -52,16 +51,20 @@ def calc_random_walks( max_depth : int The maximum depth of the random walks + use_padding : bool + If True, padded paths are returned else coalesced paths are returned. Returns ------- - random_walks_edge_lists : cudf.DataFrame - GPU data frame containing all random walks sources identifiers, - destination identifiers, edge weights + vertex_paths : cudf.Series or cudf.DataFrame + Series containing the vertices of edges/paths in the random walk. + + edge_weight_paths: cudf.Series + Series containing the edge weights of edges represented by the + returned vertex_paths - seeds_offsets: cudf.Series - Series containing the starting offset in the returned edge list - for each vertex in start_vertices. + sizes: int + The path size in case of coalesced paths. """ G = utils.generate_cugraph_graph_from_file( graph_file, directed=directed, edgevals=True) @@ -69,45 +72,47 @@ def calc_random_walks( k = random.randint(1, 10) start_vertices = random.sample(range(G.number_of_vertices()), k) - df, offsets = cugraph.random_walks(G, start_vertices, max_depth) + vertex_paths, edge_weights, vertex_path_sizes = cugraph.random_walks( + G, start_vertices, max_depth, use_padding) - return df, offsets, start_vertices + return (vertex_paths, edge_weights, vertex_path_sizes), start_vertices -def check_random_walks(df, offsets, seeds, df_G=None): +def check_random_walks(path_data, seeds, df_G=None): invalid_edge = 0 invalid_seeds = 0 - invalid_weight = 0 offsets_idx = 0 - for i in range(len(df.index)): - src, dst, weight = df.iloc[i].to_array() - if i == offsets[offsets_idx]: - if df['src'].iloc[i] != seeds[offsets_idx]: + next_path_idx = 0 + v_paths = path_data[0] + sizes = path_data[2].to_array().tolist() + + for s in sizes: + for i in range(next_path_idx, next_path_idx+s-1): + src, dst = v_paths.iloc[i], v_paths.iloc[i+1] + 
if i == next_path_idx and src != seeds[offsets_idx]: invalid_seeds += 1 print( "[ERR] Invalid seed: " " src {} != src {}" - .format(df['src'].iloc[i], offsets[offsets_idx]) + .format(src, seeds[offsets_idx]) ) - offsets_idx += 1 + offsets_idx += 1 + next_path_idx += s - edge = df.loc[(df['src'] == (src)) & (df['dst'] == (dst))].reset_index( - drop=True) exp_edge = df_G.loc[ (df_G['src'] == (src)) & ( df_G['dst'] == (dst))].reset_index(drop=True) - if not exp_edge.equals(edge[:1]): + if not (exp_edge['src'].loc[0], exp_edge['dst'].loc[0]) == (src, dst): print( "[ERR] Invalid edge: " - "There is no edge src {} dst {} weight {}" - .format(src, dst, weight) + "There is no edge src {} dst {}" + .format(src, dst) ) - invalid_weight += 1 + invalid_edge += 1 assert invalid_edge == 0 assert invalid_seeds == 0 - assert invalid_weight == 0 # ============================================================================= # Pytest Setup / Teardown - called for each test function @@ -121,11 +126,9 @@ def prepare_test(): @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("max_depth", [None]) -def test_random_walks_invalid_max_dept( - graph_file, - directed, - max_depth -): +def test_random_walks_invalid_max_dept(graph_file, + directed, + max_depth): prepare_test() with pytest.raises(TypeError): df, offsets, seeds = calc_random_walks( @@ -137,7 +140,7 @@ def test_random_walks_invalid_max_dept( @pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) -def test_random_walks( +def test_random_walks_coalesced( graph_file, directed ): @@ -145,12 +148,43 @@ def test_random_walks( df_G = utils.read_csv_file(graph_file) df_G.rename( columns={"0": "src", "1": "dst", "2": "weight"}, inplace=True) - df, offsets, seeds = calc_random_walks( + path_data, seeds = calc_random_walks( graph_file, directed, max_depth=max_depth ) - 
check_random_walks(df, offsets, seeds, df_G) + check_random_walks(path_data, seeds, df_G) + + # Check path query output + df = cugraph.rw_path(len(seeds), path_data[2]) + v_offsets = [0] + path_data[2].cumsum()[:-1].to_array().tolist() + w_offsets = [0] + (path_data[2]-1).cumsum()[:-1].to_array().tolist() + + assert df['weight_sizes'].equals(path_data[2]-1) + assert df['vertex_offsets'].to_array().tolist() == v_offsets + assert df['weight_offsets'].to_array().tolist() == w_offsets + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) +def test_random_walks_padded( + graph_file, + directed +): + max_depth = random.randint(2, 10) + df_G = utils.read_csv_file(graph_file) + df_G.rename( + columns={"0": "src", "1": "dst", "2": "weight"}, inplace=True) + path_data, seeds = calc_random_walks( + graph_file, + directed, + max_depth=max_depth, + use_padding=True + ) + v_paths = path_data[0] + e_weights = path_data[1] + assert len(v_paths) == max_depth*len(seeds) + assert len(e_weights) == (max_depth - 1)*len(seeds) """@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) diff --git a/python/cugraph/tests/test_wjaccard.py b/python/cugraph/tests/test_wjaccard.py index 9f82857a8d7..f3b3fb9efd6 100644 --- a/python/cugraph/tests/test_wjaccard.py +++ b/python/cugraph/tests/test_wjaccard.py @@ -38,16 +38,19 @@ def cugraph_call(cu_M): # Device data - weights_arr = cudf.Series( + weight_arr = cudf.Series( np.ones(max(cu_M["0"].max(), cu_M["1"].max()) + 1, dtype=np.float32) ) + weights = cudf.DataFrame() + weights['vertex'] = np.arange(len(weight_arr), dtype=np.int32) + weights['weight'] = weight_arr G = cugraph.Graph() G.from_cudf_edgelist(cu_M, source="0", destination="1") # cugraph Jaccard Call t1 = time.time() - df = cugraph.jaccard_w(G, weights_arr) + df = cugraph.jaccard_w(G, weights) t2 = time.time() - t1 print("Time : " + str(t2)) @@ -100,3 +103,56 @@ def test_wjaccard(graph_file): for i in 
range(len(cu_coeff)): diff = abs(nx_coeff[i] - cu_coeff[i]) assert diff < 1.0e-6 + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_wjaccard_multi_column_weights(graph_file): + gc.collect() + + M = utils.read_csv_for_nx(graph_file) + cu_M = utils.read_csv_file(graph_file) + # suppress F841 (local variable is assigned but never used) in flake8 + # no networkX equivalent to compare cu_coeff against... + cu_coeff = cugraph_call(cu_M) # noqa: F841 + nx_coeff = networkx_call(M) + for i in range(len(cu_coeff)): + diff = abs(nx_coeff[i] - cu_coeff[i]) + assert diff < 1.0e-6 + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_wjaccard_multi_column(graph_file): + gc.collect() + + M = utils.read_csv_for_nx(graph_file) + + cu_M = cudf.DataFrame() + cu_M["src_0"] = cudf.Series(M["0"]) + cu_M["dst_0"] = cudf.Series(M["1"]) + cu_M["src_1"] = cu_M["src_0"] + 1000 + cu_M["dst_1"] = cu_M["dst_0"] + 1000 + G1 = cugraph.Graph() + G1.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"]) + + G2 = cugraph.Graph() + G2.from_cudf_edgelist(cu_M, source="src_0", + destination="dst_0") + + vertex_pair = cu_M[["src_0", "src_1", "dst_0", "dst_1"]] + vertex_pair = vertex_pair[:5] + + weight_arr = cudf.Series(np.ones(G2.number_of_vertices(), + dtype=np.float32)) + weights = cudf.DataFrame() + weights['vertex'] = G2.nodes() + weights['vertex_'] = weights['vertex'] + 1000 + weights['weight'] = weight_arr + + df_res = cugraph.jaccard_w(G1, weights, vertex_pair) + + weights = weights[['vertex', 'weight']] + df_exp = cugraph.jaccard_w(G2, weights, vertex_pair[["src_0", "dst_0"]]) + + # Calculating mismatch + assert df_res["jaccard_coeff"].equals(df_exp["jaccard_coeff"]) diff --git a/python/cugraph/tests/test_woverlap.py b/python/cugraph/tests/test_woverlap.py index b6ceda40116..66032ac3f48 100644 --- a/python/cugraph/tests/test_woverlap.py +++ b/python/cugraph/tests/test_woverlap.py @@ -28,13 +28,16 @@ def 
cugraph_call(cu_M, pairs): weights_arr = cudf.Series( np.ones(max(cu_M["0"].max(), cu_M["1"].max()) + 1, dtype=np.float32) ) + weights = cudf.DataFrame() + weights['vertex'] = np.arange(len(weights_arr), dtype=np.int32) + weights['weight'] = weights_arr G = cugraph.DiGraph() G.from_cudf_edgelist(cu_M, source="0", destination="1") # cugraph Overlap Call t1 = time.time() - df = cugraph.overlap_w(G, weights_arr, pairs) + df = cugraph.overlap_w(G, weights, pairs) t2 = time.time() - t1 print("Time : " + str(t2)) df = df.sort_values(by=["source", "destination"]) @@ -114,3 +117,42 @@ def test_woverlap(graph_file): else: diff = abs(cpu_coeff[i] - cu_coeff[i]) assert diff < 1.0e-6 + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_woverlap_multi_column(graph_file): + gc.collect() + + M = utils.read_csv_for_nx(graph_file) + + cu_M = cudf.DataFrame() + cu_M["src_0"] = cudf.Series(M["0"]) + cu_M["dst_0"] = cudf.Series(M["1"]) + cu_M["src_1"] = cu_M["src_0"] + 1000 + cu_M["dst_1"] = cu_M["dst_0"] + 1000 + G1 = cugraph.Graph() + G1.from_cudf_edgelist(cu_M, source=["src_0", "src_1"], + destination=["dst_0", "dst_1"]) + + G2 = cugraph.Graph() + G2.from_cudf_edgelist(cu_M, source="src_0", + destination="dst_0") + + vertex_pair = cu_M[["src_0", "src_1", "dst_0", "dst_1"]] + vertex_pair = vertex_pair[:5] + + weight_arr = cudf.Series(np.ones(G2.number_of_vertices(), + dtype=np.float32)) + + weights = cudf.DataFrame() + weights['vertex'] = G2.nodes() + weights['vertex_'] = weights['vertex'] + 1000 + weights['weight'] = weight_arr + + df_res = cugraph.overlap_w(G1, weights, vertex_pair) + + weights = weights[['vertex', 'weight']] + df_exp = cugraph.overlap_w(G2, weights, vertex_pair[["src_0", "dst_0"]]) + + # Calculating mismatch + assert df_res["overlap_coeff"].equals(df_exp["overlap_coeff"]) diff --git a/python/cugraph/traversal/bfs.pxd b/python/cugraph/traversal/bfs.pxd index b6465a6698c..ac825deffa6 100644 --- a/python/cugraph/traversal/bfs.pxd +++ 
b/python/cugraph/traversal/bfs.pxd @@ -22,7 +22,7 @@ from libcpp cimport bool cdef extern from "limits.h": cdef int INT_MAX -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef void call_bfs[vertex_t, weight_t]( const handle_t &handle, const graph_container_t &g, diff --git a/python/cugraph/traversal/bfs_wrapper.pyx b/python/cugraph/traversal/bfs_wrapper.pyx index f524b133d02..05c175c563f 100644 --- a/python/cugraph/traversal/bfs_wrapper.pyx +++ b/python/cugraph/traversal/bfs_wrapper.pyx @@ -78,6 +78,8 @@ def bfs(input_graph, start, depth_limit, direction_optimizing=False): c_distance_ptr = df['distance'].__cuda_array_interface__['data'][0] c_predecessor_ptr = df['predecessor'].__cuda_array_interface__['data'][0] + is_symmetric = not input_graph.is_directed() + # Step 8: Proceed to BFS populate_graph_container(graph_container, handle_[0], @@ -90,6 +92,7 @@ def bfs(input_graph, start, depth_limit, direction_optimizing=False): num_verts, num_edges, False, False, + is_symmetric, False, False) diff --git a/python/cugraph/traversal/ms_bfs.py b/python/cugraph/traversal/ms_bfs.py index e4b799e30e4..3d158524751 100644 --- a/python/cugraph/traversal/ms_bfs.py +++ b/python/cugraph/traversal/ms_bfs.py @@ -14,8 +14,6 @@ import numpy as np import cudf -# from cugraph.structure.graph import Graph, DiGraph -# from cugraph.utilities.utils import get_device_memory_info import warnings diff --git a/python/cugraph/traversal/sssp.pxd b/python/cugraph/traversal/sssp.pxd index 59253a5f1e4..3109668d747 100644 --- a/python/cugraph/traversal/sssp.pxd +++ b/python/cugraph/traversal/sssp.pxd @@ -18,7 +18,7 @@ from cugraph.structure.graph_utilities cimport * -cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": +cdef extern from "cugraph/utilities/cython.hpp" namespace "cugraph::cython": cdef void call_sssp[vertex_t, weight_t]( const handle_t &handle, diff --git 
a/python/cugraph/traversal/traveling_salesperson.pxd b/python/cugraph/traversal/traveling_salesperson.pxd index 956c7da0978..b38c18c7633 100644 --- a/python/cugraph/traversal/traveling_salesperson.pxd +++ b/python/cugraph/traversal/traveling_salesperson.pxd @@ -18,7 +18,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef float traveling_salesperson(const handle_t &handle, int *vtx_ptr, @@ -31,4 +31,3 @@ cdef extern from "algorithms.hpp" namespace "cugraph": int nstart, bool verbose, int *route) except + - diff --git a/python/cugraph/tree/minimum_spanning_tree.pxd b/python/cugraph/tree/minimum_spanning_tree.pxd index a38aee96605..32c76ede554 100644 --- a/python/cugraph/tree/minimum_spanning_tree.pxd +++ b/python/cugraph/tree/minimum_spanning_tree.pxd @@ -19,7 +19,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "cugraph/algorithms.hpp" namespace "cugraph": cdef unique_ptr[GraphCOO[VT,ET,WT]] minimum_spanning_tree[VT,ET,WT](const handle_t &handle, const GraphCSRView[VT,ET,WT] &graph) except + diff --git a/python/cugraph/utilities/__init__.py b/python/cugraph/utilities/__init__.py index 38b46b0fe87..6dc23ff53b7 100644 --- a/python/cugraph/utilities/__init__.py +++ b/python/cugraph/utilities/__init__.py @@ -24,5 +24,6 @@ is_matrix_type, is_cp_matrix_type, is_sp_matrix_type, + renumber_vertex_pair ) from cugraph.utilities.path_retrieval import get_traversed_cost diff --git a/python/cugraph/utilities/path_retrieval.pxd b/python/cugraph/utilities/path_retrieval.pxd index 88f1da8f213..dcbbef5127d 100644 --- a/python/cugraph/utilities/path_retrieval.pxd +++ b/python/cugraph/utilities/path_retrieval.pxd @@ -18,7 +18,7 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "utilities/path_retrieval.hpp" namespace "cugraph": +cdef extern from 
"cugraph/utilities/path_retrieval.hpp" namespace "cugraph": cdef void get_traversed_cost[vertex_t, weight_t](const handle_t &handle, const vertex_t *vertices, @@ -27,4 +27,3 @@ cdef extern from "utilities/path_retrieval.hpp" namespace "cugraph": weight_t *out, vertex_t stop_vertex, vertex_t num_vertices) except + - diff --git a/python/cugraph/utilities/utils.py b/python/cugraph/utilities/utils.py index adaec0f9e44..e4e93513630 100644 --- a/python/cugraph/utilities/utils.py +++ b/python/cugraph/utilities/utils.py @@ -62,8 +62,8 @@ def get_traversed_path(df, id): ---------- df : cudf.DataFrame The dataframe containing the results of a BFS or SSSP call - id : Int - The vertex ID + id : vertex ID + most be the same data types as what is in the dataframe Returns --------- @@ -97,8 +97,9 @@ def get_traversed_path(df, id): "DataFrame does not appear to be a BFS or " "SSP result - 'predecessor' column missing" ) - if type(id) != int: - raise ValueError("The vertex 'id' needs to be an integer") + if isinstance(id, type(df['vertex'].iloc[0])): + raise ValueError( + "The vertex 'id' needs to be the same as df['vertex']") # There is no guarantee that the dataframe has not been filtered # or edited. Therefore we cannot assume that using the vertex ID @@ -161,8 +162,9 @@ def get_traversed_path_list(df, id): "DataFrame does not appear to be a BFS or " "SSP result - 'predecessor' column missing" ) - if type(id) != int: - raise ValueError("The vertex 'id' needs to be an integer") + if isinstance(id, type(df['vertex'].iloc[0])): + raise ValueError( + "The vertex 'id' needs to be the same as df['vertex']") # There is no guarantee that the dataframe has not been filtered # or edited. 
Therefore we cannot assume that using the vertex ID @@ -333,3 +335,23 @@ def import_optional(mod, import_from=None): pass return namespace.get(mod) + + +def renumber_vertex_pair(input_graph, vertex_pair): + vertex_size = input_graph.vertex_column_size() + columns = vertex_pair.columns.to_list() + if vertex_size == 1: + for col in vertex_pair.columns: + if input_graph.renumbered: + vertex_pair = input_graph.add_internal_vertex_id( + vertex_pair, col, col + ) + else: + if input_graph.renumbered: + vertex_pair = input_graph.add_internal_vertex_id( + vertex_pair, "src", columns[:vertex_size] + ) + vertex_pair = input_graph.add_internal_vertex_id( + vertex_pair, "dst", columns[vertex_size:] + ) + return vertex_pair diff --git a/python/setup.py b/python/setup.py index 799cb805afa..d614eef24d6 100644 --- a/python/setup.py +++ b/python/setup.py @@ -65,8 +65,9 @@ raft_path = get_environment_option('RAFT_PATH') # FIXME: This could clone RAFT, even if it's not needed (eg. running --clean). -raft_include_dir = use_raft_package(raft_path, libcugraph_path, - git_info_file='../cpp/CMakeLists.txt') +# deprecated: This functionality will go away after +# https://github.com/rapidsai/raft/issues/83 +raft_include_dir = use_raft_package(raft_path, libcugraph_path) class CleanCommand(Command): diff --git a/python/setuputils.py b/python/setuputils.py index d93051d05ef..a64e60e1c32 100644 --- a/python/setuputils.py +++ b/python/setuputils.py @@ -20,7 +20,6 @@ import shutil import subprocess import sys -import warnings from pathlib import Path @@ -71,7 +70,7 @@ def clean_folder(path): def use_raft_package(raft_path, cpp_build_path, - git_info_file='../cpp/cmake/Dependencies.cmake'): + git_info_file=None): """ Function to use the python code in RAFT in package.raft @@ -139,10 +138,13 @@ def use_raft_package(raft_path, cpp_build_path, def clone_repo_if_needed(name, cpp_build_path=None, git_info_file=None): if git_info_file is None: - git_info_file = _get_repo_path() + '/cpp/CMakeLists.txt' + 
git_info_file = \ + _get_repo_path() + '/cpp/cmake/thirdparty/get_{}.cmake'.format( + name + ) if cpp_build_path is None or cpp_build_path is False: - cpp_build_path = _get_repo_path() + '/cpp/build/' + cpp_build_path = _get_repo_path() + '/cpp/build/_deps/' repo_cloned = get_submodule_dependency(name, cpp_build_path=cpp_build_path, @@ -152,7 +154,7 @@ def clone_repo_if_needed(name, cpp_build_path=None, repo_path = ( _get_repo_path() + '/python/_external_repositories/' + name + '/') else: - repo_path = os.path.join(cpp_build_path, '_deps', name + '-src') + repo_path = os.path.join(cpp_build_path, name + '-src/') return repo_path, repo_cloned @@ -192,7 +194,7 @@ def get_submodule_dependency(repo, repo_info = get_repo_cmake_info(repos, git_info_file) - if os.path.exists(cpp_build_path): + if os.path.exists(os.path.join(cpp_build_path, repos[0] + '-src/')): print("-- Third party modules found succesfully in the libcugraph++ " "build folder.") @@ -200,11 +202,11 @@ def get_submodule_dependency(repo, else: - warnings.warn("-- Third party repositories have not been found so they" - "will be cloned. To avoid this set the environment " - "variable CUGRAPH_BUILD_PATH, containing the relative " - "path of the root of the repository to the folder " - "where libcugraph++ was built.") + print("-- Third party repositories have not been found so they" + "will be cloned. To avoid this set the environment " + "variable CUGRAPH_BUILD_PATH, containing the relative " + "path of the root of the repository to the folder " + "where libcugraph++ was built.") for repo in repos: clone_repo(repo, repo_info[repo][0], repo_info[repo][1]) @@ -262,8 +264,8 @@ def get_repo_cmake_info(names, file_path): `ExternalProject_Add(name` file_path : String Relative path of the location of the CMakeLists.txt (or the cmake - module which contains FetchContent_Declare or ExternalProject_Add - definitions) to extract the information. 
+ module which contains ExternalProject_Add definitions) to extract + the information. Returns ------- @@ -272,31 +274,32 @@ def get_repo_cmake_info(names, file_path): where results[name][0] is the url of the repo and repo_info[repo][1] is the tag/commit hash to be cloned as specified by cmake. + """ - with open(file_path, encoding='utf-8') as f: + with open(file_path) as f: s = f.read() results = {} - cmake_ext_proj_decls = ["FetchContent_Declare", "ExternalProject_Add"] - for name in names: - res = None - for decl in cmake_ext_proj_decls: - res = re.search(f'{decl}\(\s*' # noqa: W605 - + '(' + re.escape(name) + ')' - + '\s+.*GIT_REPOSITORY\s+(\S+)\s+.+' # noqa: W605 - + '\s+.*GIT_TAG\s+(\S+)', # noqa: W605 - s) - if res: - break - if res is None: - raise RuntimeError('Could not find any of the following ' - f'statements: {cmake_ext_proj_decls}, for ' - f'module "{name}" in file "{file_path}" with ' - 'GIT_REPOSITORY and GIT_TAG settings') - - results[res.group(1)] = [res.group(2), res.group(3)] + repo = re.findall(r'\s.*GIT_REPOSITORY.*', s) + repo = repo[-1].split()[-1] + fork = re.findall(r'\s.*FORK.*', s) + fork = fork[-1].split()[-1] + repo = repo.replace("${PKG_FORK}", fork) + tag = re.findall(r'\s.*PINNED_TAG.*', s) + tag = tag[-1].split()[-1] + results[name] = [repo, tag] + if tag == 'branch-${CUGRAPH_BRANCH_VERSION_raft}': + loc = _get_repo_path() + '/cpp/CMakeLists.txt' + with open(loc) as f: + cmakelists = f.read() + tag = re.findall(r'\s.*project\(CUGRAPH VERSION.*', cmakelists) + print(tag) + tag = tag[-1].split()[2].split('.') + tag = 'branch-{}.{}'.format(tag[0], tag[1]) + + results[name] = [repo, tag] return results