diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS index d35c5c02218..ebffd18ca5a 100644 --- a/.github/CODEOWNERS +++ b/.github/CODEOWNERS @@ -1,3 +1,17 @@ +# https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners +# Order matters - match of highest importance goes last (last match wins) + +#doc code owners +datasets/ @rapidsai/cugraph-doc-codeowners +notebooks/ @rapidsai/cugraph-doc-codeowners +docs/ @rapidsai/cugraph-doc-codeowners +**/*.txt @rapidsai/cugraph-doc-codeowners +**/*.md @rapidsai/cugraph-doc-codeowners +**/*.rst @rapidsai/cugraph-doc-codeowners +**/*.ipynb @rapidsai/cugraph-doc-codeowners +**/*.pdf @rapidsai/cugraph-doc-codeowners +**/*.png @rapidsai/cugraph-doc-codeowners + #cpp code owners cpp/ @rapidsai/cugraph-cpp-codeowners @@ -9,7 +23,7 @@ python/ @rapidsai/cugraph-python-codeowners **/cmake/ @rapidsai/cugraph-cmake-codeowners #build/ops code owners -.github/ @rapidsai/ops-codeowners +.github/ @rapidsai/ops-codeowners ci/ @rapidsai/ops-codeowners conda/ @rapidsai/ops-codeowners **/Dockerfile @rapidsai/ops-codeowners diff --git a/.github/labeler.yml b/.github/labeler.yml index 621d0fde833..9c3af6de64b 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -5,26 +5,33 @@ python: - 'python/**' - 'notebooks/**' + +benchmarks: - 'benchmarks/**' doc: - 'docs/**' - '**/*.md' + - 'datasets/**' + - 'notebooks/**' + - '**/*.txt' + - '**/*.rst' + - '**/*.ipynb' + - '**/*.pdf' + - '**/*.png' datasets: - 'datasets/**' cuGraph: - 'cpp/**' - + CMake: - '**/CMakeLists.txt' - '**/cmake/**' - -Ops: - - '.github/**' - - 'ci/**' + +gpuCI: + - 'ci/**' + +conda: - 'conda/**' - - '**/Dockerfile' - - '**/.dockerignore' - - 'docker/**' diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml new file mode 100644 index 00000000000..8b65da69aa2 --- /dev/null +++ b/.github/workflows/stale.yaml @@ -0,0 +1,57 @@ +name: Mark inactive issues and pull requests + +on: + schedule: + - cron: "0 * * * *" + +jobs: + 
mark-inactive-30d: + runs-on: ubuntu-latest + steps: + - name: Mark 30 day inactive issues and pull requests + uses: actions/stale@v3 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + stale-issue-message: > + This issue has been labeled `inactive-30d` due to no recent activity in the past 30 days. + Please close this issue if no further response or action is needed. + Otherwise, please respond with a comment indicating any updates or changes to the original issue and/or confirm this issue still needs to be addressed. + This issue will be labeled `inactive-90d` if there is no activity in the next 60 days. + stale-issue-label: "inactive-30d" + exempt-issue-labels: "0 - Blocked,0 - Backlog,good first issue" + days-before-issue-stale: 30 + days-before-issue-close: -1 + stale-pr-message: > + This PR has been labeled `inactive-30d` due to no recent activity in the past 30 days. + Please close this PR if it is no longer required. + Otherwise, please respond with a comment indicating any updates. + This PR will be labeled `inactive-90d` if there is no activity in the next 60 days. + stale-pr-label: "inactive-30d" + exempt-pr-labels: "0 - Blocked,0 - Backlog,good first issue" + days-before-pr-stale: 30 + days-before-pr-close: -1 + operations-per-run: 50 + mark-inactive-90d: + runs-on: ubuntu-latest + steps: + - name: Mark 90 day inactive issues and pull requests + uses: actions/stale@v3 + with: + repo-token: ${{ secrets.GITHUB_TOKEN }} + stale-issue-message: > + This issue has been labeled `inactive-90d` due to no recent activity in the past 90 days. + Please close this issue if no further response or action is needed. + Otherwise, please respond with a comment indicating any updates or changes to the original issue and/or confirm this issue still needs to be addressed. 
+ stale-issue-label: "inactive-90d" + exempt-issue-labels: "0 - Blocked,0 - Backlog,good first issue" + days-before-issue-stale: 90 + days-before-issue-close: -1 + stale-pr-message: > + This PR has been labeled `inactive-90d` due to no recent activity in the past 90 days. + Please close this PR if it is no longer required. + Otherwise, please respond with a comment indicating any updates. + stale-pr-label: "inactive-90d" + exempt-pr-labels: "0 - Blocked,0 - Backlog,good first issue" + days-before-pr-stale: 90 + days-before-pr-close: -1 + operations-per-run: 50 diff --git a/CHANGELOG.md b/CHANGELOG.md index b22d92c902f..2b8247f02d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,12 +1,14 @@ - -# cuGraph 0.18.0 (Date TBD) +# cuGraph 0.19.0 (Date TBD) ## New Features ## Improvements ## Bug Fixes -- PR #1321 Fix benchmark script trap setup to come after the PATH variable update + +# 0.18.0 + +Please see https://github.com/rapidsai/cugraph/releases/tag/branch-0.18-latest for the latest changes to this development branch. # cuGraph 0.17.0 (10 Dec 2020) ## New Features @@ -19,6 +21,7 @@ - PR #1279 Add self loop check variable in graph - PR #1277 SciPy sparse matrix input support for WCC, SCC, SSSP, and BFS - PR #1278 Add support for shortest_path_length and fix graph vertex checks +- PR #1280 Add Multi(Di)Graph support ## Improvements - PR #1227 Pin cmake policies to cmake 3.17 version diff --git a/README.md b/README.md index 8fee5451ac3..62059e9c7b6 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ The [RAPIDS](https://rapids.ai) cuGraph library is a collection of GPU accelerat **NOTE:** For the latest stable [README.md](https://github.com/rapidsai/cugraph/blob/main/README.md) ensure you are on the latest branch. 
- +As an example, the following Python snippet loads graph data and computes PageRank: ```python import cugraph @@ -30,6 +30,17 @@ for i in range(len(df_page)): " PageRank is " + str(df_page['pagerank'].iloc[i])) ``` +## Getting cuGraph +There are 3 ways to get cuGraph: +1. [Quick start with Docker Repo](#quick) +2. [Conda Installation](#conda) +3. [Build from Source](#source) +

+ +--- +# Currently Supported Features +As of Release 0.18 - including 0.18 nightly + ## Supported Algorithms @@ -40,6 +51,7 @@ for i in range(len(df_page)): | | Betweenness Centrality | Single-GPU | | | | Edge Betweenness Centrality | Single-GPU | | | Community | | | | +| | EgoNet | Single-GPU | | | | Leiden | Single-GPU | | | | Louvain | Multi-GPU | | | | Ensemble Clustering for Graphs | Single-GPU | | @@ -56,17 +68,20 @@ for i in range(len(df_page)): | | Core Number | Single-GPU | | | Layout | | | | | | Force Atlas 2 | Single-GPU | | +| Linear Assignment| | | | +| | Hungarian | Single-GPU | [README](cpp/src/linear_assignment/README-hungarian.md) | | Link Analysis| | | | | | Pagerank | Multi-GPU | | | | Personal Pagerank | Multi-GPU | | -| | HITS | Single-GPU | leverages Gunrock | +| | HITS | Single-GPU | leverages Gunrock | | Link Prediction | | | | | | Jaccard Similarity | Single-GPU | | | | Weighted Jaccard Similarity | Single-GPU | | | | Overlap Similarity | Single-GPU | | | Traversal | | | | -| | Breadth First Search (BFS) | Multi-GPU | | +| | Breadth First Search (BFS) | Multi-GPU | with cutoff support | | | Single Source Shortest Path (SSSP) | Multi-GPU | | +| | Traveling Salesperson Problem (TSP) | Single-GPU | | | Structure | | | | | | Renumbering | Single-GPU | multiple columns, any data type | | | Symmetrize | Multi-GPU | | @@ -74,7 +89,6 @@ for i in range(len(df_page)): | | Hungarian Algorithm | Single-GPU | | | | Minimum Spanning Tree | Single-GPU | | | | Maximum Spanning Tree | Single-GPU | | - | | |

@@ -83,13 +97,13 @@ for i in range(len(df_page)): | --------------- | --------------------------------------------------- | | Graph | An undirected Graph | | DiGraph | A Directed Graph | -| _Multigraph_ | _coming in 0.18_ | -| _MultiDigraph_ | _coming in 0.18_ | +| Multigraph | A Graph with multiple edges between a vertex pair | +| MultiDigraph | A Directed Graph with multiple edges between a vertex pair | | | |

## Supported Data Types -cuGraph supports the creation of a graph several data types: +cuGraph supports graph creation with Source and Destination being expressed as: * cuDF DataFrame * Pandas DataFrame @@ -123,22 +137,14 @@ The amount of memory required is dependent on the graph structure and the analyt The use of managed memory for oversubscription can also be used to exceed the above memory limitations. See the recent blog on _Tackling Large Graphs with RAPIDS cuGraph and CUDA Unified Memory on GPUs_: https://medium.com/rapids-ai/tackling-large-graphs-with-rapids-cugraph-and-unified-virtual-memory-b5b69a065d4 +

-## Getting cuGraph -### Intro -There are 3 ways to get cuGraph : -1. [Quick start with Docker Demo Repo](#quick) -2. [Conda Installation](#conda) -3. [Build from Source](#source) - - - - +--- ## Quick Start -Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapidsai/), choosing a tag based on the NVIDIA CUDA version you’re running. This provides a ready to run Docker container with example notebooks and data, showcasing how you can utilize all of the RAPIDS libraries: cuDF, cuML, and cuGraph. +Please see the [Docker Repository](https://hub.docker.com/r/rapidsai/rapidsai/), choosing a tag based on the NVIDIA CUDA version you’re running. This provides a ready to run Docker container with example notebooks and data, showcasing how you can utilize all of the RAPIDS libraries: cuDF, cuML, and cuGraph. -### Conda +## Conda It is easy to install cuGraph using conda. You can get a minimal conda installation with [Miniconda](https://conda.io/miniconda.html) or get the full installation with [Anaconda](https://www.anaconda.com/download). Install and update cuGraph using the conda command: @@ -158,7 +164,7 @@ conda install -c nvidia -c rapidsai -c numba -c conda-forge -c defaults cugraph Note: This conda installation only applies to Linux and Python versions 3.7/3.8. -### Build from Source and Contributing +## Build from Source and Contributing Please see our [guide for building cuGraph from source](SOURCEBUILD.md) diff --git a/SOURCEBUILD.md b/SOURCEBUILD.md index 8acd90c4f7f..0cbf6ccdaa3 100644 --- a/SOURCEBUILD.md +++ b/SOURCEBUILD.md @@ -1,6 +1,6 @@ # Building from Source -The following instructions are for users wishing to build cuGraph from source code. These instructions are tested on supported distributions of Linux, CUDA, and Python - See [RAPIDS Getting Started](https://rapids.ai/start.html) for list of supported environments. Other operating systems _might be_ compatible, but are not currently tested. 
+The following instructions are for users wishing to build cuGraph from source code. These instructions are tested on supported distributions of Linux, CUDA, and Python - See [RAPIDS Getting Started](https://rapids.ai/start.html) for list of supported environments. Other operating systems _might be_ compatible, but are not currently tested. The cuGraph package include both a C/C++ CUDA portion and a python portion. Both libraries need to be installed in order for cuGraph to operate correctly. @@ -9,7 +9,7 @@ The cuGraph package include both a C/C++ CUDA portion and a python portion. Bot __Compiler__: * `gcc` version 5.4+ * `nvcc` version 10.0+ -* `cmake` version 3.12+ +* `cmake` version 3.18+ __CUDA:__ * CUDA 10.1+ @@ -97,17 +97,21 @@ There are several other options available on the build script for advanced users `build.sh` options: ```bash build.sh [ ...] [ ...] - clean - remove all existing build artifacts and configuration (start over) - libcugraph - build the cugraph C++ code - cugraph - build the cugraph Python package - + where is: + clean - remove all existing build artifacts and configuration (start over) + libcugraph - build the cugraph C++ code + cugraph - build the cugraph Python package + docs - build the docs and is: -v - verbose build mode -g - build for debug -n - no install step + --allgpuarch - build for all supported GPU architectures --show_depr_warn - show cmake deprecation warnings -h - print this text + default action (no args) is to build and install 'libcugraph' then 'cugraph' then 'docs' targets + examples: $ ./build.sh clean # remove prior build artifacts (start over) $ ./build.sh libcugraph -v # compile and install libcugraph with verbose output @@ -189,7 +193,7 @@ Run either the C++ or the Python tests with datasets ```bash cd $CUGRAPH_HOME/datasets - source get_test_data.sh #This takes about 10 minutes and download 1GB data (>5 GB uncompressed) + source get_test_data.sh #This takes about 10 minutes and downloads 1GB data (>5 GB 
uncompressed) ``` Run the C++ tests on large input: diff --git a/benchmarks/bench_algos.py b/benchmarks/bench_algos.py index 9be636ca480..f9f8bf9cf53 100644 --- a/benchmarks/bench_algos.py +++ b/benchmarks/bench_algos.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -32,6 +32,7 @@ def setFixtureParamNames(*args, **kwargs): import cugraph from cugraph.structure.number_map import NumberMap from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than import rmm from .params import FIXTURE_PARAMS @@ -212,6 +213,8 @@ def bench_jaccard(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.jaccard, graphWithAdjListComputed) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal") def bench_louvain(gpubenchmark, graphWithAdjListComputed): gpubenchmark(cugraph.louvain, graphWithAdjListComputed) diff --git a/build.sh b/build.sh index b3d3463ed4e..54634e2ca6e 100755 --- a/build.sh +++ b/build.sh @@ -1,6 +1,6 @@ #!/bin/bash -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # cugraph build script @@ -19,18 +19,20 @@ ARGS=$* REPODIR=$(cd $(dirname $0); pwd) LIBCUGRAPH_BUILD_DIR=${LIBCUGRAPH_BUILD_DIR:=${REPODIR}/cpp/build} -VALIDARGS="clean libcugraph cugraph docs -v -g -n --allgpuarch --show_depr_warn -h --help" +VALIDARGS="clean libcugraph cugraph docs -v -g -n --allgpuarch --buildfaiss --show_depr_warn -h --help" HELP="$0 [ ...] [ ...] where is: clean - remove all existing build artifacts and configuration (start over) libcugraph - build the cugraph C++ code cugraph - build the cugraph Python package + cpp-mgtests - build libcugraph mnmg tests. Builds MPI communicator, adding MPI as a dependency. 
docs - build the docs and is: -v - verbose build mode -g - build for debug -n - no install step --allgpuarch - build for all supported GPU architectures + --buildfaiss - build faiss statically into cugraph --show_depr_warn - show cmake deprecation warnings -h - print this text @@ -44,10 +46,12 @@ CUGRAPH_BUILD_DIR=${REPODIR}/python/build BUILD_DIRS="${LIBCUGRAPH_BUILD_DIR} ${CUGRAPH_BUILD_DIR}" # Set defaults for vars modified by flags to this script -VERBOSE="" +VERBOSE_FLAG="" BUILD_TYPE=Release INSTALL_TARGET=install BUILD_DISABLE_DEPRECATION_WARNING=ON +BUILD_CPP_MG_TESTS=OFF +BUILD_STATIC_FAISS=OFF GPU_ARCH="" # Set defaults for vars that may not have been defined externally @@ -82,7 +86,7 @@ fi # Process flags if hasArg -v; then - VERBOSE=1 + VERBOSE_FLAG="-v" fi if hasArg -g; then BUILD_TYPE=Debug @@ -93,9 +97,15 @@ fi if hasArg --allgpuarch; then GPU_ARCH="-DGPU_ARCHS=ALL" fi +if hasArg --buildfaiss; then + BUILD_STATIC_FAISS=ON +fi if hasArg --show_depr_warn; then BUILD_DISABLE_DEPRECATION_WARNING=OFF fi +if hasArg cpp-mgtests; then + BUILD_CPP_MG_TESTS=ON +fi # If clean given, run it prior to any other steps if hasArg clean; then @@ -127,10 +137,13 @@ if buildAll || hasArg libcugraph; then mkdir -p ${LIBCUGRAPH_BUILD_DIR} cd ${LIBCUGRAPH_BUILD_DIR} cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ - ${GPU_ARCH} \ - -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ - -DCMAKE_BUILD_TYPE=${BUILD_TYPE} ${REPODIR}/cpp - make -j${PARALLEL_LEVEL} VERBOSE=${VERBOSE} ${INSTALL_TARGET} + ${GPU_ARCH} \ + -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \ + -DBUILD_STATIC_FAISS=${BUILD_STATIC_FAISS} \ + -DBUILD_CUGRAPH_MG_TESTS=${BUILD_CPP_MG_TESTS} \ + ${REPODIR}/cpp + cmake --build "${LIBCUGRAPH_BUILD_DIR}" -j${PARALLEL_LEVEL} --target ${INSTALL_TARGET} ${VERBOSE_FLAG} fi # Build and install the cugraph Python package @@ -152,10 +165,11 @@ if buildAll || hasArg docs; then cd 
${LIBCUGRAPH_BUILD_DIR} cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ - -DCMAKE_BUILD_TYPE=${BUILD_TYPE} ${REPODIR}/cpp + -DCMAKE_BUILD_TYPE=${BUILD_TYPE} ${REPODIR}/cpp \ + -DBUILD_STATIC_FAISS=${BUILD_STATIC_FAISS} fi cd ${LIBCUGRAPH_BUILD_DIR} - make -j${PARALLEL_LEVEL} VERBOSE=${VERBOSE} docs_cugraph + cmake --build "${LIBCUGRAPH_BUILD_DIR}" -j${PARALLEL_LEVEL} --target docs_cugraph ${VERBOSE_FLAG} cd ${REPODIR}/docs make html fi diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh index 2c6dc899be2..d69448cda4e 100755 --- a/ci/cpu/build.sh +++ b/ci/cpu/build.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. ######################################### # cuGraph CPU conda build script for CI # ######################################### @@ -24,6 +24,9 @@ fi export GPUCI_CONDA_RETRY_MAX=1 export GPUCI_CONDA_RETRY_SLEEP=30 +# Use Ninja to build +export CMAKE_GENERATOR="Ninja" + ################################################################################ # SETUP - Check environment ################################################################################ diff --git a/ci/getGTestTimes.sh b/ci/getGTestTimes.sh deleted file mode 100755 index 8a3752d76e2..00000000000 --- a/ci/getGTestTimes.sh +++ /dev/null @@ -1,46 +0,0 @@ -#!/bin/bash -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -# This script will print the gtest results sorted by runtime. This will print -# the results two ways: first by printing all tests sorted by runtime, then by -# printing all tests grouped by test binary with tests sorted by runtime within -# the group. -# -# To use this script, capture the test run output to a file then run this script -# with the file as the first arg, or just redirect test output to this script. - -awk '/^Running GoogleTest .+$/ { - testbinary = $3 - } - /^\[ OK \].+$/ { - testtime = substr($(NF-1),2) - newtestdata = testbinary ":" substr($0,14) - alltestdata = alltestdata newtestdata "\n" - testdata[testbinary] = testdata[testbinary] newtestdata "\n" - totaltime = totaltime + testtime - } - END { - # Print all tests sorted by time - system("echo \"" alltestdata "\" | sort -r -t\\( -nk2") - print "\n================================================================================" - # Print test binaries with tests sorted by time - print "Tests grouped by test binary:" - for (testbinary in testdata) { - print testbinary - system("echo \"" testdata[testbinary] "\" | sort -r -t\\( -nk2") - } - print "\n================================================================================" - print totaltime " milliseconds = " totaltime/60000 " minutes" - } -' $1 diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 019d03e21da..0fef7b62f8d 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -1,10 +1,10 @@ #!/usr/bin/env bash -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. 
########################################## # cuGraph GPU build & testscript for CI # ########################################## -set -e -set -o pipefail +set -e # abort the script on error, this will change for running tests (see below) +set -o pipefail # piped commands propagate their error NUMARGS=$# ARGS=$* @@ -98,10 +98,15 @@ fi # TEST - Run GoogleTest and py.tests for libcugraph and cuGraph ################################################################################ -set +e -Eo pipefail -EXITCODE=0 +# Switch to +e to allow failing commands to continue the script, which is needed +# so all testing commands run regardless of pass/fail. This requires the desired +# exit code to be managed using the ERR trap. +set +e # allow script to continue on error +set -E # ERR traps are inherited by subcommands trap "EXITCODE=1" ERR +EXITCODE=0 + if hasArg --skip-tests; then gpuci_logger "Skipping Tests" else @@ -117,18 +122,19 @@ else TEST_MODE_FLAG="" fi + gpuci_logger "Running cuGraph test.sh..." ${WORKSPACE}/ci/test.sh ${TEST_MODE_FLAG} | tee testoutput.txt + gpuci_logger "Ran cuGraph test.sh : return code was: $?, gpu/build.sh exit code is now: $EXITCODE" - echo -e "\nTOP 20 SLOWEST TESTS:\n" - # Wrap in echo to prevent non-zero exit since this command is non-essential - echo "$(${WORKSPACE}/ci/getGTestTimes.sh testoutput.txt | head -20)" - + gpuci_logger "Running cuGraph notebook test script..." 
${WORKSPACE}/ci/gpu/test-notebooks.sh 2>&1 | tee nbtest.log + gpuci_logger "Ran cuGraph notebook test script : return code was: $?, gpu/build.sh exit code is now: $EXITCODE" python ${WORKSPACE}/ci/utils/nbtestlog2junitxml.py nbtest.log fi -if [ -n "\${CODECOV_TOKEN}" ]; then - codecov -t \$CODECOV_TOKEN +if [ -n "${CODECOV_TOKEN}" ]; then + codecov -t $CODECOV_TOKEN fi +gpuci_logger "gpu/build.sh returning value: $EXITCODE" return ${EXITCODE} diff --git a/ci/gpu/notebook_list.py b/ci/gpu/notebook_list.py new file mode 100644 index 00000000000..bb54913ac8d --- /dev/null +++ b/ci/gpu/notebook_list.py @@ -0,0 +1,48 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import re +import sys +import glob + +from numba import cuda + +# +# Not strictly true... 
however what we mean is +# Pascal or earlier +# +pascal = False + +device = cuda.get_current_device() +cc = getattr(device, 'COMPUTE_CAPABILITY') +if (cc[0] < 7): + pascal = True + +for filename in glob.iglob('**/*.ipynb', recursive=True): + skip = False + for line in open(filename, 'r'): + if re.search('# Skip notebook test', line): + skip = True + print(f'SKIPPING {filename} (marked as skip)', file=sys.stderr) + break; + elif re.search('dask', line): + print(f'SKIPPING {filename} (suspected Dask usage, not currently automatable)', file=sys.stderr) + skip = True + break; + elif pascal and re.search('# Does not run on Pascal', line): + print(f'SKIPPING {filename} (does not run on Pascal)', file=sys.stderr) + skip = True + break; + + if not skip: + print(filename) diff --git a/ci/gpu/test-notebooks.sh b/ci/gpu/test-notebooks.sh index 389d3be0bfd..650132f116d 100755 --- a/ci/gpu/test-notebooks.sh +++ b/ci/gpu/test-notebooks.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,23 +12,27 @@ # See the License for the specific language governing permissions and # limitations under the License. -#RAPIDS_DIR=/rapids +# Any failing command will set EXITCODE to non-zero +set -e # abort the script on error, this will change for running tests (see below) +set -o pipefail # piped commands propagate their error +set -E # ERR traps are inherited by subcommands +trap "EXITCODE=1" ERR + NOTEBOOKS_DIR=${WORKSPACE}/notebooks NBTEST=${WORKSPACE}/ci/utils/nbtest.sh LIBCUDF_KERNEL_CACHE_PATH=${WORKSPACE}/.jitcache +EXITCODE=0 cd ${NOTEBOOKS_DIR} TOPLEVEL_NB_FOLDERS=$(find . 
-name *.ipynb |cut -d'/' -f2|sort -u) -# Add notebooks that should be skipped here -# (space-separated list of filenames without paths) - -SKIPNBS="uvm.ipynb bfs_benchmark.ipynb louvain_benchmark.ipynb pagerank_benchmark.ipynb sssp_benchmark.ipynb release.ipynb nx_cugraph_bc_benchmarking.ipynb" - ## Check env env -EXITCODE=0 +# Do not abort the script on error. This allows all tests to run regardless of +# pass/fail but relies on the ERR trap above to manage the EXITCODE for the +# script. +set +e # Always run nbtest in all TOPLEVEL_NB_FOLDERS, set EXITCODE to failure # if any run fails @@ -37,29 +41,20 @@ for folder in ${TOPLEVEL_NB_FOLDERS}; do echo "FOLDER: ${folder}" echo "========================================" cd ${NOTEBOOKS_DIR}/${folder} - for nb in $(find . -name "*.ipynb"); do + NBLIST=$(python ${WORKSPACE}/ci/gpu/notebook_list.py) + for nb in ${NBLIST}; do nbBasename=$(basename ${nb}) - # Skip all NBs that use dask (in the code or even in their name) - if ((echo ${nb}|grep -qi dask) || \ - (grep -q dask ${nb})); then - echo "--------------------------------------------------------------------------------" - echo "SKIPPING: ${nb} (suspected Dask usage, not currently automatable)" - echo "--------------------------------------------------------------------------------" - elif (echo " ${SKIPNBS} " | grep -q " ${nbBasename} "); then - echo "--------------------------------------------------------------------------------" - echo "SKIPPING: ${nb} (listed in skip list)" - echo "--------------------------------------------------------------------------------" - else - cd $(dirname ${nb}) - nvidia-smi - ${NBTEST} ${nbBasename} - EXITCODE=$((EXITCODE | $?)) - rm -rf ${LIBCUDF_KERNEL_CACHE_PATH}/* - cd ${NOTEBOOKS_DIR}/${folder} - fi + cd $(dirname ${nb}) + nvidia-smi + ${NBTEST} ${nbBasename} + echo "Ran nbtest for $nb : return code was: $?, test script exit code is now: $EXITCODE" + echo + rm -rf ${LIBCUDF_KERNEL_CACHE_PATH}/* + cd ${NOTEBOOKS_DIR}/${folder} 
done done nvidia-smi +echo "Notebook test script exiting with value: $EXITCODE" exit ${EXITCODE} diff --git a/ci/test.sh b/ci/test.sh index db9390461c0..b0134e97246 100755 --- a/ci/test.sh +++ b/ci/test.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,9 +12,11 @@ # See the License for the specific language governing permissions and # limitations under the License. -# note: do not use set -e in order to allow all gtest invocations to take place, -# and instead keep track of exit status and exit with an overall exit status -set -o pipefail +# Any failing command will set EXITCODE to non-zero +set -e # abort the script on error, this will change for running tests (see below) +set -o pipefail # piped commands propagate their error +set -E # ERR traps are inherited by subcommands +trap "EXITCODE=1" ERR NUMARGS=$# ARGS=$* @@ -22,7 +24,7 @@ THISDIR=$(cd $(dirname $0);pwd) CUGRAPH_ROOT=$(cd ${THISDIR}/..;pwd) GTEST_ARGS="--gtest_output=xml:${CUGRAPH_ROOT}/test-results/" DOWNLOAD_MODE="" -ERRORCODE=0 +EXITCODE=0 export RAPIDS_DATASET_ROOT_DIR=${CUGRAPH_ROOT}/datasets @@ -50,47 +52,61 @@ else echo "Download datasets..." 
cd ${RAPIDS_DATASET_ROOT_DIR} bash ./get_test_data.sh ${DOWNLOAD_MODE} - ERRORCODE=$((ERRORCODE | $?)) - # no need to run tests if dataset download fails - if (( ${ERRORCODE} != 0 )); then - exit ${ERRORCODE} - fi fi if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then cd ${CUGRAPH_ROOT}/cpp/build else - export LD_LIBRARY_PATH="$WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build:$LD_LIBRARY_PATH" + export LD_LIBRARY_PATH="$WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build:$CONDA_PREFIX/lib:$LD_LIBRARY_PATH" cd $WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build fi -for gt in gtests/*; do - test_name=$(basename $gt) - echo "Running GoogleTest $test_name" - ${gt} ${GTEST_FILTER} ${GTEST_ARGS} - ERRORCODE=$((ERRORCODE | $?)) -done - +# FIXME: if possible, any install and build steps should be moved outside this +# script since a failing install/build step is treated as a failing test command +# and will not stop the script. This script is also only expected to run tests +# in a preconfigured environment, and install/build steps are unexpected side +# effects. if [[ "$PROJECT_FLASH" == "1" ]]; then + export LIBCUGRAPH_BUILD_DIR="$WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build" + + # Faiss patch + echo "Update libcugraph.so" + cd $LIBCUGRAPH_BUILD_DIR + chrpath -d libcugraph.so + patchelf --replace-needed `patchelf --print-needed libcugraph.so | grep faiss` libfaiss.so libcugraph.so + CONDA_FILE=`find $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ -name "libcugraph*.tar.bz2"` CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install echo "Installing $CONDA_FILE" conda install -c $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ "$CONDA_FILE" - export LIBCUGRAPH_BUILD_DIR="$WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build" echo "Build cugraph..." $WORKSPACE/build.sh cugraph fi +# Do not abort the script on error from this point on. 
This allows all tests to +# run regardless of pass/fail, but relies on the ERR trap above to manage the +# EXITCODE for the script. +set +e + +echo "C++ gtests for cuGraph..." +for gt in tests/*_TEST; do + test_name=$(basename $gt) + echo "Running gtest $test_name" + ${gt} ${GTEST_FILTER} ${GTEST_ARGS} + echo "Ran gtest $test_name : return code was: $?, test script exit code is now: $EXITCODE" +done + echo "Python pytest for cuGraph..." cd ${CUGRAPH_ROOT}/python pytest --cache-clear --junitxml=${CUGRAPH_ROOT}/junit-cugraph.xml -v --cov-config=.coveragerc --cov=cugraph --cov-report=xml:${WORKSPACE}/python/cugraph/cugraph-coverage.xml --cov-report term --ignore=cugraph/raft --benchmark-disable -ERRORCODE=$((ERRORCODE | $?)) +echo "Ran Python pytest for cugraph : return code was: $?, test script exit code is now: $EXITCODE" echo "Python benchmarks for cuGraph (running as tests)..." cd ${CUGRAPH_ROOT}/benchmarks pytest -v -m "managedmem_on and poolallocator_on and tiny" --benchmark-disable -ERRORCODE=$((ERRORCODE | $?)) +echo "Ran Python benchmarks for cuGraph (running as tests) : return code was: $?, test script exit code is now: $EXITCODE" -exit ${ERRORCODE} +echo "Test script exiting with value: $EXITCODE" +exit ${EXITCODE} diff --git a/ci/utils/nbtest.sh b/ci/utils/nbtest.sh index 8c86baeaa09..ae8b52df106 100755 --- a/ci/utils/nbtest.sh +++ b/ci/utils/nbtest.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,6 +12,19 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+# Any failing command will set EXITCODE to non-zero +set +e # do not abort the script on error +set -o pipefail # piped commands propagate their error +set -E # ERR traps are inherited by subcommands +trap "EXITCODE=1" ERR + +# Prepend the following code to all scripts generated from nbconvert. This +# allows all cell and line magic code to run and update the namespace as if +# running in jupyter, but will also tolerate failures due to running in a +# non-jupyter env. +# Note: depending on the assumptions of the notebook script, ignoring failures +# may not be acceptable (meaning the converted notebook simply cannot run +# outside of jupyter as-is), hence the warning. MAGIC_OVERRIDE_CODE=" def my_run_line_magic(*args, **kwargs): g=globals() @@ -58,7 +71,6 @@ for nb in $*; do NBEXITCODE=$? echo EXIT CODE: ${NBEXITCODE} echo - EXITCODE=$((EXITCODE | ${NBEXITCODE})) done exit ${EXITCODE} diff --git a/conda/environments/builddocs.yml b/conda/environments/builddocs.yml deleted file mode 100644 index 89bd44a5542..00000000000 --- a/conda/environments/builddocs.yml +++ /dev/null @@ -1,19 +0,0 @@ -name: builddocs -channels: -- rapidsai -- pytorch -- conda-forge -- numba -- defaults -dependencies: -- python=3.6* -- cugraph=0.8* -- cudatoolkit=9.2 -- cudf=0.8* -- pyarrow=0.12.1.* -- cython=0.29* -- pip: - - numpydoc - - sphinx - - sphinx-rtd-theme - - sphinxcontrib-websupport diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index 067fd0bc4ba..255366b0a82 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -5,17 +5,17 @@ channels: - rapidsai-nightly - conda-forge dependencies: -- cudf=0.18.* -- libcudf=0.18.* -- rmm=0.18.* -- cuxfilter=0.18.* -- librmm=0.18.* +- cudf=0.19.* +- libcudf=0.19.* +- rmm=0.19.* +- cuxfilter=0.19.* +- librmm=0.19.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.18* -- dask-cudf=0.18* +- dask-cuda=0.19* +- dask-cudf=0.19* - nccl>=2.7 
-- ucx-py=0.18* +- ucx-py=0.19* - ucx-proc=*=gpu - scipy - networkx @@ -23,12 +23,14 @@ dependencies: - cudatoolkit=10.1 - clang=8.0.1 - clang-tools=8.0.1 -- cmake>=3.12 +- cmake>=3.18 - python>=3.6,<3.9 - notebook>=0.5.0 - boost - cython>=0.29,<0.30 - pytest +- libfaiss=1.6.3 +- faiss-proc=*=cuda - scikit-learn>=0.23.1 - colorcet - holoviews @@ -44,3 +46,6 @@ dependencies: - libcypher-parser - rapids-pytest-benchmark - doxygen +- pytest-cov +- gtest +- gmock diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index 3371340d8bd..e64d7c77b7d 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -5,17 +5,17 @@ channels: - rapidsai-nightly - conda-forge dependencies: -- cudf=0.18.* -- libcudf=0.18.* -- rmm=0.18.* -- cuxfilter=0.18.* -- librmm=0.18.* +- cudf=0.19.* +- libcudf=0.19.* +- rmm=0.19.* +- cuxfilter=0.19.* +- librmm=0.19.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.18* -- dask-cudf=0.18* +- dask-cuda=0.19* +- dask-cudf=0.19* - nccl>=2.7 -- ucx-py=0.18* +- ucx-py=0.19* - ucx-proc=*=gpu - scipy - networkx @@ -23,12 +23,14 @@ dependencies: - cudatoolkit=10.2 - clang=8.0.1 - clang-tools=8.0.1 -- cmake>=3.12 +- cmake>=3.18 - python>=3.6,<3.9 - notebook>=0.5.0 - boost - cython>=0.29,<0.30 - pytest +- libfaiss=1.6.3 +- faiss-proc=*=cuda - scikit-learn>=0.23.1 - colorcet - holoviews @@ -44,3 +46,6 @@ dependencies: - libcypher-parser - rapids-pytest-benchmark - doxygen +- pytest-cov +- gtest +- gmock diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index ee3b57632a1..1f05e4762ef 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -5,17 +5,17 @@ channels: - rapidsai-nightly - conda-forge dependencies: -- cudf=0.18.* -- libcudf=0.18.* -- rmm=0.18.* -- cuxfilter=0.18.* -- librmm=0.18.* +- cudf=0.19.* +- libcudf=0.19.* +- rmm=0.19.* +- 
cuxfilter=0.19.* +- librmm=0.19.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.18* -- dask-cudf=0.18* +- dask-cuda=0.19* +- dask-cudf=0.19* - nccl>=2.7 -- ucx-py=0.18* +- ucx-py=0.19* - ucx-proc=*=gpu - scipy - networkx @@ -23,12 +23,14 @@ dependencies: - cudatoolkit=11.0 - clang=8.0.1 - clang-tools=8.0.1 -- cmake>=3.12 +- cmake>=3.18 - python>=3.6,<3.9 - notebook>=0.5.0 - boost - cython>=0.29,<0.30 - pytest +- libfaiss=1.6.3 +- faiss-proc=*=cuda - scikit-learn>=0.23.1 - colorcet - holoviews @@ -44,3 +46,6 @@ dependencies: - libcypher-parser - rapids-pytest-benchmark - doxygen +- pytest-cov +- gtest +- gmock diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index 211ec920d27..8f7495eab3c 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -1,4 +1,4 @@ -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. # Usage: # conda build -c nvidia -c rapidsai -c conda-forge -c defaults . @@ -21,6 +21,13 @@ build: - CUDAHOSTCXX - PARALLEL_LEVEL - VERSION_SUFFIX + - CCACHE_DIR + - CCACHE_NOHASHDIR + - CCACHE_COMPILERCHECK + - CMAKE_GENERATOR + - CMAKE_C_COMPILER_LAUNCHER + - CMAKE_CXX_COMPILER_LAUNCHER + - CMAKE_CUDA_COMPILER_LAUNCHER requirements: build: @@ -32,12 +39,18 @@ requirements: - nccl>=2.7 - ucx-py {{ minor_version }} - ucx-proc=*=gpu + - gtest + - faiss-proc=*=cuda + - libfaiss=1.6.3 + - gmock run: - libcudf={{ minor_version }} - {{ pin_compatible('cudatoolkit', max_pin='x.x') }} - nccl>=2.7 - ucx-py {{ minor_version }} - ucx-proc=*=gpu + - faiss-proc=*=cuda + - libfaiss=1.6.3 #test: # commands: diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index bd122fc1fb2..b2d537edaa2 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -1,5 +1,5 @@ #============================================================================= -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -14,9 +14,9 @@ # limitations under the License. #============================================================================= -cmake_minimum_required(VERSION 3.12..3.17 FATAL_ERROR) +cmake_minimum_required(VERSION 3.18...3.18 FATAL_ERROR) -project(CUGRAPH VERSION 0.18.0 LANGUAGES C CXX CUDA) +project(CUGRAPH VERSION 0.19.0 LANGUAGES C CXX CUDA) ################################################################################################### # - build type ------------------------------------------------------------------------------------ @@ -33,6 +33,18 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES) "Debug" "Release" "MinSizeRel" "RelWithDebInfo") endif() +############################################################################## +# - User Options ------------------------------------------------------------ + +option(BUILD_CUGRAPH_MG_TESTS "Build cuGraph multigpu algorithm tests" OFF) + +################################################################################################### +# - user options ------------------------------------------------------------------------------ + +set(BLAS_LIBRARIES "" CACHE STRING + "Location of BLAS library for FAISS build.") +option(BUILD_STATIC_FAISS "Build the FAISS library for nearest neighbors search on GPU" OFF) + ################################################################################################### # - compiler options ------------------------------------------------------------------------------ @@ -90,10 +102,12 @@ message("-- Building for GPU_ARCHS = ${GPU_ARCHS}") foreach(arch ${GPU_ARCHS}) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_${arch},code=sm_${arch}") set(GUNROCK_GENCODE_SM${arch} "ON") + set(FAISS_GPU_ARCHS "${FAISS_GPU_ARCHS} -gencode arch=compute_${arch},code=sm_${arch}") endforeach() list(GET GPU_ARCHS -1 ptx) 
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_${ptx},code=compute_${ptx}") +set(FAISS_GPU_ARCHS "${FAISS_GPU_ARCHS} -gencode arch=compute_${ptx},code=compute_${ptx}") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas --disable-warnings") @@ -152,23 +166,24 @@ if(OpenMP_FOUND) endif(OpenMP_FOUND) +################################################################################################### +# - find blas ------------------------------------------------------------------------------------- + +if(NOT DEFINED BLAS_LIBRARIES) + find_package( BLAS REQUIRED ) +else() + message(STATUS "Manually setting BLAS to ${BLAS_LIBRARIES}") +endif() + ################################################################################################### # - find gtest ------------------------------------------------------------------------------------ if(BUILD_TESTS) - include(ConfigureGoogleTest) - - if(GTEST_FOUND) - message(STATUS - "Google C++ Testing Framework (Google Test) found in ${GTEST_ROOT}") - else() - message(AUTHOR_WARNING - "Google C++ Testing Framework (Google Test) not found: automated tests are disabled.") - endif(GTEST_FOUND) + find_package(GTest REQUIRED) endif(BUILD_TESTS) ################################################################################################### -# - RMM ------------------------------------------------------------------------------------------- +# - find RMM -------------------------------------------------------------------------------------- find_path(RMM_INCLUDE "rmm" HINTS @@ -178,6 +193,24 @@ find_path(RMM_INCLUDE "rmm" message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}") +################################################################################################### +# - find NCCL 
------------------------------------------------------------------------------------- + +if(NOT NCCL_PATH) + find_package(NCCL REQUIRED) +else() + message("-- Manually set NCCL PATH to ${NCCL_PATH}") + set(NCCL_INCLUDE_DIRS ${NCCL_PATH}/include) + set(NCCL_LIBRARIES ${NCCL_PATH}/lib/libnccl.so) +endif(NOT NCCL_PATH) + +################################################################################################### +# - find MPI - only enabled if MG tests are to be built + +if(BUILD_CUGRAPH_MG_TESTS) + find_package(MPI REQUIRED) +endif(BUILD_CUGRAPH_MG_TESTS) + ################################################################################################### # - Fetch Content --------------------------------------------------------------------------------- include(FetchContent) @@ -205,7 +238,7 @@ message("Fetching cuco") FetchContent_Declare( cuco GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git - GIT_TAG d965ed8dea8f56da8e260a6130dddf3ca351c45f + GIT_TAG 2196040f0562a0280292eebef5295d914f615e63 ) FetchContent_GetProperties(cuco) @@ -235,26 +268,60 @@ endif() set(LIBCUDACXX_INCLUDE_DIR "${libcudacxx_SOURCE_DIR}/include") message("set LIBCUDACXX_INCLUDE_DIR to: ${LIBCUDACXX_INCLUDE_DIR}") +# - CUHORNET +FetchContent_Declare( + cuhornet + GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git + GIT_TAG 9cb8e8803852bd895a9c95c0fe778ad6eeefa7ad + GIT_SHALLOW true + SOURCE_SUBDIR hornet +) + +FetchContent_GetProperties(cuhornet) +if(NOT cuhornet_POPULATED) + message("populating cuhornet") + FetchContent_Populate(cuhornet) + # We are not using the cuhornet CMake targets, so no need to call `add_subdirectory()`. 
+endif() +set(CUHORNET_INCLUDE_DIR ${cuhornet_SOURCE_DIR} CACHE STRING "Path to cuhornet includes") + +# - raft - (header only) +# Only cloned if RAFT_PATH env variable is not defined +if(DEFINED ENV{RAFT_PATH}) + message(STATUS "RAFT_PATH environment variable detected.") + message(STATUS "RAFT_DIR set to $ENV{RAFT_PATH}") + set(RAFT_DIR "$ENV{RAFT_PATH}") + +else(DEFINED ENV{RAFT_PATH}) + message(STATUS "RAFT_PATH environment variable NOT detected, cloning RAFT") + + FetchContent_Declare( + raft + GIT_REPOSITORY https://github.com/rapidsai/raft.git + GIT_TAG 4a79adcb0c0e87964dcdc9b9122f242b5235b702 + SOURCE_SUBDIR raft + ) + + FetchContent_GetProperties(raft) + if(NOT raft_POPULATED) + message("populating raft") + FetchContent_Populate(raft) + # We are not using any raft CMake targets, so no need to call `add_subdirectory()`. + endif() + + set(RAFT_DIR "${raft_SOURCE_DIR}") +endif(DEFINED ENV{RAFT_PATH}) ################################################################################################### # - External Projects ----------------------------------------------------------------------------- # https://cmake.org/cmake/help/v3.0/module/ExternalProject.html -include(ExternalProject) - -# - CUHORNET -set(CUHORNET_DIR ${CMAKE_CURRENT_BINARY_DIR}/cuhornet CACHE STRING "Path to cuhornet repo") -set(CUHORNET_INCLUDE_DIR ${CUHORNET_DIR}/src/cuhornet CACHE STRING "Path to cuhornet includes") +# FIXME: gunrock is the only external package still using ExternalProject +# instead of FetchContent. Consider migrating to FetchContent soon (this may +# require updates to the gunrock cmake files to support this). 
-ExternalProject_Add(cuhornet - GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git - GIT_TAG 9cb8e8803852bd895a9c95c0fe778ad6eeefa7ad - PREFIX ${CUHORNET_DIR} - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "" -) +include(ExternalProject) # - GUNROCK set(GUNROCK_DIR ${CMAKE_CURRENT_BINARY_DIR}/gunrock CACHE STRING "Path to gunrock repo") @@ -262,7 +329,7 @@ set(GUNROCK_INCLUDE_DIR ${GUNROCK_DIR}/src/gunrock_ext CACHE STRING "Path to gun ExternalProject_Add(gunrock_ext GIT_REPOSITORY https://github.com/gunrock/gunrock.git - GIT_TAG dev + GIT_TAG v1.2 PREFIX ${GUNROCK_DIR} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= -DGUNROCK_BUILD_SHARED_LIBS=OFF @@ -280,74 +347,61 @@ ExternalProject_Add(gunrock_ext ) add_library(gunrock STATIC IMPORTED) - add_dependencies(gunrock gunrock_ext) - set_property(TARGET gunrock PROPERTY IMPORTED_LOCATION ${GUNROCK_DIR}/src/gunrock_ext-build/lib/libgunrock.a) -# - NCCL -if(NOT NCCL_PATH) - find_package(NCCL REQUIRED) +# - FAISS +# FIXME: The commit currently being fetched from faiss is using autotools which +# is more convenient to build with ExternalProjectAdd. +# Consider migrating to FetchContent once the tagged commit is changed. 
+ +if(BUILD_STATIC_FAISS) + set(FAISS_DIR ${CMAKE_CURRENT_BINARY_DIR}/faiss CACHE STRING + "Path to FAISS source directory") + ExternalProject_Add(faiss + GIT_REPOSITORY https://github.com/facebookresearch/faiss.git + GIT_TAG a5b850dec6f1cd6c88ab467bfd5e87b0cac2e41d + CONFIGURE_COMMAND LIBS=-pthread + CPPFLAGS=-w + LDFLAGS=-L${CMAKE_INSTALL_PREFIX}/lib + ${CMAKE_CURRENT_BINARY_DIR}/faiss/src/faiss/configure + --prefix=${CMAKE_CURRENT_BINARY_DIR}/faiss + --with-blas=${BLAS_LIBRARIES} + --with-cuda=${CUDA_TOOLKIT_ROOT_DIR} + --with-cuda-arch=${FAISS_GPU_ARCHS} + -v + PREFIX ${FAISS_DIR} + BUILD_COMMAND make -j${PARALLEL_LEVEL} VERBOSE=1 + BUILD_BYPRODUCTS ${FAISS_DIR}/lib/libfaiss.a + BUILD_ALWAYS 1 + INSTALL_COMMAND make -s install > /dev/null + UPDATE_COMMAND "" + BUILD_IN_SOURCE 1 + PATCH_COMMAND patch -p1 -N < ${CMAKE_CURRENT_SOURCE_DIR}/cmake/faiss_cuda11.patch || true) + + ExternalProject_Get_Property(faiss install_dir) + add_library(FAISS::FAISS STATIC IMPORTED) + add_dependencies(FAISS::FAISS faiss) + set_property(TARGET FAISS::FAISS PROPERTY + IMPORTED_LOCATION ${FAISS_DIR}/lib/libfaiss.a) + set(FAISS_INCLUDE_DIRS "${FAISS_DIR}/src") else() - message("-- Manually set NCCL PATH to ${NCCL_PATH}") - set(NCCL_INCLUDE_DIRS ${NCCL_PATH}/include) - set(NCCL_LIBRARIES ${NCCL_PATH}/lib/libnccl.so) -endif(NOT NCCL_PATH) - -# - raft - (header only) ----------------------------------------------------- - -# Only cloned if RAFT_PATH env variable is not defined - -if(DEFINED ENV{RAFT_PATH}) - message(STATUS "RAFT_PATH environment variable detected.") - message(STATUS "RAFT_DIR set to $ENV{RAFT_PATH}") - set(RAFT_DIR "$ENV{RAFT_PATH}") - - ExternalProject_Add(raft - DOWNLOAD_COMMAND "" - SOURCE_DIR ${RAFT_DIR} - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "") - -else(DEFINED ENV{RAFT_PATH}) - message(STATUS "RAFT_PATH environment variable NOT detected, cloning RAFT") - set(RAFT_DIR ${CMAKE_CURRENT_BINARY_DIR}/raft CACHE STRING "Path to RAFT repo") - - 
ExternalProject_Add(raft - GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG f75d7b437bf1da3df749108161b8a0505fb6b7b3 - PREFIX ${RAFT_DIR} - CONFIGURE_COMMAND "" - BUILD_COMMAND "" - INSTALL_COMMAND "") - - # Redefining RAFT_DIR so it coincides with the one inferred by env variable. - set(RAFT_DIR "${RAFT_DIR}/src/raft/") -endif(DEFINED ENV{RAFT_PATH}) - + set(FAISS_INSTALL_DIR ENV{FAISS_ROOT}) + find_package(FAISS REQUIRED) +endif(BUILD_STATIC_FAISS) ################################################################################################### # - library targets ------------------------------------------------------------------------------- -# target_link_directories is added in cmake 3.13, and cmake advises to use this instead of -# link_directoires (we should switch to target_link_directories once 3.13 becomes the minimum -# required version). -link_directories( - # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the - # link directories for nvcc. 
- "${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}") - add_library(cugraph SHARED src/utilities/spmv_1D.cu src/utilities/cython.cu src/structure/graph.cu src/linear_assignment/hungarian.cu - src/link_analysis/pagerank.cu - src/link_analysis/pagerank_1D.cu src/link_analysis/gunrock_hits.cpp src/traversal/bfs.cu src/traversal/sssp.cu + src/traversal/tsp.cu src/link_prediction/jaccard.cu src/link_prediction/overlap.cu src/layout/force_atlas2.cu @@ -357,9 +411,10 @@ add_library(cugraph SHARED src/community/louvain.cu src/community/leiden.cu src/community/ktruss.cu - src/community/ECG.cu + src/community/ecg.cu src/community/triangles_counting.cu src/community/extract_subgraph_by_vertex.cu + src/community/egonet.cu src/cores/core_number.cu src/traversal/two_hop_neighbors.cu src/components/connectivity.cu @@ -367,6 +422,10 @@ add_library(cugraph SHARED src/centrality/betweenness_centrality.cu src/experimental/graph.cu src/experimental/graph_view.cu + src/experimental/coarsen_graph.cu + src/experimental/renumber_edgelist.cu + src/experimental/relabel.cu + src/experimental/induced_subgraph.cu src/experimental/bfs.cu src/experimental/sssp.cu src/experimental/pagerank.cu @@ -374,12 +433,17 @@ add_library(cugraph SHARED src/tree/mst.cu ) +target_link_directories(cugraph + PRIVATE + # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the + # link directories for nvcc. + "${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}") + # # NOTE: This dependency will force the building of cugraph to # wait until after cugunrock is constructed. 
# add_dependencies(cugraph gunrock_ext) -add_dependencies(cugraph raft) ################################################################################################### # - include paths --------------------------------------------------------------------------------- @@ -408,7 +472,7 @@ target_include_directories(cugraph # - link libraries -------------------------------------------------------------------------------- target_link_libraries(cugraph PRIVATE - gunrock cublas cusparse curand cusolver cudart cuda ${NCCL_LIBRARIES}) + gunrock cublas cusparse curand cusolver cudart cuda FAISS::FAISS ${NCCL_LIBRARIES}) if(OpenMP_CXX_FOUND) target_link_libraries(cugraph PRIVATE @@ -461,16 +525,23 @@ target_link_libraries(cugraph PRIVATE ${OpenMP_CXX_LIB_NAMES}) endif(OpenMP_CXX_FOUND) +# CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the +# compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent cmake +# warnings about policy CMP0104. With this setting, arch flags must be manually +# set! ("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism used in +# cpp/CMakeLists.txt for setting arch options). Run "cmake --help-policy +# CMP0104" for policy details. +# NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to the +# findcudatoolkit features in cmake 3.17+ +set_target_properties(cugraph PROPERTIES + CUDA_ARCHITECTURES OFF) + ################################################################################################### # - generate tests -------------------------------------------------------------------------------- if(BUILD_TESTS) if(GTEST_FOUND) - # target_link_directories is added in cmake 3.13, and cmake advises to use this instead of - # link_directoires (we should switch to target_link_directories once 3.13 becomes the - # minimum required version). 
- link_directories(${GTEST_LIBRARY_DIR}) - add_subdirectory(${CMAKE_SOURCE_DIR}/tests) + add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tests) endif(GTEST_FOUND) endif(BUILD_TESTS) diff --git a/cpp/cmake/Modules/ConfigureArrow.cmake b/cpp/cmake/Modules/ConfigureArrow.cmake deleted file mode 100644 index b27e53dd415..00000000000 --- a/cpp/cmake/Modules/ConfigureArrow.cmake +++ /dev/null @@ -1,98 +0,0 @@ -set(ARROW_ROOT ${CMAKE_BINARY_DIR}/arrow) - -set(ARROW_CMAKE_ARGS " -DARROW_WITH_LZ4=OFF" - " -DARROW_WITH_ZSTD=OFF" - " -DARROW_WITH_BROTLI=OFF" - " -DARROW_WITH_SNAPPY=OFF" - " -DARROW_WITH_ZLIB=OFF" - " -DARROW_BUILD_STATIC=ON" - " -DARROW_BUILD_SHARED=OFF" - " -DARROW_BOOST_USE_SHARED=ON" - " -DARROW_BUILD_TESTS=OFF" - " -DARROW_TEST_LINKAGE=OFF" - " -DARROW_TEST_MEMCHECK=OFF" - " -DARROW_BUILD_BENCHMARKS=OFF" - " -DARROW_IPC=ON" - " -DARROW_COMPUTE=OFF" - " -DARROW_CUDA=OFF" - " -DARROW_JEMALLOC=OFF" - " -DARROW_BOOST_VENDORED=OFF" - " -DARROW_PYTHON=OFF" - " -DARROW_USE_GLOG=OFF" - " -DCMAKE_VERBOSE_MAKEFILE=ON") - -configure_file("${CMAKE_SOURCE_DIR}/cmake/Templates/Arrow.CMakeLists.txt.cmake" - "${ARROW_ROOT}/CMakeLists.txt") - -file(MAKE_DIRECTORY "${ARROW_ROOT}/build") -file(MAKE_DIRECTORY "${ARROW_ROOT}/install") - -execute_process( - COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" . 
- RESULT_VARIABLE ARROW_CONFIG - WORKING_DIRECTORY ${ARROW_ROOT}) - -if(ARROW_CONFIG) - message(FATAL_ERROR "Configuring Arrow failed: " ${ARROW_CONFIG}) -endif(ARROW_CONFIG) - -set(PARALLEL_BUILD -j) -if($ENV{PARALLEL_LEVEL}) - set(NUM_JOBS $ENV{PARALLEL_LEVEL}) - set(PARALLEL_BUILD "${PARALLEL_BUILD}${NUM_JOBS}") -endif($ENV{PARALLEL_LEVEL}) - -if(${NUM_JOBS}) - if(${NUM_JOBS} EQUAL 1) - message(STATUS "ARROW BUILD: Enabling Sequential CMake build") - elseif(${NUM_JOBS} GREATER 1) - message(STATUS "ARROW BUILD: Enabling Parallel CMake build with ${NUM_JOBS} jobs") - endif(${NUM_JOBS} EQUAL 1) -else() - message(STATUS "ARROW BUILD: Enabling Parallel CMake build with all threads") -endif(${NUM_JOBS}) - -execute_process( - COMMAND ${CMAKE_COMMAND} --build .. -- ${PARALLEL_BUILD} - RESULT_VARIABLE ARROW_BUILD - WORKING_DIRECTORY ${ARROW_ROOT}/build) - -if(ARROW_BUILD) - message(FATAL_ERROR "Building Arrow failed: " ${ARROW_BUILD}) -endif(ARROW_BUILD) - -set(ARROW_GENERATED_IPC_DIR - "${ARROW_ROOT}/build/src/arrow/ipc") - -configure_file(${ARROW_GENERATED_IPC_DIR}/File_generated.h ${CMAKE_SOURCE_DIR}/include/cudf/ipc_generated/File_generated.h COPYONLY) -configure_file(${ARROW_GENERATED_IPC_DIR}/Message_generated.h ${CMAKE_SOURCE_DIR}/include/cudf/ipc_generated/Message_generated.h COPYONLY) -configure_file(${ARROW_GENERATED_IPC_DIR}/Schema_generated.h ${CMAKE_SOURCE_DIR}/include/cudf/ipc_generated/Schema_generated.h COPYONLY) -configure_file(${ARROW_GENERATED_IPC_DIR}/Tensor_generated.h ${CMAKE_SOURCE_DIR}/include/cudf/ipc_generated/Tensor_generated.h COPYONLY) - -message(STATUS "Arrow installed here: " ${ARROW_ROOT}/install) -set(ARROW_LIBRARY_DIR "${ARROW_ROOT}/install/lib") -set(ARROW_INCLUDE_DIR "${ARROW_ROOT}/install/include") - -find_library(ARROW_LIB arrow - NO_DEFAULT_PATH - HINTS "${ARROW_LIBRARY_DIR}") - -if(ARROW_LIB) - message(STATUS "Arrow library: " ${ARROW_LIB}) - set(ARROW_FOUND TRUE) -endif(ARROW_LIB) - -set(FLATBUFFERS_ROOT 
"${ARROW_ROOT}/build/flatbuffers_ep-prefix/src/flatbuffers_ep-install") - -message(STATUS "FlatBuffers installed here: " ${FLATBUFFERS_ROOT}) -set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_ROOT}/include") -set(FLATBUFFERS_LIBRARY_DIR "${FLATBUFFERS_ROOT}/lib") - -add_definitions(-DARROW_METADATA_V4) -add_definitions(-DARROW_VERSION=1210) - - - - - - diff --git a/cpp/cmake/Modules/ConfigureGoogleTest.cmake b/cpp/cmake/Modules/ConfigureGoogleTest.cmake deleted file mode 100644 index 9fac40f4649..00000000000 --- a/cpp/cmake/Modules/ConfigureGoogleTest.cmake +++ /dev/null @@ -1,49 +0,0 @@ -set(GTEST_ROOT "${CMAKE_BINARY_DIR}/googletest") - -set(GTEST_CMAKE_ARGS "") - #" -Dgtest_build_samples=ON" - #" -DCMAKE_VERBOSE_MAKEFILE=ON") - -configure_file("${CMAKE_SOURCE_DIR}/cmake/Templates/GoogleTest.CMakeLists.txt.cmake" - "${GTEST_ROOT}/CMakeLists.txt") - -file(MAKE_DIRECTORY "${GTEST_ROOT}/build") -file(MAKE_DIRECTORY "${GTEST_ROOT}/install") - -execute_process(COMMAND ${CMAKE_COMMAND} -G ${CMAKE_GENERATOR} . - RESULT_VARIABLE GTEST_CONFIG - WORKING_DIRECTORY ${GTEST_ROOT}) - -if(GTEST_CONFIG) - message(FATAL_ERROR "Configuring GoogleTest failed: " ${GTEST_CONFIG}) -endif(GTEST_CONFIG) - -set(PARALLEL_BUILD -j) -if($ENV{PARALLEL_LEVEL}) - set(NUM_JOBS $ENV{PARALLEL_LEVEL}) - set(PARALLEL_BUILD "${PARALLEL_BUILD}${NUM_JOBS}") -endif($ENV{PARALLEL_LEVEL}) - -if(${NUM_JOBS}) - if(${NUM_JOBS} EQUAL 1) - message(STATUS "GTEST BUILD: Enabling Sequential CMake build") - elseif(${NUM_JOBS} GREATER 1) - message(STATUS "GTEST BUILD: Enabling Parallel CMake build with ${NUM_JOBS} jobs") - endif(${NUM_JOBS} EQUAL 1) -else() - message(STATUS "GTEST BUILD: Enabling Parallel CMake build with all threads") -endif(${NUM_JOBS}) - -execute_process(COMMAND ${CMAKE_COMMAND} --build .. 
-- ${PARALLEL_BUILD} - RESULT_VARIABLE GTEST_BUILD - WORKING_DIRECTORY ${GTEST_ROOT}/build) - -if(GTEST_BUILD) - message(FATAL_ERROR "Building GoogleTest failed: " ${GTEST_BUILD}) -endif(GTEST_BUILD) - -message(STATUS "GoogleTest installed here: " ${GTEST_ROOT}/install) -set(GTEST_INCLUDE_DIR "${GTEST_ROOT}/install/include") -set(GTEST_LIBRARY_DIR "${GTEST_ROOT}/install/lib") -set(GTEST_FOUND TRUE) - diff --git a/cpp/cmake/Modules/FindFAISS.cmake b/cpp/cmake/Modules/FindFAISS.cmake new file mode 100644 index 00000000000..7c456edfeef --- /dev/null +++ b/cpp/cmake/Modules/FindFAISS.cmake @@ -0,0 +1,98 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# Based on FindPNG.cmake from cmake 3.14.3 + +#[=======================================================================[.rst: +FindFAISS +-------- + +Template to generate FindPKG_NAME.cmake CMake modules + +Find FAISS + +Imported targets +^^^^^^^^^^^^^^^^ + +This module defines the following :prop_tgt:`IMPORTED` target: + +``FAISS::FAISS`` + The libFAISS library, if found. + +Result variables +^^^^^^^^^^^^^^^^ + +This module will set the following variables in your project: + +``FAISS_INCLUDE_DIRS`` + where to find FAISS.hpp , etc. +``FAISS_LIBRARIES`` + the libraries to link against to use libFAISS. +``FAISS_FOUND`` + If false, do not try to use FAISS. 
+``FAISS_VERSION_STRING`` + the version of the FAISS library found + +#]=======================================================================] + +find_path(FAISS_LOCATION faiss/IndexFlat.h + HINTS ${FAISS_INSTALL_DIR} + PATH_SUFFIXES include include/) + +list(APPEND FAISS_NAMES faiss libfaiss) +set(_FAISS_VERSION_SUFFIXES ) + +foreach(v IN LISTS _FAISS_VERSION_SUFFIXES) + list(APPEND FAISS_NAMES faiss${v} libfaiss${v}) + list(APPEND FAISS_NAMES faiss.${v} libfaiss.${v}) +endforeach() +unset(_FAISS_VERSION_SUFFIXES) + +find_library(FAISS_LIBRARY_RELEASE NAMES ${FAISS_NAMES} + HINTS ${FAISS_INSTALL_DIR} + PATH_SUFFIXES lib) + +include(${CMAKE_ROOT}/Modules/SelectLibraryConfigurations.cmake) +select_library_configurations(FAISS) +mark_as_advanced(FAISS_LIBRARY_RELEASE) +unset(FAISS_NAMES) + +# Set by select_library_configurations(), but we want the one from +# find_package_handle_standard_args() below. +unset(FAISS_FOUND) + +if (FAISS_LIBRARY AND FAISS_LOCATION) + set(FAISS_INCLUDE_DIRS ${FAISS_LOCATION} ) + set(FAISS_LIBRARY ${FAISS_LIBRARY}) + + if(NOT TARGET FAISS::FAISS) + add_library(FAISS::FAISS UNKNOWN IMPORTED) + set_target_properties(FAISS::FAISS PROPERTIES + INTERFACE_INCLUDE_DIRECTORIES "${FAISS_INCLUDE_DIRS}") + if(EXISTS "${FAISS_LIBRARY}") + set_target_properties(FAISS::FAISS PROPERTIES + IMPORTED_LINK_INTERFACE_LANGUAGES "CXX" + IMPORTED_LOCATION "${FAISS_LIBRARY}") + endif() + endif() +endif () + + +include(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake) +find_package_handle_standard_args(FAISS + REQUIRED_VARS FAISS_LIBRARY FAISS_LOCATION + VERSION_VAR FAISS_VERSION_STRING) + +mark_as_advanced(FAISS_LOCATION FAISS_LIBRARY) diff --git a/cpp/cmake/Templates/Arrow.CMakeLists.txt.cmake b/cpp/cmake/Templates/Arrow.CMakeLists.txt.cmake deleted file mode 100644 index b1eaf3f0efa..00000000000 --- a/cpp/cmake/Templates/Arrow.CMakeLists.txt.cmake +++ /dev/null @@ -1,19 +0,0 @@ -cmake_minimum_required(VERSION 3.12) - -include(ExternalProject) - 
-ExternalProject_Add(Arrow - GIT_REPOSITORY https://github.com/apache/arrow.git - GIT_TAG apache-arrow-0.12.1 - SOURCE_DIR "${ARROW_ROOT}/arrow" - SOURCE_SUBDIR "cpp" - BINARY_DIR "${ARROW_ROOT}/build" - INSTALL_DIR "${ARROW_ROOT}/install" - CMAKE_ARGS ${ARROW_CMAKE_ARGS} -DCMAKE_INSTALL_PREFIX=${ARROW_ROOT}/install) - - - - - - - diff --git a/cpp/cmake/Templates/GoogleTest.CMakeLists.txt.cmake b/cpp/cmake/Templates/GoogleTest.CMakeLists.txt.cmake deleted file mode 100644 index 66e1dc85a50..00000000000 --- a/cpp/cmake/Templates/GoogleTest.CMakeLists.txt.cmake +++ /dev/null @@ -1,19 +0,0 @@ -cmake_minimum_required(VERSION 3.12) - -include(ExternalProject) - -ExternalProject_Add(GoogleTest - GIT_REPOSITORY https://github.com/google/googletest.git - GIT_TAG release-1.8.0 - SOURCE_DIR "${GTEST_ROOT}/googletest" - BINARY_DIR "${GTEST_ROOT}/build" - INSTALL_DIR "${GTEST_ROOT}/install" - CMAKE_ARGS ${GTEST_CMAKE_ARGS} -DCMAKE_INSTALL_PREFIX=${GTEST_ROOT}/install) - - - - - - - - diff --git a/cpp/cmake/faiss_cuda11.patch b/cpp/cmake/faiss_cuda11.patch new file mode 100644 index 00000000000..496ca0e7b23 --- /dev/null +++ b/cpp/cmake/faiss_cuda11.patch @@ -0,0 +1,40 @@ +diff --git a/configure b/configure +index ed40dae..f88ed0a 100755 +--- a/configure ++++ b/configure +@@ -2970,7 +2970,7 @@ ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ex + ac_compiler_gnu=$ac_cv_cxx_compiler_gnu + + +- ax_cxx_compile_alternatives="11 0x" ax_cxx_compile_cxx11_required=true ++ ax_cxx_compile_alternatives="14 11 0x" ax_cxx_compile_cxx11_required=true + ac_ext=cpp + ac_cpp='$CXXCPP $CPPFLAGS' + ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5' +diff --git a/gpu/utils/DeviceDefs.cuh b/gpu/utils/DeviceDefs.cuh +index 89d3dda..bc0f9b5 100644 +--- a/gpu/utils/DeviceDefs.cuh ++++ b/gpu/utils/DeviceDefs.cuh +@@ -13,7 +13,7 @@ + namespace faiss { namespace gpu { + + #ifdef __CUDA_ARCH__ +-#if __CUDA_ARCH__ <= 750 ++#if __CUDA_ARCH__ <= 800 + constexpr int 
kWarpSize = 32; + #else + #error Unknown __CUDA_ARCH__; please define parameters for compute capability +diff --git a/gpu/utils/MatrixMult-inl.cuh b/gpu/utils/MatrixMult-inl.cuh +index ede225e..4f7eb44 100644 +--- a/gpu/utils/MatrixMult-inl.cuh ++++ b/gpu/utils/MatrixMult-inl.cuh +@@ -51,6 +51,9 @@ rawGemm(cublasHandle_t handle, + auto cBT = GetCudaType::Type; + + // Always accumulate in f32 ++# if __CUDACC_VER_MAJOR__ >= 11 ++ cublasSetMathMode(handle, CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION); ++# endif + return cublasSgemmEx(handle, transa, transb, m, n, k, + &fAlpha, A, cAT, lda, + B, cBT, ldb, diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index a57e550521e..c666bce23ad 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,7 +14,7 @@ * limitations under the License. */ #pragma once - +#include #include #include #include @@ -22,78 +22,6 @@ namespace cugraph { -/** - * @brief Find the PageRank vertex values for a graph. - * - * cuGraph computes an approximation of the Pagerank eigenvector using the power method. - * The number of iterations depends on the properties of the network itself; it increases - * when the tolerance descreases and/or alpha increases toward the limiting value of 1. - * The user is free to use default values or to provide inputs for the initial guess, - * tolerance and maximum number of iterations. - - * - * @throws cugraph::logic_error with a custom message when an error - occurs. - * - * @tparam VT Type of vertex identifiers. Supported value : int (signed, - 32-bit) - * @tparam ET Type of edge identifiers. Supported value : int (signed, - 32-bit) - * @tparam WT Type of edge weights. Supported value : float or double. 
- * - * @param[in] handle Library handle (RAFT). If a communicator is set in the handle, - the multi GPU version will be selected. - * @param[in] graph cuGraph graph descriptor, should contain the connectivity - information as a transposed adjacency list (CSC). Edge weights are not used for this algorithm. - * @param[in] alpha The damping factor alpha represents the probability to follow - an outgoing edge, standard value is 0.85. Thus, 1.0-alpha is the probability to “teleport” to a - random vertex. Alpha should be greater than 0.0 and strictly lower than 1.0. - * The initial guess must not be the vector of 0s. Any value other - than 1 or 0 is treated as an invalid value. - * @param[in] pagerank Array of size V. Should contain the initial guess if - has_guess=true. In this case the initial guess cannot be the vector of 0s. Memory is provided and - owned by the caller. - * @param[in] personalization_subset_size (optional) Supported on single-GPU, on the roadmap for - Multi-GPU. The number of vertices for to personalize. Initialized to 0 by default. - * @param[in] personalization_subset (optional) Supported on single-GPU, on the roadmap for - Multi-GPU..= Array of size personalization_subset_size containing vertices for running personalized - pagerank. Initialized to nullptr by default. Memory is provided and owned by the caller. - * @param[in] personalization_values (optional) Supported on single-GPU, on the roadmap for - Multi-GPU. Array of size personalization_subset_size containing values associated with - personalization_subset vertices. Initialized to nullptr by default. Memory is provided and owned by - the caller. - * @param[in] tolerance Supported on single-GPU. Set the tolerance the approximation, - this parameter should be a small magnitude value. - * The lower the tolerance the better the approximation. If this - value is 0.0f, cuGraph will use the default value which is 1.0E-5. 
- * Setting too small a tolerance can lead to non-convergence due - to numerical roundoff. Usually values between 0.01 and 0.00001 are acceptable. - * @param[in] max_iter (optional) The maximum number of iterations before an answer is - returned. This can be used to limit the execution time and do an early exit before the solver - reaches the convergence tolerance. - * If this value is lower or equal to 0 cuGraph will use the - default value, which is 500. - * @param[in] has_guess (optional) Supported on single-GPU. This parameter is used to - notify cuGraph if it should use a user-provided initial guess. False means the user does not have a - guess, in this case cuGraph will use a uniform vector set to 1/V. - * If the value is True, cuGraph will read the pagerank parameter - and use this as an initial guess. - * @param[out] *pagerank The PageRank : pagerank[i] is the PageRank of vertex i. Memory - remains provided and owned by the caller. - * - */ -template -void pagerank(raft::handle_t const &handle, - GraphCSCView const &graph, - WT *pagerank, - VT personalization_subset_size = 0, - VT *personalization_subset = nullptr, - WT *personalization_values = nullptr, - double alpha = 0.85, - double tolerance = 1e-5, - int64_t max_iter = 500, - bool has_guess = false); - /** * @brief Compute jaccard similarity coefficient for all vertices * @@ -264,6 +192,44 @@ void force_atlas2(GraphCOOView &graph, bool verbose = false, internals::GraphBasedDimRedCallback *callback = nullptr); +/** + * @brief Finds an approximate solution to the traveling salesperson problem (TSP). + * cuGraph computes an approximation of the TSP problem using hill climbing + * optimization. + * + * The current implementation does not support a weighted graph. + * + * @throws cugraph::logic_error when an error occurs. + * @param[in] handle Library handle (RAFT). If a communicator is set in the + * handle, the multi GPU version will be selected. 
+ * @param[in] vtx_ptr Device array containing the vertex identifiers used + * to initialize the route. + * @param[in] x_pos Device array containing starting x-axis positions. + * @param[in] y_pos Device array containing starting y-axis positions. + * @param[in] nodes Number of cities. + * @param[in] restarts Number of starts to try. The more restarts, + * the better the solution will be approximated. The number of restarts depends on the problem + * size and should be kept low for instances above 2k cities. + * @param[in] beam_search Specify if the initial solution should use KNN + * for an approximation solution. + * @param[in] k Beam width to use in the search. + * @param[in] nstart Start from a specific position. + * @param[in] verbose Logs configuration and iterative improvement. + * @param[out] route Device array containing the returned route. + * + */ +float traveling_salesperson(raft::handle_t &handle, + int const *vtx_ptr, + float const *x_pos, + float const *y_pos, + int nodes, + int restarts, + bool beam_search, + int k, + int nstart, + bool verbose, + int *route); + /** * @brief Compute betweenness centrality for a graph * @@ -815,6 +781,7 @@ template std::unique_ptr> extract_subgraph_vertex(GraphCOOView const &graph, VT const *vertices, VT num_vertices); +} // namespace subgraph /** * @brief Wrapper function for Nvgraph balanced cut clustering @@ -837,7 +804,6 @@ std::unique_ptr> extract_subgraph_vertex(GraphCOOView @@ -1191,6 +1157,35 @@ void katz_centrality(raft::handle_t const &handle, bool has_initial_guess = false, bool normalize = false, bool do_expensive_check = false); - +/** + * @brief returns induced EgoNet subgraph(s) of neighbors centered at nodes in source_vertex within + * a given radius. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. 
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object, we extract induced egonet subgraphs from @p graph_view. + * @param source_vertex Pointer to egonet center vertices (size == @p n_subgraphs). + * @param n_subgraphs Number of induced EgoNet subgraphs to extract (i.e. number of elements in @p + * source_vertex). + * @param radius Include all neighbors of distance <= radius from @p source_vertex. + * @return std::tuple, rmm::device_uvector, + * rmm::device_uvector, rmm::device_uvector> Quadruplet of edge source vertices, + * edge destination vertices, edge weights, and edge offsets for each induced EgoNet subgraph. + */ +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_ego(raft::handle_t const &handle, + graph_view_t const &graph_view, + vertex_t *source_vertex, + vertex_t n_subgraphs, + vertex_t radius); } // namespace experimental } // namespace cugraph diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh index bf56b2e6f80..3ac2e2163c6 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/experimental/detail/graph_utils.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,6 +17,8 @@ #include #include +#include +#include #include #include @@ -24,8 +26,10 @@ #include #include +#include #include +#include #include namespace cugraph { @@ -137,6 +141,38 @@ struct degree_from_offsets_t { __device__ edge_t operator()(vertex_t v) { return offsets[v + 1] - offsets[v]; } }; +template +struct compute_gpu_id_from_vertex_t { + int comm_size{0}; + + __device__ int operator()(vertex_t v) const + { + cuco::detail::MurmurHash3_32 hash_func{}; + return hash_func(v) % comm_size; + } +}; + +template +struct compute_gpu_id_from_edge_t { + bool hypergraph_partitioned{false}; + int comm_size{0}; + int row_comm_size{0}; + int col_comm_size{0}; + + __device__ int operator()(vertex_t major, vertex_t minor) const + { + cuco::detail::MurmurHash3_32 hash_func{}; + auto major_comm_rank = static_cast(hash_func(major) % comm_size); + auto minor_comm_rank = static_cast(hash_func(minor) % comm_size); + if (hypergraph_partitioned) { + return (minor_comm_rank / col_comm_size) * row_comm_size + (major_comm_rank % row_comm_size); + } else { + return (major_comm_rank - (major_comm_rank % row_comm_size)) + + (minor_comm_rank / col_comm_size); + } + } +}; + } // namespace detail } // namespace experimental } // namespace cugraph diff --git a/cpp/include/experimental/graph.hpp b/cpp/include/experimental/graph.hpp index 592294c8967..cc21f7c5013 100644 --- a/cpp/include/experimental/graph.hpp +++ b/cpp/include/experimental/graph.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -62,7 +62,7 @@ class graph_t> const &edge_lists, + std::vector> const &edgelists, partition_t const &partition, vertex_t number_of_vertices, edge_t number_of_edges, @@ -70,7 +70,7 @@ class graph_t view() + graph_view_t view() const { std::vector offsets(adj_matrix_partition_offsets_.size(), nullptr); std::vector indices(adj_matrix_partition_indices_.size(), nullptr); @@ -124,7 +124,7 @@ class graph_t const &edge_list, + edgelist_t const &edgelist, vertex_t number_of_vertices, graph_properties_t properties, bool sorted_by_degree, @@ -132,7 +132,7 @@ class graph_tget_number_of_vertices(); } - graph_view_t view() + graph_view_t view() const { return graph_view_t( *(this->get_handle_ptr()), diff --git a/cpp/include/experimental/graph_functions.hpp b/cpp/include/experimental/graph_functions.hpp new file mode 100644 index 00000000000..7b4bb466b97 --- /dev/null +++ b/cpp/include/experimental/graph_functions.hpp @@ -0,0 +1,291 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +#include +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +/** + * @brief renumber edgelist (multi-GPU) + * + * This function assumes that edges are pre-shuffled to their target processes using the + * compute_gpu_id_from_edge_t functor. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. 
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as + * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex + * IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t functor to + * every (major, minor) pair should return the local GPU ID for this function to work (edges should + * be pre-shuffled). + * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is + * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). + * Vertex IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t + * functor to every (major, minor) pair should return the local GPU ID for this function to work + * (edges should be pre-shuffled). + * @param num_edgelist_edges Number of edges in the edgelist. + * @param is_hypergraph_partitioned Flag indicating whether we are assuming hypergraph partitioning + * (this flag will be removed in the future). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return std::tuple, partition_t, vertex_t, edge_t> + * Quadruplet of labels (vertex IDs before renumbering) for the entire set of vertices (assigned to + * this process in multi-GPU), partition_t object storing graph partitioning information, total + * number of vertices, and total number of edges. 
+ */ +template +std::enable_if_t, partition_t, vertex_t, edge_t>> +renumber_edgelist(raft::handle_t const& handle, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check = false); + +/** + * @brief renumber edgelist (single-GPU) + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as + * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex + * IDs are updated in-place ([INOUT] parameter). + * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is + * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). + * Vertex IDs are updated in-place ([INOUT] parameter). + * @param num_edgelist_edges Number of edges in the edgelist. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return rmm::device_uvector Labels (vertex IDs before renumbering) for the entire set + * of vertices. 
+ */ +template +std::enable_if_t> renumber_edgelist( + raft::handle_t const& handle, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool do_expensive_check = false); + +/** + * @brief renumber edgelist (multi-GPU) + * + * This version takes the vertex set in addition; this allows renumbering to include isolated + * vertices. This function assumes that vertices and edges are pre-shuffled to their target + * processes using the compute_gpu_id_from_vertex_t & compute_gpu_id_from_edge_t functors, + * respectively. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param local_vertices Part of the entire set of vertices in the graph to be renumbered. Applying + * the compute_gpu_id_from_vertex_t to every vertex should return the local GPU ID for this function + * to work (vertices should be pre-shuffled). + * @param num_local_vertices Number of local vertices. + * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as + * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex + * IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t functor to + * every (major, minor) pair should return the local GPU ID for this function to work (edges should + * be pre-shuffled). + * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is + * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). 
+ * Vertex IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t + * functor to every (major, minor) pair should return the local GPU ID for this function to work + * (edges should be pre-shuffled). + * @param num_edgelist_edges Number of edges in the edgelist. + * @param is_hypergraph_partitioned Flag indicating whether we are assuming hypergraph partitioning + * (this flag will be removed in the future). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return std::tuple, partition_t, vertex_t, edge_t> + * Quadruplet of labels (vertex IDs before renumbering) for the entire set of vertices (assigned to + * this process in multi-GPU), partition_t object storing graph partitioning information, total + * number of vertices, and total number of edges. + */ +template +std::enable_if_t, partition_t, vertex_t, edge_t>> +renumber_edgelist(raft::handle_t const& handle, + vertex_t const* local_vertices, + vertex_t num_local_vertices, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check = false); + +/** + * @brief renumber edgelist (single-GPU) + * + * This version takes the vertex set in addition; this allows renumbering to include isolated + * vertices. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param vertices The entire set of vertices in the graph to be renumbered. + * @param num_vertices Number of vertices. 
+ * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as + * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex + * IDs are updated in-place ([INOUT] parameter). + * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is + * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored). + * Vertex IDs are updated in-place ([INOUT] parameter). + * @param num_edgelist_edges Number of edges in the edgelist. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return rmm::device_uvector Labels (vertex IDs before renumbering) for the entire set + * of vertices. + */ +template +std::enable_if_t> renumber_edgelist( + raft::handle_t const& handle, + vertex_t const* vertices, + vertex_t num_vertices, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool do_expensive_check = false); + +/** + * @brief Compute the coarsened graph. + * + * Aggregates the vertices with the same label to a new vertex in the output coarsened graph. + * Multi-edges in the coarsened graph are collapsed to a single edge with its weight equal to the + * sum of multi-edge weights. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam store_transposed Flag indicating whether to store the graph adjacency matrix as is or as + * transposed. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. 
+ * @param graph_view Graph view object of the input graph to be coarsened. + * @param labels Vertex labels (assigned to this process in multi-GPU) to be used in coarsening. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return std::tuple>, rmm::device_uvector> Tuple of the coarsened graph and labels mapped to the + * vertices (assigned to this process in multi-GPU) in the coarsened graph. + */ +template +std::tuple>, + rmm::device_uvector> +coarsen_graph( + raft::handle_t const& handle, + graph_view_t const& graph_view, + vertex_t const* labels, + bool do_expensive_check = false); + +/** + * @brief Relabel old labels to new labels. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param old_new_label_pairs Pairs of an old label and the corresponding new label (each process + * holds only part of the entire old labels and the corresponding new labels; partitioning can be + * arbitrary). + * @param num_label_pairs Number of (old, new) label pairs. + * @param labels Labels to be relabeled. This initially holds old labels. Old labels are updated to + * new labels in-place ([INOUT] parameter). + * @param num_labels Number of labels to be relabeled. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return Nothing; the new labels are written in-place to @p labels. + */ +template +void relabel(raft::handle_t const& handle, + std::tuple old_new_label_pairs, + vertex_t num_label_pairs, + vertex_t* labels /* [INOUT] */, + vertex_t num_labels, + bool do_expensive_check = false); + +/** + * @brief extract induced subgraph(s). 
+ * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. + * @tparam store_transposed Flag indicating whether to store the graph adjacency matrix as is or as + * transposed. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object, we extract induced subgraphs from @p graph_view. + * @param subgraph_offsets Pointer to subgraph vertex offsets (size == @p num_subgraphs + 1). + * @param subgraph_vertices Pointer to subgraph vertices (size == @p subgraph_offsets[@p + * num_subgraphs]). The elements of @p subgraph_vertices for each subgraph should be sorted in + * ascending order and unique. + * @param num_subgraphs Number of induced subgraphs to extract. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return std::tuple, rmm::device_uvector, + * rmm::device_uvector, rmm::device_uvector> Quadruplet of edge major (destination + * if @p store_transposed is true, source otherwise) vertices, edge minor (source if @p + * store_transposed is true, destination otherwise) vertices, edge weights, and edge offsets for + * each induced subgraph (size == num_subgraphs + 1). The sizes of the edge major & minor vertices + * are edge_offsets[num_subgraphs]. The size of the edge weights is either + * edge_offsets[num_subgraphs] (if @p graph_view is weighted) or 0 (if @p graph_view is unweighted). 
+ */ +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs( + raft::handle_t const& handle, + graph_view_t const& graph_view, + size_t const* subgraph_offsets /* size == num_subgraphs + 1 */, + vertex_t const* subgraph_vertices /* size == subgraph_offsets[num_subgraphs] */, + size_t num_subgraphs, + bool do_expensive_check = false); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp index ba327047b1d..d2ae1150970 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/experimental/graph_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -332,6 +332,7 @@ class graph_view_t 0; } + // FIXME: this should be removed once MNMG Louvain is updated to use graph primitives partition_t get_partition() const { return partition_; } vertex_t get_number_of_local_vertices() const @@ -399,6 +400,11 @@ class graph_view_t adj_matrix_partition_offsets_{}; std::vector adj_matrix_partition_indices_{}; std::vector adj_matrix_partition_weights_{}; + std::vector adj_matrix_partition_number_of_edges_{}; partition_t partition_{}; @@ -567,6 +574,12 @@ class graph_view_tget_number_of_vertices(); } + edge_t get_number_of_local_adj_matrix_partition_edges(size_t adj_matrix_partition_idx) const + { + assert(adj_matrix_partition_idx == 0); + return this->get_number_of_edges(); + } + vertex_t get_local_adj_matrix_partition_row_first(size_t adj_matrix_partition_idx) const { assert(adj_matrix_partition_idx == 0); @@ -629,6 +642,7 @@ class graph_view_t segment_offsets_{}; // segment offsets based on vertex degree, relevant // only if sorted_by_global_degree is true }; diff --git a/cpp/include/matrix_partition_device.cuh 
b/cpp/include/matrix_partition_device.cuh index 53796530f60..b41119e7be6 100644 --- a/cpp/include/matrix_partition_device.cuh +++ b/cpp/include/matrix_partition_device.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -30,11 +30,14 @@ class matrix_partition_device_base_t { public: matrix_partition_device_base_t(edge_t const* offsets, vertex_t const* indices, - weight_t const* weights) - : offsets_(offsets), indices_(indices), weights_(weights) + weight_t const* weights, + edge_t number_of_edges) + : offsets_(offsets), indices_(indices), weights_(weights), number_of_edges_(number_of_edges) { } + __host__ __device__ edge_t get_number_of_edges() const { return number_of_edges_; } + __device__ thrust::tuple get_local_edges( vertex_t major_offset) const noexcept { @@ -50,11 +53,17 @@ class matrix_partition_device_base_t { return *(offsets_ + (major_offset + 1)) - *(offsets_ + major_offset); } + __device__ edge_t get_local_offset(vertex_t major_offset) const noexcept + { + return *(offsets_ + major_offset); + } + private: // should be trivially copyable to device edge_t const* offsets_{nullptr}; vertex_t const* indices_{nullptr}; weight_t const* weights_{nullptr}; + edge_t number_of_edges_{0}; }; template @@ -73,7 +82,8 @@ class matrix_partition_device_t( graph_view.offsets(partition_idx), graph_view.indices(partition_idx), - graph_view.weights(partition_idx)), + graph_view.weights(partition_idx), + graph_view.get_number_of_local_adj_matrix_partition_edges(partition_idx)), major_first_(GraphViewType::is_adj_matrix_transposed ? 
graph_view.get_local_adj_matrix_partition_col_first(partition_idx) : graph_view.get_local_adj_matrix_partition_row_first(partition_idx)), @@ -93,7 +103,7 @@ class matrix_partition_device_t( - graph_view.offsets(), graph_view.indices(), graph_view.weights()), + graph_view.offsets(), + graph_view.indices(), + graph_view.weights(), + graph_view.get_number_of_edges()), number_of_vertices_(graph_view.get_number_of_vertices()) { assert(partition_idx == 0); diff --git a/cpp/include/patterns/any_of_adj_matrix_row.cuh b/cpp/include/patterns/any_of_adj_matrix_row.cuh index 199e7c230ef..a367ec2a50c 100644 --- a/cpp/include/patterns/any_of_adj_matrix_row.cuh +++ b/cpp/include/patterns/any_of_adj_matrix_row.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,8 +16,8 @@ #pragma once #include -#include #include +#include #include #include diff --git a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh b/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh index 760775c03d4..d4559de06af 100644 --- a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh +++ b/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,10 @@ #include #include #include -#include +#include +#include #include +#include #include #include @@ -120,10 +122,10 @@ void copy_to_matrix_major(raft::handle_t const& handle, for (int i = 0; i < row_comm_size; ++i) { rmm::device_uvector rx_vertices(row_comm_rank == i ? 
size_t{0} : rx_counts[i], handle.get_stream()); - auto rx_tmp_buffer = - allocate_comm_buffer::value_type>( - rx_counts[i], handle.get_stream()); - auto rx_value_first = get_comm_buffer_begin< + auto rx_tmp_buffer = allocate_dataframe_buffer< + typename std::iterator_traits::value_type>(rx_counts[i], + handle.get_stream()); + auto rx_value_first = get_dataframe_buffer_begin< typename std::iterator_traits::value_type>(rx_tmp_buffer); if (row_comm_rank == i) { @@ -173,12 +175,6 @@ void copy_to_matrix_major(raft::handle_t const& handle, map_first, matrix_major_value_output_first); } - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is as necessary rx_tmp_buffer will become out-of-scope - // once control flow exits this block (FIXME: we can reduce stream - // synchronization if we compute the maximum rx_counts and - // allocate rx_tmp_buffer outside the loop) } } } else { @@ -219,7 +215,7 @@ void copy_to_matrix_minor(raft::handle_t const& handle, // partitioning auto comm_src_rank = row_comm_rank * col_comm_size + col_comm_rank; auto comm_dst_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; - // FIXME: this branch may no longer necessary with NCCL backend + // FIXME: this branch may be no longer necessary with NCCL backend if (comm_src_rank == comm_rank) { assert(comm_dst_rank == comm_rank); thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), @@ -314,12 +310,11 @@ void copy_to_matrix_minor(raft::handle_t const& handle, vertex_partition_device_t vertex_partition(graph_view); rmm::device_uvector dst_vertices(rx_count, handle.get_stream()); - auto dst_tmp_buffer = - allocate_comm_buffer::value_type>( - rx_count, handle.get_stream()); - auto dst_value_first = - get_comm_buffer_begin::value_type>( - dst_tmp_buffer); + auto dst_tmp_buffer = allocate_dataframe_buffer< + typename std::iterator_traits::value_type>(rx_count, + handle.get_stream()); + auto dst_value_first = get_dataframe_buffer_begin< + 
typename std::iterator_traits::value_type>(dst_tmp_buffer); if (comm_src_rank == comm_rank) { thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), vertex_first, @@ -335,10 +330,10 @@ void copy_to_matrix_minor(raft::handle_t const& handle, vertex_value_input_first, dst_value_first); } else { - auto src_tmp_buffer = - allocate_comm_buffer::value_type>( - tx_count, handle.get_stream()); - auto src_value_first = get_comm_buffer_begin< + auto src_tmp_buffer = allocate_dataframe_buffer< + typename std::iterator_traits::value_type>(tx_count, + handle.get_stream()); + auto src_value_first = get_dataframe_buffer_begin< typename std::iterator_traits::value_type>(src_tmp_buffer); auto map_first = @@ -369,10 +364,6 @@ void copy_to_matrix_minor(raft::handle_t const& handle, rx_count, comm_src_rank, handle.get_stream()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is as necessary src_tmp_buffer will become out-of-scope - // once control flow exits this block } // FIXME: now we can clear tx_tmp_buffer @@ -383,10 +374,10 @@ void copy_to_matrix_minor(raft::handle_t const& handle, for (int i = 0; i < col_comm_size; ++i) { rmm::device_uvector rx_vertices(col_comm_rank == i ? 
size_t{0} : rx_counts[i], handle.get_stream()); - auto rx_tmp_buffer = - allocate_comm_buffer::value_type>( - rx_counts[i], handle.get_stream()); - auto rx_value_first = get_comm_buffer_begin< + auto rx_tmp_buffer = allocate_dataframe_buffer< + typename std::iterator_traits::value_type>(rx_counts[i], + handle.get_stream()); + auto rx_value_first = get_dataframe_buffer_begin< typename std::iterator_traits::value_type>(rx_tmp_buffer); // FIXME: these broadcast operations can be placed between ncclGroupStart() and @@ -423,17 +414,7 @@ void copy_to_matrix_minor(raft::handle_t const& handle, map_first, matrix_minor_value_output_first); } - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is as necessary rx_tmp_buffer will become out-of-scope - // once control flow exits this block (FIXME: we can reduce stream - // synchronization if we compute the maximum rx_counts and - // allocate rx_tmp_buffer outside the loop) } - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is as necessary dst_tmp_buffer will become out-of-scope once - // control flow exits this block } } else { assert(graph_view.get_number_of_local_vertices() == diff --git a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh index f3c36897dd6..3059cf95852 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,7 +19,8 @@ #include #include #include -#include +#include +#include #include #include @@ -377,8 +378,8 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, ? 
graph_view.get_number_of_local_adj_matrix_partition_rows() : graph_view.get_number_of_local_adj_matrix_partition_cols() : vertex_t{0}; - auto minor_tmp_buffer = allocate_comm_buffer(minor_tmp_buffer_size, handle.get_stream()); - auto minor_buffer_first = get_comm_buffer_begin(minor_tmp_buffer); + auto minor_tmp_buffer = allocate_dataframe_buffer(minor_tmp_buffer_size, handle.get_stream()); + auto minor_buffer_first = get_dataframe_buffer_begin(minor_tmp_buffer); if (in != GraphViewType::is_adj_matrix_transposed) { auto minor_init = init; @@ -424,8 +425,9 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, : graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i) : vertex_t{0}; } - auto major_tmp_buffer = allocate_comm_buffer(major_tmp_buffer_size, handle.get_stream()); - auto major_buffer_first = get_comm_buffer_begin(major_tmp_buffer); + auto major_tmp_buffer = + allocate_dataframe_buffer(major_tmp_buffer_size, handle.get_stream()); + auto major_buffer_first = get_dataframe_buffer_begin(major_tmp_buffer); auto major_init = T{}; if (in == GraphViewType::is_adj_matrix_transposed) { @@ -523,12 +525,6 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, handle.get_stream()); } } - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is as necessary major_tmp_buffer will become out-of-scope once - // control flow exits this block (FIXME: we can reduce stream - // synchronization if we compute the maximum major_tmp_buffer_size and - // allocate major_tmp_buffer outside the loop) } if (GraphViewType::is_multi_gpu && (in != GraphViewType::is_adj_matrix_transposed)) { @@ -590,10 +586,6 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, } } } - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is as necessary minor_tmp_buffer will become out-of-scope once - // control flow exits this block } } // namespace detail @@ -627,7 +619,7 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& 
handle, * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, * get_number_of_local_adj_matrix_partition_cols())) and returns a value to be reduced. - * @param init Initial value to be added to the reduced @e_op return values for each vertex. + * @param init Initial value to be added to the reduced @p e_op return values for each vertex. * @param vertex_value_output_first Iterator pointing to the vertex property variables for the first * (inclusive) vertex (assigned to tihs process in multi-GPU). `vertex_value_output_last` * (exclusive) is deduced as @p vertex_value_output_first + @p @@ -689,7 +681,7 @@ void copy_v_transform_reduce_in_nbr(raft::handle_t const& handle, * adj_matrix_col_value_input_first + j) (where i is in [0, * graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, * get_number_of_local_adj_matrix_partition_cols())) and returns a value to be reduced. - * @param init Initial value to be added to the reduced @e_op return values for each vertex. + * @param init Initial value to be added to the reduced @p e_op return values for each vertex. * @param vertex_value_output_first Iterator pointing to the vertex property variables for the * first (inclusive) vertex (assigned to tihs process in multi-GPU). `vertex_value_output_last` * (exclusive) is deduced as @p vertex_value_output_first + @p diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh new file mode 100644 index 00000000000..785f8197aff --- /dev/null +++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh @@ -0,0 +1,522 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +// FIXME: block size requires tuning +int32_t constexpr copy_v_transform_reduce_key_aggregated_out_nbr_for_all_block_size = 128; + +template +__global__ void for_all_major_for_all_nbr_low_degree( + matrix_partition_device_t matrix_partition, + typename GraphViewType::vertex_type major_first, + typename GraphViewType::vertex_type major_last, + VertexIterator adj_matrix_minor_key_first, + typename GraphViewType::vertex_type* major_vertices, + typename GraphViewType::vertex_type* minor_keys, + typename GraphViewType::weight_type* key_aggregated_edge_weights, + typename GraphViewType::vertex_type invalid_vertex) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); + auto idx = static_cast(tid); + + while (idx < static_cast(major_last - major_first)) { + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_degree{}; + auto major_offset = major_start_offset + idx; + thrust::tie(indices, weights, local_degree) = + 
matrix_partition.get_local_edges(static_cast(major_offset)); + if (local_degree > 0) { + auto local_offset = matrix_partition.get_local_offset(major_offset); + auto minor_key_first = thrust::make_transform_iterator( + indices, [matrix_partition, adj_matrix_minor_key_first] __device__(auto minor) { + return *(adj_matrix_minor_key_first + + matrix_partition.get_minor_offset_from_minor_nocheck(minor)); + }); + thrust::copy( + thrust::seq, minor_key_first, minor_key_first + local_degree, minor_keys + local_offset); + if (weights == nullptr) { + thrust::sort( + thrust::seq, minor_keys + local_offset, minor_keys + local_offset + local_degree); + } else { + thrust::copy( + thrust::seq, weights, weights + local_degree, key_aggregated_edge_weights + local_offset); + thrust::sort_by_key(thrust::seq, + minor_keys + local_offset, + minor_keys + local_offset + local_degree, + key_aggregated_edge_weights + local_offset); + } + // in-place reduce_by_key + vertex_t key_idx{0}; + key_aggregated_edge_weights[local_offset + key_idx] = + weights != nullptr ? weights[0] : weight_t{1.0}; + for (edge_t i = 1; i < local_degree; ++i) { + if (minor_keys[local_offset + i] == minor_keys[local_offset + key_idx]) { + key_aggregated_edge_weights[local_offset + key_idx] += + weights != nullptr ? weights[i] : weight_t{1.0}; + } else { + ++key_idx; + minor_keys[local_offset + key_idx] = minor_keys[local_offset + i]; + key_aggregated_edge_weights[local_offset + key_idx] = + weights != nullptr ? 
weights[i] : weight_t{1.0}; + } + } + thrust::fill(thrust::seq, + major_vertices + local_offset, + major_vertices + local_offset + key_idx, + matrix_partition.get_major_from_major_offset_nocheck(major_offset)); + thrust::fill(thrust::seq, + major_vertices + local_offset + key_idx, + major_vertices + local_offset + local_degree, + invalid_vertex); + } + + idx += gridDim.x * blockDim.x; + } +} + +} // namespace detail + +/** + * @brief Iterate over every vertex's key-aggregated outgoing edges to update vertex properties. + * + * This function is inspired by thrust::transfrom_reduce() (iteration over the outgoing edges + * part) and thrust::copy() (update vertex properties part, take transform_reduce output as copy + * input). + * Unlike copy_v_transform_reduce_out_nbr, this function first aggregates outgoing edges by key to + * support two level reduction for every vertex. + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row + * input properties. + * @tparam VertexIterator Type of the iterator for graph adjacency matrix column key values for + * aggregation (key type should coincide with vertex type). + * @tparam ValueIterator Type of the iterator for values in (key, value) pairs. + * @tparam KeyAggregatedEdgeOp Type of the quinary key-aggregated edge operator. + * @tparam ReduceOp Type of the binary reduction operator. + * @tparam T Type of the initial value for reduction over the key-aggregated outgoing edges. + * @tparam VertexValueOutputIterator Type of the iterator for vertex output property variables. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. 
+ * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input + * properties for the first (inclusive) row (assigned to this process in multi-GPU). + * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + * + @p graph_view.get_number_of_local_adj_matrix_partition_rows(). + * @param adj_matrix_col_key_first Iterator pointing to the adjacency matrix column key (for + * aggregation) for the first (inclusive) column (assigned to this process in multi-GPU). + * `adj_matrix_col_key_last` (exclusive) is deduced as @p adj_matrix_col_key_first + @p + * graph_view.get_number_of_local_adj_matrix_partition_cols(). + * @param map_key_first Iterator pointing to the first (inclusive) key in (key, value) pairs + * (assigned to this process in multi-GPU, + * `cugraph::experimental::detail::compute_gpu_id_from_vertex_t` is used to map keys to processes). + * (Key, value) pairs may be provided by transform_reduce_by_adj_matrix_row_key_e() or + * transform_reduce_by_adj_matrix_col_key_e(). + * @param map_key_last Iterator pointing to the last (exclusive) key in (key, value) pairs (assigned + * to this process in multi-GPU). + * @param map_value_first Iterator pointing to the first (inclusive) value in (key, value) pairs + * (assigned to this process in multi-GPU). `map_value_last` (exclusive) is deduced as @p + * map_value_first + thrust::distance(@p map_key_first, @p map_key_last). + * @param key_aggregated_e_op Quinary operator takes edge source, key, aggregated edge weight, *(@p + * adj_matrix_row_value_input_first + i), and value for the key stored in the input (key, value) + * pairs provided by @p map_key_first, @p map_key_last, and @p map_value_first (aggregated over the + * entire set of processes in multi-GPU). + * @param reduce_op Binary operator takes two input arguments and reduce the two variables to one. 
+ * @param init Initial value to be added to the reduced @p key_aggregated_e_op return values for + * each vertex. + * @param vertex_value_output_first Iterator pointing to the vertex property variables for the + * first (inclusive) vertex (assigned to tihs process in multi-GPU). `vertex_value_output_last` + * (exclusive) is deduced as @p vertex_value_output_first + @p + * graph_view.get_number_of_local_vertices(). + */ +template +void copy_v_transform_reduce_key_aggregated_out_nbr( + raft::handle_t const& handle, + GraphViewType const& graph_view, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + VertexIterator adj_matrix_col_key_first, + VertexIterator map_key_first, + VertexIterator map_key_last, + ValueIterator map_value_first, + KeyAggregatedEdgeOp key_aggregated_e_op, + ReduceOp reduce_op, + T init, + VertexValueOutputIterator vertex_value_output_first) +{ + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); + static_assert(std::is_same::value_type, + typename GraphViewType::vertex_type>::value); + + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using value_t = typename std::iterator_traits::value_type; + + double constexpr load_factor = 0.7; + + // 1. build a cuco::static_map object for the k, v pairs. 
+ + auto kv_map_ptr = std::make_unique>( + static_cast(static_cast(thrust::distance(map_key_first, map_key_last)) / + load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value); + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last)); + + if (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + rmm::device_uvector unique_keys( + graph_view.get_number_of_local_adj_matrix_partition_cols(), handle.get_stream()); + thrust::copy( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + adj_matrix_col_key_first, + adj_matrix_col_key_first + graph_view.get_number_of_local_adj_matrix_partition_cols(), + unique_keys.begin()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_keys.begin(), + unique_keys.end()); + auto last = thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_keys.begin(), + unique_keys.end()); + unique_keys.resize(thrust::distance(unique_keys.begin(), last), handle.get_stream()); + + rmm::device_uvector rx_unique_keys(0, handle.get_stream()); + std::vector rx_value_counts{}; + std::tie(rx_unique_keys, rx_value_counts) = groupby_gpuid_and_shuffle_values( + comm, + unique_keys.begin(), + unique_keys.end(), + [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__(auto val) { + return key_func(val); + }, + handle.get_stream()); + + rmm::device_uvector values_for_unique_keys(rx_unique_keys.size(), handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream + + kv_map_ptr->find(rx_unique_keys.begin(), rx_unique_keys.end(), 
values_for_unique_keys.begin()); + + rmm::device_uvector rx_values_for_unique_keys(0, handle.get_stream()); + + std::tie(rx_values_for_unique_keys, std::ignore) = + shuffle_values(comm, values_for_unique_keys.begin(), rx_value_counts, handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream + + kv_map_ptr.reset(); + + kv_map_ptr = std::make_unique>( + static_cast(static_cast(unique_keys.size()) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value); + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(unique_keys.begin(), rx_values_for_unique_keys.begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + + kv_map_ptr->insert(pair_first, pair_first + unique_keys.size()); + } + + // 2. aggregate each vertex out-going edges based on keys and transform-reduce. + + auto loop_count = size_t{1}; + if (GraphViewType::is_multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + loop_count = graph_view.is_hypergraph_partitioned() + ? graph_view.get_number_of_local_adj_matrix_partitions() + : static_cast(row_comm_size); + } + + rmm::device_uvector major_vertices(0, handle.get_stream()); + auto e_op_result_buffer = allocate_dataframe_buffer(0, handle.get_stream()); + for (size_t i = 0; i < loop_count; ++i) { + matrix_partition_device_t matrix_partition( + graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 
0 : i); + + int comm_root_rank = 0; + if (GraphViewType::is_multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + comm_root_rank = graph_view.is_hypergraph_partitioned() ? i * row_comm_size + row_comm_rank + : col_comm_rank * row_comm_size + i; + } + + auto num_edges = thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(graph_view.get_vertex_partition_first(comm_root_rank)), + thrust::make_counting_iterator(graph_view.get_vertex_partition_last(comm_root_rank)), + [matrix_partition] __device__(auto row) { + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + return matrix_partition.get_local_degree(row_offset); + }, + edge_t{0}, + thrust::plus()); + + rmm::device_uvector tmp_major_vertices(num_edges, handle.get_stream()); + rmm::device_uvector tmp_minor_keys(tmp_major_vertices.size(), handle.get_stream()); + rmm::device_uvector tmp_key_aggregated_edge_weights(tmp_major_vertices.size(), + handle.get_stream()); + + if (graph_view.get_vertex_partition_size(comm_root_rank) > 0) { + raft::grid_1d_thread_t update_grid( + graph_view.get_vertex_partition_size(comm_root_rank), + detail::copy_v_transform_reduce_key_aggregated_out_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + auto constexpr invalid_vertex = invalid_vertex_id::value; + + // FIXME: This is highly inefficient for graphs with high-degree vertices. If we renumber + // vertices to insure that rows within a partition are sorted by their out-degree in + // decreasing order, we will apply this kernel only to low out-degree vertices. 
+ detail::for_all_major_for_all_nbr_low_degree<<>>( + matrix_partition, + graph_view.get_vertex_partition_first(comm_root_rank), + graph_view.get_vertex_partition_last(comm_root_rank), + adj_matrix_col_key_first, + tmp_major_vertices.data(), + tmp_minor_keys.data(), + tmp_key_aggregated_edge_weights.data(), + invalid_vertex); + } + + auto triplet_first = thrust::make_zip_iterator(thrust::make_tuple( + tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin())); + auto last = + thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + triplet_first, + triplet_first + tmp_major_vertices.size(), + [] __device__(auto val) { + return thrust::get<0>(val) == invalid_vertex_id::value; + }); + tmp_major_vertices.resize(thrust::distance(triplet_first, last), handle.get_stream()); + tmp_minor_keys.resize(tmp_major_vertices.size(), handle.get_stream()); + tmp_key_aggregated_edge_weights.resize(tmp_major_vertices.size(), handle.get_stream()); + + if (GraphViewType::is_multi_gpu) { + auto& sub_comm = handle.get_subcomm(graph_view.is_hypergraph_partitioned() + ? 
cugraph::partition_2d::key_naming_t().col_name() + : cugraph::partition_2d::key_naming_t().row_name()); + auto const sub_comm_size = sub_comm.get_size(); + + triplet_first = + thrust::make_zip_iterator(thrust::make_tuple(tmp_major_vertices.begin(), + tmp_minor_keys.begin(), + tmp_key_aggregated_edge_weights.begin())); + rmm::device_uvector rx_major_vertices(0, handle.get_stream()); + rmm::device_uvector rx_minor_keys(0, handle.get_stream()); + rmm::device_uvector rx_key_aggregated_edge_weights(0, handle.get_stream()); + std::forward_as_tuple( + std::tie(rx_major_vertices, rx_minor_keys, rx_key_aggregated_edge_weights), std::ignore) = + groupby_gpuid_and_shuffle_values( + sub_comm, + triplet_first, + triplet_first + tmp_major_vertices.size(), + [key_func = detail::compute_gpu_id_from_vertex_t{sub_comm_size}] __device__( + auto val) { return key_func(thrust::get<1>(val)); }, + handle.get_stream()); + + tmp_major_vertices = std::move(rx_major_vertices); + tmp_minor_keys = std::move(rx_minor_keys); + tmp_key_aggregated_edge_weights = std::move(rx_key_aggregated_edge_weights); + } + + auto tmp_e_op_result_buffer = + allocate_dataframe_buffer(tmp_major_vertices.size(), handle.get_stream()); + auto tmp_e_op_result_buffer_first = get_dataframe_buffer_begin(tmp_e_op_result_buffer); + + triplet_first = thrust::make_zip_iterator(thrust::make_tuple( + tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin())); + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + triplet_first, + triplet_first + major_vertices.size(), + tmp_e_op_result_buffer_first, + [adj_matrix_row_value_input_first, + key_aggregated_e_op, + matrix_partition, + kv_map = kv_map_ptr->get_device_view()] __device__(auto val) { + auto major = thrust::get<0>(val); + auto key = thrust::get<1>(val); + auto w = thrust::get<2>(val); + return key_aggregated_e_op( + major, + key, + w, + *(adj_matrix_row_value_input_first + + 
matrix_partition.get_major_offset_from_major_nocheck(major)), + kv_map.find(key)->second); + }); + tmp_minor_keys.resize(0, handle.get_stream()); + tmp_key_aggregated_edge_weights.resize(0, handle.get_stream()); + tmp_minor_keys.shrink_to_fit(handle.get_stream()); + tmp_key_aggregated_edge_weights.shrink_to_fit(handle.get_stream()); + + if (GraphViewType::is_multi_gpu) { + auto& sub_comm = handle.get_subcomm(graph_view.is_hypergraph_partitioned() + ? cugraph::partition_2d::key_naming_t().col_name() + : cugraph::partition_2d::key_naming_t().row_name()); + auto const sub_comm_rank = sub_comm.get_rank(); + auto const sub_comm_size = sub_comm.get_size(); + + // FIXME: additional optimization is possible if reduce_op is a pure function (and reduce_op + // can be mapped to ncclRedOp_t). + + auto rx_sizes = + host_scalar_gather(sub_comm, tmp_major_vertices.size(), i, handle.get_stream()); + std::vector rx_displs( + static_cast(sub_comm_rank) == i ? sub_comm_size : int{0}, size_t{0}); + if (static_cast(sub_comm_rank) == i) { + std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1); + } + rmm::device_uvector rx_major_vertices( + static_cast(sub_comm_rank) == i + ? 
std::accumulate(rx_sizes.begin(), rx_sizes.end(), size_t{0}) + : size_t{0}, + handle.get_stream()); + auto rx_tmp_e_op_result_buffer = + allocate_dataframe_buffer(rx_major_vertices.size(), handle.get_stream()); + + device_gatherv(sub_comm, + tmp_major_vertices.data(), + rx_major_vertices.data(), + tmp_major_vertices.size(), + rx_sizes, + rx_displs, + i, + handle.get_stream()); + device_gatherv(sub_comm, + tmp_e_op_result_buffer_first, + get_dataframe_buffer_begin(rx_tmp_e_op_result_buffer), + tmp_major_vertices.size(), + rx_sizes, + rx_displs, + i, + handle.get_stream()); + + if (static_cast(sub_comm_rank) == i) { + major_vertices = std::move(rx_major_vertices); + e_op_result_buffer = std::move(rx_tmp_e_op_result_buffer); + } + } else { + major_vertices = std::move(tmp_major_vertices); + e_op_result_buffer = std::move(tmp_e_op_result_buffer); + } + } + + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_value_output_first, + vertex_value_output_first + graph_view.get_number_of_local_vertices(), + T{}); + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + major_vertices.begin(), + major_vertices.end(), + get_dataframe_buffer_begin(e_op_result_buffer)); + + auto num_uniques = thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(major_vertices.size()), + [major_vertices = major_vertices.data()] __device__(auto i) { + return ((i == 0) || (major_vertices[i] != major_vertices[i - 1])) ? true : false; + }); + rmm::device_uvector unique_major_vertices(num_uniques, handle.get_stream()); + + auto major_vertex_first = thrust::make_transform_iterator( + thrust::make_counting_iterator(size_t{0}), + [major_vertices = major_vertices.data()] __device__(auto i) { + return ((i == 0) || (major_vertices[i] == major_vertices[i - 1])) + ? 
major_vertices[i] + : invalid_vertex_id::value; + }); + thrust::copy_if( + major_vertex_first, + major_vertex_first + major_vertices.size(), + unique_major_vertices.begin(), + [] __device__(auto major) { return major != invalid_vertex_id::value; }); + thrust::reduce_by_key( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + major_vertices.begin(), + major_vertices.end(), + get_dataframe_buffer_begin(e_op_result_buffer), + thrust::make_discard_iterator(), + thrust::make_permutation_iterator( + vertex_value_output_first, + thrust::make_transform_iterator( + major_vertices.begin(), + [vertex_partition = vertex_partition_device_t(graph_view)] __device__( + auto v) { return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); })), + reduce_op); + + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_value_output_first, + vertex_value_output_first + graph_view.get_number_of_local_vertices(), + vertex_value_output_first, + [reduce_op, init] __device__(auto val) { return reduce_op(val, init); }); +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/count_if_e.cuh b/cpp/include/patterns/count_if_e.cuh index 4f0f0a7a43e..63b31f9c44e 100644 --- a/cpp/include/patterns/count_if_e.cuh +++ b/cpp/include/patterns/count_if_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,8 +18,8 @@ #include #include #include -#include #include +#include #include #include diff --git a/cpp/include/patterns/count_if_v.cuh b/cpp/include/patterns/count_if_v.cuh index c90b259cdde..6b28cd7ae12 100644 --- a/cpp/include/patterns/count_if_v.cuh +++ b/cpp/include/patterns/count_if_v.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,8 +16,8 @@ #pragma once #include -#include #include +#include #include #include diff --git a/cpp/include/patterns/reduce_op.cuh b/cpp/include/patterns/reduce_op.cuh index e9011914292..d92d3352d08 100644 --- a/cpp/include/patterns/reduce_op.cuh +++ b/cpp/include/patterns/reduce_op.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -29,6 +29,7 @@ struct any { __host__ __device__ T operator()(T const& lhs, T const& rhs) const { return lhs; } }; +// FIXME: thrust::minimum can replace this. // reducing N elements (operator < should be defined between any two elements), the minimum element // should be selected. template diff --git a/cpp/include/patterns/reduce_v.cuh b/cpp/include/patterns/reduce_v.cuh index 12224dc55f4..b232d37b78d 100644 --- a/cpp/include/patterns/reduce_v.cuh +++ b/cpp/include/patterns/reduce_v.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,8 +16,8 @@ #pragma once #include -#include #include +#include #include diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh new file mode 100644 index 00000000000..70b6dc92752 --- /dev/null +++ b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh @@ -0,0 +1,453 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +#include + +#include + +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +// FIXME: block size requires tuning +int32_t constexpr transform_reduce_by_key_e_for_all_block_size = 128; + +template +__global__ void for_all_major_for_all_nbr_low_degree( + matrix_partition_device_t matrix_partition, + typename GraphViewType::vertex_type major_first, + typename GraphViewType::vertex_type major_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + VertexIterator adj_matrix_row_col_key_first, + EdgeOp e_op, + typename GraphViewType::vertex_type* keys, + T* values) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); + auto idx = static_cast(tid); + + while (idx < static_cast(major_last - major_first)) { + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_degree{}; + auto major_offset = major_start_offset + idx; + thrust::tie(indices, weights, local_degree) = + matrix_partition.get_local_edges(static_cast(major_offset)); + if 
(local_degree > 0) { + auto transform_op = [&matrix_partition, + &adj_matrix_row_value_input_first, + &adj_matrix_col_value_input_first, + &adj_matrix_row_col_key_first, + &e_op, + major_offset, + indices, + weights] __device__(auto i) { + auto minor = indices[i]; + auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + auto row = GraphViewType::is_adj_matrix_transposed + ? minor + : matrix_partition.get_major_from_major_offset_nocheck(major_offset); + auto col = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_from_major_offset_nocheck(major_offset) + : minor; + auto row_offset = GraphViewType::is_adj_matrix_transposed + ? minor_offset + : static_cast(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? static_cast(major_offset) + : minor_offset; + + auto key = + *(adj_matrix_row_col_key_first + + ((GraphViewType::is_adj_matrix_transposed != adj_matrix_row_key) ? 
major_offset + : minor_offset)); + auto e_op_result = evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + + return thrust::make_tuple(key, e_op_result); + }; + + auto local_offset = matrix_partition.get_local_offset(major_offset); + thrust::transform( + thrust::seq, + thrust::make_counting_iterator(edge_t{0}), + thrust::make_counting_iterator(local_degree), + thrust::make_zip_iterator(thrust::make_tuple(keys + local_offset, values + local_offset)), + transform_op); + } + + idx += gridDim.x * blockDim.x; + } +} + +template +std::tuple, + decltype(allocate_dataframe_buffer(0, cudaStream_t{nullptr}))> +transform_reduce_by_adj_matrix_row_col_key_e( + raft::handle_t const& handle, + GraphViewType const& graph_view, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + VertexIterator adj_matrix_row_col_key_first, + EdgeOp e_op, + T init) +{ + static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); + static_assert(std::is_same::value_type, + typename GraphViewType::vertex_type>::value); + + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + auto loop_count = size_t{1}; + if (GraphViewType::is_multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + loop_count = graph_view.is_hypergraph_partitioned() + ? 
graph_view.get_number_of_local_adj_matrix_partitions() + : static_cast(row_comm_size); + } + + rmm::device_uvector keys(0, handle.get_stream()); + auto value_buffer = allocate_dataframe_buffer(0, handle.get_stream()); + for (size_t i = 0; i < loop_count; ++i) { + matrix_partition_device_t matrix_partition( + graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 0 : i); + + int comm_root_rank = 0; + if (GraphViewType::is_multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + comm_root_rank = graph_view.is_hypergraph_partitioned() ? i * row_comm_size + row_comm_rank + : col_comm_rank * row_comm_size + i; + } + + auto num_edges = thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(graph_view.get_vertex_partition_first(comm_root_rank)), + thrust::make_counting_iterator(graph_view.get_vertex_partition_last(comm_root_rank)), + [matrix_partition] __device__(auto row) { + auto major_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + return matrix_partition.get_local_degree(major_offset); + }, + edge_t{0}, + thrust::plus()); + + rmm::device_uvector tmp_keys(num_edges, handle.get_stream()); + auto tmp_value_buffer = allocate_dataframe_buffer(tmp_keys.size(), handle.get_stream()); + + if (graph_view.get_vertex_partition_size(comm_root_rank) > 0) { + raft::grid_1d_thread_t update_grid(graph_view.get_vertex_partition_size(comm_root_rank), + detail::transform_reduce_by_key_e_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + // FIXME: This is highly inefficient for graphs with high-degree vertices. 
If we renumber + // vertices to ensure that rows within a partition are sorted by their out-degree in + // decreasing order, we will apply this kernel only to low out-degree vertices. + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + graph_view.get_vertex_partition_first(comm_root_rank), + graph_view.get_vertex_partition_last(comm_root_rank), + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + adj_matrix_row_col_key_first, + e_op, + tmp_keys.data(), + get_dataframe_buffer_begin(tmp_value_buffer)); + } + + if (GraphViewType::is_multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + tmp_keys.begin(), + tmp_keys.end(), + get_dataframe_buffer_begin(tmp_value_buffer)); + + auto num_uniques = + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(tmp_keys.size()), + [tmp_keys = tmp_keys.data()] __device__(auto i) { + return ((i == 0) || (tmp_keys[i] != tmp_keys[i - 1])) ? 
true : false; + }); + rmm::device_uvector unique_keys(num_uniques, handle.get_stream()); + auto value_for_unique_key_buffer = + allocate_dataframe_buffer(unique_keys.size(), handle.get_stream()); + + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + tmp_keys.begin(), + tmp_keys.end(), + get_dataframe_buffer_begin(tmp_value_buffer), + unique_keys.begin(), + get_dataframe_buffer_begin(value_for_unique_key_buffer)); + + rmm::device_uvector rx_unique_keys(0, handle.get_stream()); + auto rx_value_for_unique_key_buffer = allocate_dataframe_buffer(0, handle.get_stream()); + std::tie(rx_unique_keys, rx_value_for_unique_key_buffer, std::ignore) = + groupby_gpuid_and_shuffle_kv_pairs( + comm, + unique_keys.begin(), + unique_keys.end(), + get_dataframe_buffer_begin(value_for_unique_key_buffer), + [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__( + auto val) { return key_func(val); }, + handle.get_stream()); + + // FIXME: we can reduce after shuffle + + tmp_keys = std::move(rx_unique_keys); + tmp_value_buffer = std::move(rx_value_for_unique_key_buffer); + } + + auto cur_size = keys.size(); + // FIXME: this can lead to frequent costly reallocation; we may be able to avoid this if we can + // reserve address space to avoid expensive reallocation. 
+ // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management + keys.resize(cur_size + tmp_keys.size(), handle.get_stream()); + resize_dataframe_buffer(value_buffer, keys.size(), handle.get_stream()); + + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + tmp_keys.begin(), + tmp_keys.end(), + keys.begin() + cur_size); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + get_dataframe_buffer_begin(tmp_value_buffer), + get_dataframe_buffer_begin(tmp_value_buffer) + tmp_keys.size(), + get_dataframe_buffer_begin(value_buffer) + cur_size); + } + + if (GraphViewType::is_multi_gpu) { + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + keys.begin(), + keys.end(), + get_dataframe_buffer_begin(value_buffer)); + + auto num_uniques = + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(keys.size()), + [keys = keys.data()] __device__(auto i) { + return ((i == 0) || (keys[i] != keys[i - 1])) ? true : false; + }); + rmm::device_uvector unique_keys(num_uniques, handle.get_stream()); + auto value_for_unique_key_buffer = + allocate_dataframe_buffer(unique_keys.size(), handle.get_stream()); + + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + keys.begin(), + keys.end(), + get_dataframe_buffer_begin(value_buffer), + unique_keys.begin(), + get_dataframe_buffer_begin(value_for_unique_key_buffer)); + + keys = std::move(unique_keys); + value_buffer = std::move(value_for_unique_key_buffer); + } + + // FIXME: add init + + return std::make_tuple(std::move(keys), std::move(value_buffer)); +} + +} // namespace detail + +// FIXME: EdgeOp & VertexOp in update_frontier_v_push_if_out_nbr concatenates push indicator or +// bucket idx with the value while EdgeOp here does not. This is inconsistent. Better be fixed. 
+/** + * @brief Iterate over the entire set of edges and reduce @p e_op outputs to (key, value) pairs. + * + * This function is inspired by thrust::transform_reduce() and thrust::reduce_by_key(). Keys for + * edges are determined by the graph adjacency matrix rows. + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row + * input properties. + * @tparam AdjMatrixColValueInputIterator Type of the iterator for graph adjacency matrix column + * input properties. + * @tparam VertexIterator Type of the iterator for keys in (key, value) pairs (key type should + * coincide with vertex type). + * @tparam EdgeOp Type of the quaternary (or quinary) edge operator. + * @tparam T Type of the values in (key, value) pairs. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input + * properties for the first (inclusive) row (assigned to this process in multi-GPU). + * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + + * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). + * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input + * properties for the first (inclusive) column (assigned to this process in multi-GPU). + * `adj_matrix_col_value_input_last` (exclusive) is deduced as @p adj_matrix_col_value_input_first + * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). + * @param adj_matrix_row_key_first Iterator pointing to the adjacency matrix row key for the first + * (inclusive) row (assigned to this process in multi-GPU). 
`adj_matrix_row_key_last` (exclusive) + * is deduced as @p adj_matrix_row_key_first + @p graph_view.get_number_of_local_adj_matrix_rows(). + * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge + * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + + * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, + * get_number_of_local_adj_matrix_partition_cols())) and returns a transformed value to be reduced. + * @param init Initial value to be added to the value in each transform-reduced (key, value) pair. + * @return std::tuple Tuple of rmm::device_uvector and + * rmm::device_uvector (if T is arithmetic scalar) or a tuple of rmm::device_uvector objects (if + * T is a thrust::tuple type of arithmetic scalar types, one rmm::device_uvector object per scalar + * type). + */ +template +auto transform_reduce_by_adj_matrix_row_key_e( + raft::handle_t const& handle, + GraphViewType const& graph_view, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + VertexIterator adj_matrix_row_key_first, + EdgeOp e_op, + T init) +{ + static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); + static_assert(std::is_same::value_type, + typename GraphViewType::vertex_type>::value); + + return detail::transform_reduce_by_adj_matrix_row_col_key_e( + handle, + graph_view, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + adj_matrix_row_key_first, + e_op, + init); +} + +// FIXME: EdgeOp & VertexOp in update_frontier_v_push_if_out_nbr concatenates push indicator or +// bucket idx with the value while EdgeOp here does not. This is inconsistent. Better be fixed. +/** + * @brief Iterate over the entire set of edges and reduce @p e_op outputs to (key, value) pairs. 
+ * + * This function is inspired by thrust::transform_reduce() and thrust::reduce_by_key(). Keys for + * edges are determined by the graph adjacency matrix columns. + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row + * input properties. + * @tparam AdjMatrixColValueInputIterator Type of the iterator for graph adjacency matrix column + * input properties. + * @tparam VertexIterator Type of the iterator for keys in (key, value) pairs (key type should + * coincide with vertex type). + * @tparam EdgeOp Type of the quaternary (or quinary) edge operator. + * @tparam T Type of the values in (key, value) pairs. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input + * properties for the first (inclusive) row (assigned to this process in multi-GPU). + * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + + * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). + * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input + * properties for the first (inclusive) column (assigned to this process in multi-GPU). + * `adj_matrix_col_value_input_last` (exclusive) is deduced as @p adj_matrix_col_value_input_first + * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). + * @param adj_matrix_col_key_first Iterator pointing to the adjacency matrix column key for the + * first (inclusive) column (assigned to this process in multi-GPU). + * `adj_matrix_col_key_last` (exclusive) is deduced as @p adj_matrix_col_key_first + @p + * graph_view.get_number_of_local_adj_matrix_cols(). 
+ * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge + * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + + * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, + * get_number_of_local_adj_matrix_partition_cols())) and returns a transformed value to be reduced. + * @param init Initial value to be added to the value in each transform-reduced (key, value) pair. + * @return std::tuple Tuple of rmm::device_uvector and + * rmm::device_uvector (if T is arithmetic scalar) or a tuple of rmm::device_uvector objects (if + * T is a thrust::tuple type of arithmetic scalar types, one rmm::device_uvector object per scalar + * type). + */ +template +auto transform_reduce_by_adj_matrix_col_key_e( + raft::handle_t const& handle, + GraphViewType const& graph_view, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + VertexIterator adj_matrix_col_key_first, + EdgeOp e_op, + T init) +{ + static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); + static_assert(std::is_same::value_type, + typename GraphViewType::vertex_type>::value); + + return detail::transform_reduce_by_adj_matrix_row_col_key_e( + handle, + graph_view, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + adj_matrix_col_key_first, + e_op, + init); +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/transform_reduce_e.cuh b/cpp/include/patterns/transform_reduce_e.cuh index 797facd4657..946c15a16a0 100644 --- a/cpp/include/patterns/transform_reduce_e.cuh +++ b/cpp/include/patterns/transform_reduce_e.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/cpp/include/patterns/transform_reduce_v.cuh b/cpp/include/patterns/transform_reduce_v.cuh index 02538c36f47..17ffb89206a 100644 --- a/cpp/include/patterns/transform_reduce_v.cuh +++ b/cpp/include/patterns/transform_reduce_v.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,8 +16,8 @@ #pragma once #include -#include #include +#include #include diff --git a/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh b/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh index f5af03d647c..39aca7cacae 100644 --- a/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh +++ b/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,8 +16,8 @@ #pragma once #include -#include #include +#include #include diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh index a2250482c68..4c76322fa79 100644 --- a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,8 +21,9 @@ #include #include #include -#include +#include #include +#include #include #include @@ -155,7 +156,7 @@ size_t reduce_buffer_elements(raft::handle_t const& handle, // FIXME: actually, we can find how many unique keys are here by now. // FIXME: if GraphViewType::is_multi_gpu is true, this should be executed on the GPU holding the // vertex unless reduce_op is a pure function. - rmm::device_vector keys(num_buffer_elements); + rmm::device_uvector keys(num_buffer_elements, handle.get_stream()); rmm::device_vector values(num_buffer_elements); auto it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), buffer_key_output_first, @@ -175,9 +176,10 @@ size_t reduce_buffer_elements(raft::handle_t const& handle, values.begin(), values.begin() + num_reduced_buffer_elements, buffer_payload_output_first); - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is necessary as kyes & values will become out-of-scope once - // this function returns + // FIXME: this is unnecessary if we use a tuple of rmm::device_uvector objects for values + CUDA_TRY( + cudaStreamSynchronize(handle.get_stream())); // this is necessary as values will become + // out-of-scope once this function returns return num_reduced_buffer_elements; } } @@ -400,7 +402,7 @@ void update_frontier_v_push_if_out_nbr( frontier_size = thrust::distance(vertex_first, vertex_last); } - edge_t max_pushes = + auto max_pushes = frontier_size > 0 ? frontier_rows.size() > 0 ? thrust::transform_reduce( diff --git a/cpp/include/patterns/vertex_frontier.cuh b/cpp/include/patterns/vertex_frontier.cuh index ccb9e1a5a0d..2126a27ee5a 100644 --- a/cpp/include/patterns/vertex_frontier.cuh +++ b/cpp/include/patterns/vertex_frontier.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,8 @@ */ #pragma once -#include #include +#include #include #include diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index cd621a516ea..e94190897b8 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,6 +18,7 @@ #include #include #include +#include namespace cugraph { namespace cython { @@ -109,6 +110,197 @@ struct graph_container_t { experimental::graph_properties_t graph_props; }; +/** + * @brief Owning struct. Allows returning multiple edge lists and edge offsets. + * cython only + * + * @param number_of_vertices The total number of vertices + * @param number_of_edges The total number of edges (number of elements in src_indices, + dst_indices and edge_data) + * @param number_of_subgraph The number of subgraphs, number of elements in subgraph_offsets - 1 + * @param source_indices This array of size E (number of edges) contains + * the index of the + * source for each edge. Indices must be in the range [0, V-1]. + * @param destination_indices This array of size E (number of edges) contains + * the index of the + * destination for each edge. Indices must be in the range [0, V-1]. + * @param edge_data This array size E (number of edges) contains + * the weight for each + * edge. This array can be null in which case the graph is considered + * unweighted. 
+ * @param subgraph_offsets This array size number_of_subgraph + 1 contains edge offsets + for each subgraph + + + */ +struct cy_multi_edgelists_t { + size_t number_of_vertices; + size_t number_of_edges; + size_t number_of_subgraph; + std::unique_ptr src_indices; + std::unique_ptr dst_indices; + std::unique_ptr edge_data; + std::unique_ptr subgraph_offsets; +}; + +// replacement for std::tuple<,,>, since std::tuple is not +// supported in cython +// +template +struct major_minor_weights_t { + explicit major_minor_weights_t(raft::handle_t const& handle) + : shuffled_major_vertices_(0, handle.get_stream()), + shuffled_minor_vertices_(0, handle.get_stream()), + shuffled_weights_(0, handle.get_stream()) + { + } + rmm::device_uvector& get_major(void) { return shuffled_major_vertices_; } + + rmm::device_uvector& get_minor(void) { return shuffled_minor_vertices_; } + + rmm::device_uvector& get_weights(void) { return shuffled_weights_; } + + std::pair, size_t> get_major_wrap( + void) // const: triggers errors in Cython autogen-ed C++ + { + return std::make_pair(std::make_unique(shuffled_major_vertices_.release()), + sizeof(vertex_t)); + } + + std::pair, size_t> get_minor_wrap(void) // const + { + return std::make_pair(std::make_unique(shuffled_minor_vertices_.release()), + sizeof(vertex_t)); + } + + std::pair, size_t> get_weights_wrap(void) // const + { + return std::make_pair(std::make_unique(shuffled_weights_.release()), + sizeof(weight_t)); + } + + private: + rmm::device_uvector shuffled_major_vertices_; + rmm::device_uvector shuffled_minor_vertices_; + rmm::device_uvector shuffled_weights_; +}; + +// wrapper for renumber_edgelist() return +// (unrenumbering maps, etc.) 
+// +template +struct renum_quad_t { + explicit renum_quad_t(raft::handle_t const& handle) + : dv_(0, handle.get_stream()), part_(std::vector(), false, 0, 0, 0, 0) + { + } + + rmm::device_uvector& get_dv(void) { return dv_; } + + std::pair, size_t> get_dv_wrap( + void) // const: see above explanation + { + return std::make_pair(std::make_unique(dv_.release()), sizeof(vertex_t)); + } + + cugraph::experimental::partition_t& get_partition(void) { return part_; } + vertex_t& get_num_vertices(void) { return nv_; } + edge_t& get_num_edges(void) { return ne_; } + + // `partition_t` pass-through getters + // + int get_part_row_size() const { return part_.get_row_size(); } + + int get_part_col_size() const { return part_.get_col_size(); } + + int get_part_comm_rank() const { return part_.get_comm_rank(); } + + // FIXME: part_.get_vertex_partition_offsets() returns a std::vector + // + std::unique_ptr> get_partition_offsets(void) // const + { + return std::make_unique>(part_.get_vertex_partition_offsets()); + } + + std::pair get_part_local_vertex_range() const + { + auto tpl_v = part_.get_local_vertex_range(); + return std::make_pair(std::get<0>(tpl_v), std::get<1>(tpl_v)); + } + + vertex_t get_part_local_vertex_first() const { return part_.get_local_vertex_first(); } + + vertex_t get_part_local_vertex_last() const { return part_.get_local_vertex_last(); } + + std::pair get_part_vertex_partition_range(size_t vertex_partition_idx) const + { + auto tpl_v = part_.get_vertex_partition_range(vertex_partition_idx); + return std::make_pair(std::get<0>(tpl_v), std::get<1>(tpl_v)); + } + + vertex_t get_part_vertex_partition_first(size_t vertex_partition_idx) const + { + return part_.get_vertex_partition_first(vertex_partition_idx); + } + + vertex_t get_part_vertex_partition_last(size_t vertex_partition_idx) const + { + return part_.get_vertex_partition_last(vertex_partition_idx); + } + + vertex_t get_part_vertex_partition_size(size_t vertex_partition_idx) const + { + return 
part_.get_vertex_partition_size(vertex_partition_idx); + } + + size_t get_part_number_of_matrix_partitions() const + { + return part_.get_number_of_matrix_partitions(); + } + + std::pair get_part_matrix_partition_major_range(size_t partition_idx) const + { + auto tpl_v = part_.get_matrix_partition_major_range(partition_idx); + return std::make_pair(std::get<0>(tpl_v), std::get<1>(tpl_v)); + } + + vertex_t get_part_matrix_partition_major_first(size_t partition_idx) const + { + return part_.get_matrix_partition_major_first(partition_idx); + } + + vertex_t get_part_matrix_partition_major_last(size_t partition_idx) const + { + return part_.get_matrix_partition_major_last(partition_idx); + } + + vertex_t get_part_matrix_partition_major_value_start_offset(size_t partition_idx) const + { + return part_.get_matrix_partition_major_value_start_offset(partition_idx); // forwards to partition_t, whose getters carry no `get_part_` prefix (as in every sibling getter here) + } + + std::pair get_part_matrix_partition_minor_range() const + { + auto tpl_v = part_.get_matrix_partition_minor_range(); + return std::make_pair(std::get<0>(tpl_v), std::get<1>(tpl_v)); + } + + vertex_t get_part_matrix_partition_minor_first() const + { + return part_.get_matrix_partition_minor_first(); + } + + vertex_t get_part_matrix_partition_minor_last() const + { + return part_.get_matrix_partition_minor_last(); + } + + private: + rmm::device_uvector dv_; + cugraph::experimental::partition_t part_; + vertex_t nv_{}; // value-initialized: the constructor does not set it; assignable through get_num_vertices() + edge_t ne_{}; // value-initialized: the constructor does not set it; assignable through get_num_edges() +}; // FIXME: finish description for vertex_partition_offsets // // Factory function for populating an empty graph container with a new graph @@ -246,6 +438,38 @@ void call_sssp(raft::handle_t const& handle, vertex_t* predecessors, const vertex_t source_vertex); +// Wrapper for calling egonet through a 
graph container +template +std::unique_ptr call_egonet(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t* source_vertex, + vertex_t n_subgraphs, + vertex_t radius); + +// wrapper for shuffling: +// +template +std::unique_ptr> call_shuffle( + 
raft::handle_t const& handle, + vertex_t* + edgelist_major_vertices, // [IN / OUT]: groupby_gpuid_and_shuffle_values() sorts in-place + vertex_t* edgelist_minor_vertices, // [IN / OUT] + weight_t* edgelist_weights, // [IN / OUT] + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned); // = false + +// Wrapper for calling renumber_edgelist() inplace: +// +template +std::unique_ptr> call_renumber( + raft::handle_t const& handle, + vertex_t* shuffled_edgelist_major_vertices /* [INOUT] */, + vertex_t* shuffled_edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check, + bool multi_gpu); + // Helper for setting up subcommunicators, typically called as part of the // user-initiated comms initialization in Python. // diff --git a/cpp/include/utilities/dataframe_buffer.cuh b/cpp/include/utilities/dataframe_buffer.cuh new file mode 100644 index 00000000000..06352b8e217 --- /dev/null +++ b/cpp/include/utilities/dataframe_buffer.cuh @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include + +#include +#include + +#include +#include + +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +template +auto allocate_dataframe_buffer_tuple_element_impl(size_t buffer_size, cudaStream_t stream) +{ + using element_t = typename thrust::tuple_element::type; + return rmm::device_uvector(buffer_size, stream); +} + +template +auto allocate_dataframe_buffer_tuple_impl(std::index_sequence, + size_t buffer_size, + cudaStream_t stream) +{ + return std::make_tuple( + allocate_dataframe_buffer_tuple_element_impl(buffer_size, stream)...); +} + +template +void resize_dataframe_buffer_tuple_element_impl(BufferType& buffer, + size_t new_buffer_size, + cudaStream_t stream) +{ + std::get(buffer).resize(new_buffer_size, stream); + resize_dataframe_buffer_tuple_element_impl( + buffer, new_buffer_size, stream); +} + +template +void resize_dataframe_buffer_tuple_impl(BufferType& buffer, + size_t new_buffer_size, + cudaStream_t stream) +{ +} + +template +auto get_dataframe_buffer_begin_tuple_element_impl(BufferType& buffer) +{ + using element_t = typename thrust::tuple_element::type; + return std::get(buffer).begin(); +} + +template +auto get_dataframe_buffer_begin_tuple_impl(std::index_sequence, BufferType& buffer) +{ + // thrust::make_tuple instead of std::make_tuple as this is fed to thrust::make_zip_iterator. 
+ return thrust::make_tuple( + get_dataframe_buffer_begin_tuple_element_impl(buffer)...); +} + +} // namespace detail + +template ::value>* = nullptr> +auto allocate_dataframe_buffer(size_t buffer_size, cudaStream_t stream) +{ + return rmm::device_uvector(buffer_size, stream); +} + +template ::value>* = nullptr> +auto allocate_dataframe_buffer(size_t buffer_size, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + return detail::allocate_dataframe_buffer_tuple_impl( + std::make_index_sequence(), buffer_size, stream); +} + +template ::value>* = nullptr> +void resize_dataframe_buffer(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) +{ + buffer.resize(new_buffer_size, stream); +} + +template ::value>* = nullptr> +void resize_dataframe_buffer(BufferType& buffer, size_t new_buffer_size, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + detail::resize_dataframe_buffer_tuple_impl( + buffer, new_buffer_size, stream); +} + +template ::value>* = nullptr> +auto get_dataframe_buffer_begin(BufferType& buffer) +{ + return buffer.begin(); +} + +template ::value>* = nullptr> +auto get_dataframe_buffer_begin(BufferType& buffer) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + return thrust::make_zip_iterator(detail::get_dataframe_buffer_begin_tuple_impl( + std::make_index_sequence(), buffer)); +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/utilities/comm_utils.cuh b/cpp/include/utilities/device_comm.cuh similarity index 75% rename from cpp/include/utilities/comm_utils.cuh rename to cpp/include/utilities/device_comm.cuh index fb69fff49c9..8c3b0f86a47 100644 --- a/cpp/include/utilities/comm_utils.cuh +++ b/cpp/include/utilities/device_comm.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -24,7 +24,6 @@ #include #include -#include #include namespace cugraph { @@ -32,66 +31,6 @@ namespace experimental { namespace detail { -template -struct update_vector_of_tuple_scalar_elements_from_tuple_impl { - void update(std::vector& tuple_scalar_elements, TupleType const& tuple) const - { - using element_t = typename thrust::tuple_element::type; - static_assert(sizeof(element_t) <= sizeof(int64_t)); - auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); - *ptr = thrust::get(tuple); - update_vector_of_tuple_scalar_elements_from_tuple_impl().update( - tuple_scalar_elements, tuple); - } -}; - -template -struct update_vector_of_tuple_scalar_elements_from_tuple_impl { - void update(std::vector& tuple_scalar_elements, TupleType const& tuple) const { return; } -}; - -template -struct update_tuple_from_vector_of_tuple_scalar_elements_impl { - void update(TupleType& tuple, std::vector const& tuple_scalar_elements) const - { - using element_t = typename thrust::tuple_element::type; - static_assert(sizeof(element_t) <= sizeof(int64_t)); - auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); - thrust::get(tuple) = *ptr; - update_tuple_from_vector_of_tuple_scalar_elements_impl().update( - tuple, tuple_scalar_elements); - } -}; - -template -struct update_tuple_from_vector_of_tuple_scalar_elements_impl { - void update(TupleType& tuple, std::vector const& tuple_scalar_elements) const { return; } -}; - -template -struct host_allreduce_tuple_scalar_element_impl { - void run(raft::comms::comms_t const& comm, - rmm::device_uvector& tuple_scalar_elements, - cudaStream_t stream) const - { - using element_t = typename thrust::tuple_element::type; - static_assert(sizeof(element_t) <= sizeof(int64_t)); - auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); - comm.allreduce(ptr, ptr, 1, raft::comms::op_t::SUM, stream); - 
host_allreduce_tuple_scalar_element_impl().run( - comm, tuple_scalar_elements, stream); - } -}; - -template -struct host_allreduce_tuple_scalar_element_impl { - void run(raft::comms::comms_t const& comm, - rmm::device_uvector& tuple_scalar_elements, - cudaStream_t stream) const - { - } -}; - template T* iter_to_raw_ptr(T* ptr) { @@ -621,183 +560,88 @@ struct device_allgatherv_tuple_iterator_element_impl -auto allocate_comm_buffer_tuple_element_impl(size_t buffer_size, cudaStream_t stream) -{ - using element_t = typename thrust::tuple_element::type; - return rmm::device_uvector(buffer_size, stream); -} - -template -auto allocate_comm_buffer_tuple_impl(std::index_sequence, - size_t buffer_size, - cudaStream_t stream) -{ - return thrust::make_tuple( - allocate_comm_buffer_tuple_element_impl(buffer_size, stream)...); -} - -template -auto get_comm_buffer_begin_tuple_element_impl(BufferType& buffer) -{ - using element_t = typename thrust::tuple_element::type; - return thrust::get(buffer).begin(); -} - -template -auto get_comm_buffer_begin_tuple_impl(std::index_sequence, BufferType& buffer) -{ - return thrust::make_tuple(get_comm_buffer_begin_tuple_element_impl(buffer)...); -} - -} // namespace detail - -template -std::enable_if_t::value, T> host_scalar_allreduce( - raft::comms::comms_t const& comm, T input, cudaStream_t stream) -{ - rmm::device_uvector d_input(1, stream); - raft::update_device(d_input.data(), &input, 1, stream); - comm.allreduce(d_input.data(), d_input.data(), 1, raft::comms::op_t::SUM, stream); - T h_input{}; - raft::update_host(&h_input, d_input.data(), 1, stream); - auto status = comm.sync_stream(stream); - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); - return h_input; -} - -template -std::enable_if_t::value, T> -host_scalar_allreduce(raft::comms::comms_t const& comm, T input, cudaStream_t stream) +template +std::enable_if_t::value, void> +device_gatherv_impl(raft::comms::comms_t const& comm, + InputIterator 
input_first, + OutputIterator output_first, + size_t sendcount, + std::vector const& recvcounts, + std::vector const& displacements, + int root, + cudaStream_t stream) { - size_t constexpr tuple_size = thrust::tuple_size::value; - std::vector h_tuple_scalar_elements(tuple_size); - rmm::device_uvector d_tuple_scalar_elements(tuple_size, stream); - T ret{}; - - detail::update_vector_of_tuple_scalar_elements_from_tuple_impl().update( - h_tuple_scalar_elements, input); - raft::update_device( - d_tuple_scalar_elements.data(), h_tuple_scalar_elements.data(), tuple_size, stream); - detail::host_allreduce_tuple_scalar_element_impl().run( - comm, d_tuple_scalar_elements, stream); - raft::update_host( - h_tuple_scalar_elements.data(), d_tuple_scalar_elements.data(), tuple_size, stream); - auto status = comm.sync_stream(stream); - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); - detail::update_tuple_from_vector_of_tuple_scalar_elements_impl().update( - ret, h_tuple_scalar_elements); - - return ret; + // no-op } -template -std::enable_if_t::value, T> host_scalar_bcast( - raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_gatherv_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t sendcount, + std::vector const& recvcounts, + std::vector const& displacements, + int root, + cudaStream_t stream) { - rmm::device_uvector d_input(1, stream); - if (comm.get_rank() == root) { raft::update_device(d_input.data(), &input, 1, stream); } - comm.bcast(d_input.data(), 1, root, stream); - auto h_input = input; - if (comm.get_rank() != root) { raft::update_host(&h_input, d_input.data(), 1, stream); } - auto status = comm.sync_stream(stream); - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); - return h_input; + static_assert(std::is_same::value_type, + 
typename std::iterator_traits::value_type>::value); + // FIXME: should be enabled once the RAFT gather & gatherv PR is merged +#if 1 + CUGRAPH_FAIL("Unimplemented."); +#else + comm.gatherv(iter_to_raw_ptr(input_first), + iter_to_raw_ptr(output_first), + sendcount, + recvcounts.data(), + displacements.data(), + root, + stream); +#endif } -template -std::enable_if_t::value, T> -host_scalar_bcast(raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) -{ - size_t constexpr tuple_size = thrust::tuple_size::value; - std::vector h_tuple_scalar_elements(tuple_size); - rmm::device_uvector d_tuple_scalar_elements(tuple_size, stream); - auto ret = input; - - if (comm.get_rank() == root) { - detail::update_vector_of_tuple_scalar_elements_from_tuple_impl() - .update(h_tuple_scalar_elements, input); - raft::update_device( - d_tuple_scalar_elements.data(), h_tuple_scalar_elements.data(), tuple_size, stream); - } - comm.bcast(d_tuple_scalar_elements.data(), d_tuple_scalar_elements.size(), root, stream); - if (comm.get_rank() != root) { - raft::update_host( - h_tuple_scalar_elements.data(), d_tuple_scalar_elements.data(), tuple_size, stream); - } - auto status = comm.sync_stream(stream); - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); - if (comm.get_rank() != root) { - detail::update_tuple_from_vector_of_tuple_scalar_elements_impl() - .update(ret, h_tuple_scalar_elements); +template +struct device_gatherv_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t sendcount, + std::vector const& recvcounts, + std::vector const& displacements, + int root, + cudaStream_t stream) const + { + device_gatherv_impl(comm, + thrust::get(input_first.get_iterator_tuple()), + thrust::get(output_first.get_iterator_tuple()), + sendcount, + recvcounts, + displacements, + root, + stream); + device_gatherv_tuple_iterator_element_impl().run( + comm, 
input_first, output_first, sendcount, recvcounts, displacements, root, stream); } +}; - return ret; -} - -template -std::enable_if_t::value, std::vector> host_scalar_allgather( - raft::comms::comms_t const& comm, T input, cudaStream_t stream) -{ - std::vector rx_counts(comm.get_size(), size_t{1}); - std::vector displacements(rx_counts.size(), size_t{0}); - std::iota(displacements.begin(), displacements.end(), size_t{0}); - rmm::device_uvector d_outputs(rx_counts.size(), stream); - raft::update_device(d_outputs.data() + comm.get_rank(), &input, 1, stream); - comm.allgatherv(d_outputs.data() + comm.get_rank(), - d_outputs.data(), - rx_counts.data(), - displacements.data(), - stream); - std::vector h_outputs(rx_counts.size(), size_t{0}); - raft::update_host(h_outputs.data(), d_outputs.data(), rx_counts.size(), stream); - auto status = comm.sync_stream(stream); - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); - return h_outputs; -} - -template -std::enable_if_t::value, std::vector> -host_scalar_allgather(raft::comms::comms_t const& comm, T input, cudaStream_t stream) -{ - size_t constexpr tuple_size = thrust::tuple_size::value; - std::vector rx_counts(comm.get_size(), tuple_size); - std::vector displacements(rx_counts.size(), size_t{0}); - for (size_t i = 0; i < displacements.size(); ++i) { displacements[i] = i * tuple_size; } - std::vector h_tuple_scalar_elements(tuple_size); - rmm::device_uvector d_allgathered_tuple_scalar_elements(comm.get_size() * tuple_size, - stream); - - detail::update_vector_of_tuple_scalar_elements_from_tuple_impl().update( - h_tuple_scalar_elements, input); - raft::update_device(d_allgathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size, - h_tuple_scalar_elements.data(), - tuple_size, - stream); - comm.allgatherv(d_allgathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size, - d_allgathered_tuple_scalar_elements.data(), - rx_counts.data(), - displacements.data(), - stream); - 
std::vector h_allgathered_tuple_scalar_elements(comm.get_size() * tuple_size); - raft::update_host(h_allgathered_tuple_scalar_elements.data(), - d_allgathered_tuple_scalar_elements.data(), - comm.get_size() * tuple_size, - stream); - auto status = comm.sync_stream(stream); - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); - - std::vector ret(comm.get_size()); - for (size_t i = 0; i < ret.size(); ++i) { - std::vector h_tuple_scalar_elements( - h_allgathered_tuple_scalar_elements.data() + i * tuple_size, - h_allgathered_tuple_scalar_elements.data() + (i + 1) * tuple_size); - detail::update_tuple_from_vector_of_tuple_scalar_elements_impl() - .update(ret[i], h_tuple_scalar_elements); +template +struct device_gatherv_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t sendcount, + std::vector const& recvcounts, + std::vector const& displacements, + int root, + cudaStream_t stream) const + { } +}; - return ret; -} +} // namespace detail template std::enable_if_t< @@ -1114,36 +958,49 @@ device_allgatherv(raft::comms::comms_t const& comm, .run(comm, input_first, output_first, recvcounts, displacements, stream); } -template ::value>* = nullptr> -auto allocate_comm_buffer(size_t buffer_size, cudaStream_t stream) +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_gatherv(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t sendcount, + std::vector const& recvcounts, + std::vector const& displacements, + int root, + cudaStream_t stream) { - return rmm::device_uvector(buffer_size, stream); + detail::device_gatherv_impl( + comm, input_first, output_first, sendcount, recvcounts, displacements, root, stream); } -template ::value>* = nullptr> -auto allocate_comm_buffer(size_t buffer_size, cudaStream_t stream) +template +std::enable_if_t< + 
is_thrust_tuple_of_arithmetic::value_type>::value && + is_thrust_tuple::value_type>::value, + void> +device_gatherv(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t sendcount, + std::vector const& recvcounts, + std::vector const& displacements, + int root, + cudaStream_t stream) { - size_t constexpr tuple_size = thrust::tuple_size::value; - return detail::allocate_comm_buffer_tuple_impl( - std::make_index_sequence(), buffer_size, stream); -} + static_assert( + thrust::tuple_size::value_type>::value == + thrust::tuple_size::value_type>::value); -template ::value>* = nullptr> -auto get_comm_buffer_begin(BufferType& buffer) -{ - return buffer.begin(); -} + size_t constexpr tuple_size = + thrust::tuple_size::value_type>::value; -template ::value>* = nullptr> -auto get_comm_buffer_begin(BufferType& buffer) -{ - size_t constexpr tuple_size = thrust::tuple_size::value; - return thrust::make_zip_iterator( - detail::get_comm_buffer_begin_tuple_impl(std::make_index_sequence(), buffer)); + detail::device_gatherv_tuple_iterator_element_impl() + .run(comm, input_first, output_first, sendcount, recvcounts, displacements, root, stream); } } // namespace experimental diff --git a/cpp/include/utilities/host_scalar_comm.cuh b/cpp/include/utilities/host_scalar_comm.cuh new file mode 100644 index 00000000000..dda0ce1f091 --- /dev/null +++ b/cpp/include/utilities/host_scalar_comm.cuh @@ -0,0 +1,399 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +template +struct update_vector_of_tuple_scalar_elements_from_tuple_impl { + void update(std::vector& tuple_scalar_elements, TupleType const& tuple) const + { + using element_t = typename thrust::tuple_element::type; + static_assert(sizeof(element_t) <= sizeof(int64_t)); + auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); + *ptr = thrust::get(tuple); + update_vector_of_tuple_scalar_elements_from_tuple_impl().update( + tuple_scalar_elements, tuple); + } +}; + +template +struct update_vector_of_tuple_scalar_elements_from_tuple_impl { + void update(std::vector& tuple_scalar_elements, TupleType const& tuple) const { return; } +}; + +template +struct update_tuple_from_vector_of_tuple_scalar_elements_impl { + void update(TupleType& tuple, std::vector const& tuple_scalar_elements) const + { + using element_t = typename thrust::tuple_element::type; + static_assert(sizeof(element_t) <= sizeof(int64_t)); + auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); + thrust::get(tuple) = *ptr; + update_tuple_from_vector_of_tuple_scalar_elements_impl().update( + tuple, tuple_scalar_elements); + } +}; + +template +struct update_tuple_from_vector_of_tuple_scalar_elements_impl { + void update(TupleType& tuple, std::vector const& tuple_scalar_elements) const { return; } +}; + +template +struct host_allreduce_tuple_scalar_element_impl { + void run(raft::comms::comms_t const& comm, + rmm::device_uvector& tuple_scalar_elements, + cudaStream_t stream) const + { + using element_t = typename thrust::tuple_element::type; + static_assert(sizeof(element_t) <= sizeof(int64_t)); + auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); + comm.allreduce(ptr, ptr, 1, raft::comms::op_t::SUM, stream); + 
host_allreduce_tuple_scalar_element_impl().run( + comm, tuple_scalar_elements, stream); + } +}; + +template +struct host_allreduce_tuple_scalar_element_impl { + void run(raft::comms::comms_t const& comm, + rmm::device_uvector& tuple_scalar_elements, + cudaStream_t stream) const + { + } +}; + +template +struct host_reduce_tuple_scalar_element_impl { + void run(raft::comms::comms_t const& comm, + rmm::device_uvector& tuple_scalar_elements, + int root, + cudaStream_t stream) const + { + using element_t = typename thrust::tuple_element::type; + static_assert(sizeof(element_t) <= sizeof(int64_t)); + auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); + comm.reduce(ptr, ptr, 1, raft::comms::op_t::SUM, root, stream); + host_reduce_tuple_scalar_element_impl().run( + comm, tuple_scalar_elements, root, stream); + } +}; + +template +struct host_reduce_tuple_scalar_element_impl { + void run(raft::comms::comms_t const& comm, + rmm::device_uvector& tuple_scalar_elements, + int root, + cudaStream_t stream) const + { + } +}; + +} // namespace detail + +template +std::enable_if_t::value, T> host_scalar_allreduce( + raft::comms::comms_t const& comm, T input, cudaStream_t stream) +{ + rmm::device_uvector d_input(1, stream); + raft::update_device(d_input.data(), &input, 1, stream); + comm.allreduce(d_input.data(), d_input.data(), 1, raft::comms::op_t::SUM, stream); + T h_input{}; + raft::update_host(&h_input, d_input.data(), 1, stream); + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + return h_input; +} + +template +std::enable_if_t::value, T> +host_scalar_allreduce(raft::comms::comms_t const& comm, T input, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + std::vector h_tuple_scalar_elements(tuple_size); + rmm::device_uvector d_tuple_scalar_elements(tuple_size, stream); + T ret{}; + + detail::update_vector_of_tuple_scalar_elements_from_tuple_impl().update( + 
h_tuple_scalar_elements, input); + raft::update_device( + d_tuple_scalar_elements.data(), h_tuple_scalar_elements.data(), tuple_size, stream); + detail::host_allreduce_tuple_scalar_element_impl().run( + comm, d_tuple_scalar_elements, stream); + raft::update_host( + h_tuple_scalar_elements.data(), d_tuple_scalar_elements.data(), tuple_size, stream); + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + detail::update_tuple_from_vector_of_tuple_scalar_elements_impl().update( + ret, h_tuple_scalar_elements); + + return ret; +} + +// Return value is valid only in root (return value may better be std::optional in C++17 or later) +template +std::enable_if_t::value, T> host_scalar_reduce( + raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +{ + rmm::device_uvector d_input(1, stream); + raft::update_device(d_input.data(), &input, 1, stream); + comm.reduce(d_input.data(), d_input.data(), 1, raft::comms::op_t::SUM, root, stream); + T h_input{}; + if (comm.get_rank() == root) { raft::update_host(&h_input, d_input.data(), 1, stream); } + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + return h_input; +} + +// Return value is valid only in root (return value may better be std::optional in C++17 or later) +template +std::enable_if_t::value, T> +host_scalar_reduce(raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + std::vector h_tuple_scalar_elements(tuple_size); + rmm::device_uvector d_tuple_scalar_elements(tuple_size, stream); + T ret{}; + + detail::update_vector_of_tuple_scalar_elements_from_tuple_impl().update( + h_tuple_scalar_elements, input); + raft::update_device( + d_tuple_scalar_elements.data(), h_tuple_scalar_elements.data(), tuple_size, stream); + detail::host_reduce_tuple_scalar_element_impl().run( + comm, 
d_tuple_scalar_elements, root, stream); + if (comm.get_rank() == root) { + raft::update_host( + h_tuple_scalar_elements.data(), d_tuple_scalar_elements.data(), tuple_size, stream); + } + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + if (comm.get_rank() == root) { + detail::update_tuple_from_vector_of_tuple_scalar_elements_impl() + .update(ret, h_tuple_scalar_elements); + } + + return ret; +} + +template +std::enable_if_t::value, T> host_scalar_bcast( + raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +{ + rmm::device_uvector d_input(1, stream); + if (comm.get_rank() == root) { raft::update_device(d_input.data(), &input, 1, stream); } + comm.bcast(d_input.data(), 1, root, stream); + auto h_input = input; + if (comm.get_rank() != root) { raft::update_host(&h_input, d_input.data(), 1, stream); } + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + return h_input; +} + +template +std::enable_if_t::value, T> +host_scalar_bcast(raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + std::vector h_tuple_scalar_elements(tuple_size); + rmm::device_uvector d_tuple_scalar_elements(tuple_size, stream); + auto ret = input; + + if (comm.get_rank() == root) { + detail::update_vector_of_tuple_scalar_elements_from_tuple_impl() + .update(h_tuple_scalar_elements, input); + raft::update_device( + d_tuple_scalar_elements.data(), h_tuple_scalar_elements.data(), tuple_size, stream); + } + comm.bcast(d_tuple_scalar_elements.data(), d_tuple_scalar_elements.size(), root, stream); + if (comm.get_rank() != root) { + raft::update_host( + h_tuple_scalar_elements.data(), d_tuple_scalar_elements.data(), tuple_size, stream); + } + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, 
"sync_stream() failure."); + if (comm.get_rank() != root) { + detail::update_tuple_from_vector_of_tuple_scalar_elements_impl() + .update(ret, h_tuple_scalar_elements); + } + + return ret; +} + +template +std::enable_if_t::value, std::vector> host_scalar_allgather( + raft::comms::comms_t const& comm, T input, cudaStream_t stream) +{ + std::vector rx_counts(comm.get_size(), size_t{1}); + std::vector displacements(rx_counts.size(), size_t{0}); + std::iota(displacements.begin(), displacements.end(), size_t{0}); + rmm::device_uvector d_outputs(rx_counts.size(), stream); + raft::update_device(d_outputs.data() + comm.get_rank(), &input, 1, stream); + // FIXME: better use allgather + comm.allgatherv(d_outputs.data() + comm.get_rank(), + d_outputs.data(), + rx_counts.data(), + displacements.data(), + stream); + std::vector h_outputs(rx_counts.size()); + raft::update_host(h_outputs.data(), d_outputs.data(), rx_counts.size(), stream); + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + return h_outputs; +} + +template +std::enable_if_t::value, std::vector> +host_scalar_allgather(raft::comms::comms_t const& comm, T input, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + std::vector rx_counts(comm.get_size(), tuple_size); + std::vector displacements(rx_counts.size(), size_t{0}); + for (size_t i = 0; i < displacements.size(); ++i) { displacements[i] = i * tuple_size; } + std::vector h_tuple_scalar_elements(tuple_size); + rmm::device_uvector d_allgathered_tuple_scalar_elements(comm.get_size() * tuple_size, + stream); + + detail::update_vector_of_tuple_scalar_elements_from_tuple_impl().update( + h_tuple_scalar_elements, input); + raft::update_device(d_allgathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size, + h_tuple_scalar_elements.data(), + tuple_size, + stream); + // FIXME: better use allgather + 
comm.allgatherv(d_allgathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size, + d_allgathered_tuple_scalar_elements.data(), + rx_counts.data(), + displacements.data(), + stream); + std::vector h_allgathered_tuple_scalar_elements(comm.get_size() * tuple_size); + raft::update_host(h_allgathered_tuple_scalar_elements.data(), + d_allgathered_tuple_scalar_elements.data(), + comm.get_size() * tuple_size, + stream); + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + + std::vector ret(comm.get_size()); + for (size_t i = 0; i < ret.size(); ++i) { + std::vector h_tuple_scalar_elements( + h_allgathered_tuple_scalar_elements.data() + i * tuple_size, + h_allgathered_tuple_scalar_elements.data() + (i + 1) * tuple_size); + detail::update_tuple_from_vector_of_tuple_scalar_elements_impl() + .update(ret[i], h_tuple_scalar_elements); + } + + return ret; +} + +// Return value is valid only in root (return value may better be std::optional in C++17 or later) +template +std::enable_if_t::value, std::vector> host_scalar_gather( + raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +{ + rmm::device_uvector d_outputs(comm.get_rank() == root ? comm.get_size() : int{1}, stream); + raft::update_device( + comm.get_rank() == root ? d_outputs.data() + comm.get_rank() : d_outputs.data(), + &input, + 1, + stream); + // FIXME: should be enabled once the RAFT gather & gatherv PR is merged +#if 1 + CUGRAPH_FAIL("Unimplemented."); +#else + comm.gather(comm.get_rank() == root ? d_outputs.data() + comm.get_rank() : d_outputs.data(), + d_outputs.data(), + size_t{1}, + root, + stream); +#endif + std::vector h_outputs(comm.get_rank() == root ? 
comm.get_size() : 0); + if (comm.get_rank() == root) { + raft::update_host(h_outputs.data(), d_outputs.data(), comm.get_size(), stream); + } + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + return h_outputs; +} + +// Return value is valid only in root (return value may better be std::optional in C++17 or later) +template +std::enable_if_t::value, std::vector> +host_scalar_gather(raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + std::vector h_tuple_scalar_elements(tuple_size); + rmm::device_uvector d_gathered_tuple_scalar_elements( + comm.get_rank() == root ? comm.get_size() * tuple_size : tuple_size, stream); + + detail::update_vector_of_tuple_scalar_elements_from_tuple_impl().update( + h_tuple_scalar_elements, input); + raft::update_device(comm.get_rank() == root + ? d_gathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size + : d_gathered_tuple_scalar_elements.data(), + h_tuple_scalar_elements.data(), + tuple_size, + stream); + // FIXME: should be enabled once the RAFT gather & gatherv PR is merged +#if 1 + CUGRAPH_FAIL("Unimplemented."); +#else + comm.gather(comm.get_rank() == root + ? d_gathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size + : d_gathered_tuple_scalar_elements.data(), + d_gathered_tuple_scalar_elements.data(), + tuple_size, + root, + stream); +#endif + std::vector h_gathered_tuple_scalar_elements( + comm.get_rank() == root ? 
comm.get_size() * tuple_size : size_t{0}); + if (comm.get_rank() == root) { + raft::update_host(h_gathered_tuple_scalar_elements.data(), + d_gathered_tuple_scalar_elements.data(), + comm.get_size() * tuple_size, + stream); + } + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + + std::vector ret(comm.get_size()); + if (comm.get_rank() == root) { + for (size_t i = 0; i < ret.size(); ++i) { + std::vector h_tuple_scalar_elements( + h_gathered_tuple_scalar_elements.data() + i * tuple_size, + h_gathered_tuple_scalar_elements.data() + (i + 1) * tuple_size); + detail::update_tuple_from_vector_of_tuple_scalar_elements_impl() + .update(ret[i], h_tuple_scalar_elements); + } + } + + return ret; +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/utilities/shuffle_comm.cuh b/cpp/include/utilities/shuffle_comm.cuh new file mode 100644 index 00000000000..7e04c7e1972 --- /dev/null +++ b/cpp/include/utilities/shuffle_comm.cuh @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include + +#include +#include +#include + +#include +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +template +rmm::device_uvector sort_and_count(raft::comms::comms_t const &comm, + ValueIterator tx_value_first /* [INOUT] */, + ValueIterator tx_value_last /* [INOUT] */, + ValueToGPUIdOp value_to_gpu_id_op, + cudaStream_t stream) +{ + auto const comm_size = comm.get_size(); + + thrust::sort(rmm::exec_policy(stream)->on(stream), + tx_value_first, + tx_value_last, + [value_to_gpu_id_op] __device__(auto lhs, auto rhs) { + return value_to_gpu_id_op(lhs) < value_to_gpu_id_op(rhs); + }); + + auto gpu_id_first = thrust::make_transform_iterator( + tx_value_first, + [value_to_gpu_id_op] __device__(auto value) { return value_to_gpu_id_op(value); }); + rmm::device_uvector d_tx_dst_ranks(comm_size, stream); + rmm::device_uvector d_tx_value_counts(comm_size, stream); + auto last = thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), + gpu_id_first, + gpu_id_first + thrust::distance(tx_value_first, tx_value_last), + thrust::make_constant_iterator(size_t{1}), + d_tx_dst_ranks.begin(), + d_tx_value_counts.begin()); + if (thrust::distance(d_tx_value_counts.begin(), thrust::get<1>(last)) < comm_size) { + rmm::device_uvector d_counts(comm_size, stream); + thrust::fill(rmm::exec_policy(stream)->on(stream), d_counts.begin(), d_counts.end(), size_t{0}); + thrust::scatter(rmm::exec_policy(stream)->on(stream), + d_tx_value_counts.begin(), + thrust::get<1>(last), + d_tx_dst_ranks.begin(), + d_counts.begin()); + d_tx_value_counts = std::move(d_counts); + } + + return d_tx_value_counts; +} + +template +rmm::device_uvector sort_and_count(raft::comms::comms_t const &comm, + VertexIterator tx_key_first /* [INOUT] */, + VertexIterator tx_key_last /* [INOUT] */, + ValueIterator tx_value_first /* [INOUT] */, + KeyToGPUIdOp key_to_gpu_id_op, + cudaStream_t stream) +{ + auto const comm_size = comm.get_size(); + 
+ thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), + tx_key_first, + tx_key_last, + tx_value_first, + [key_to_gpu_id_op] __device__(auto lhs, auto rhs) { + return key_to_gpu_id_op(lhs) < key_to_gpu_id_op(rhs); + }); + + auto gpu_id_first = thrust::make_transform_iterator( + tx_key_first, [key_to_gpu_id_op] __device__(auto key) { return key_to_gpu_id_op(key); }); + rmm::device_uvector d_tx_dst_ranks(comm_size, stream); + rmm::device_uvector d_tx_value_counts(comm_size, stream); + auto last = thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), + gpu_id_first, + gpu_id_first + thrust::distance(tx_key_first, tx_key_last), + thrust::make_constant_iterator(size_t{1}), + d_tx_dst_ranks.begin(), + d_tx_value_counts.begin()); + if (thrust::distance(d_tx_value_counts.begin(), thrust::get<1>(last)) < comm_size) { + rmm::device_uvector d_counts(comm_size, stream); + thrust::fill(rmm::exec_policy(stream)->on(stream), d_counts.begin(), d_counts.end(), size_t{0}); + thrust::scatter(rmm::exec_policy(stream)->on(stream), + d_tx_value_counts.begin(), + thrust::get<1>(last), + d_tx_dst_ranks.begin(), + d_counts.begin()); + d_tx_value_counts = std::move(d_counts); + } + + return d_tx_value_counts; +} + +// inline to suppress a complaint about ODR violation +inline std::tuple, + std::vector, + std::vector, + std::vector, + std::vector, + std::vector> +compute_tx_rx_counts_offsets_ranks(raft::comms::comms_t const &comm, + rmm::device_uvector const &d_tx_value_counts, + cudaStream_t stream) +{ + auto const comm_size = comm.get_size(); + + rmm::device_uvector d_rx_value_counts(comm_size, stream); + + // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released. 
+ std::vector tx_counts(comm_size, size_t{1}); + std::vector tx_offsets(comm_size); + std::iota(tx_offsets.begin(), tx_offsets.end(), size_t{0}); + std::vector tx_dst_ranks(comm_size); + std::iota(tx_dst_ranks.begin(), tx_dst_ranks.end(), int{0}); + std::vector rx_counts(comm_size, size_t{1}); + std::vector rx_offsets(comm_size); + std::iota(rx_offsets.begin(), rx_offsets.end(), size_t{0}); + std::vector rx_src_ranks(comm_size); + std::iota(rx_src_ranks.begin(), rx_src_ranks.end(), int{0}); + device_multicast_sendrecv(comm, + d_tx_value_counts.data(), + tx_counts, + tx_offsets, + tx_dst_ranks, + d_rx_value_counts.data(), + rx_counts, + rx_offsets, + rx_src_ranks, + stream); + + raft::update_host(tx_counts.data(), d_tx_value_counts.data(), comm_size, stream); + raft::update_host(rx_counts.data(), d_rx_value_counts.data(), comm_size, stream); + + CUDA_TRY(cudaStreamSynchronize(stream)); // rx_counts should be up-to-date + + std::partial_sum(tx_counts.begin(), tx_counts.end() - 1, tx_offsets.begin() + 1); + std::partial_sum(rx_counts.begin(), rx_counts.end() - 1, rx_offsets.begin() + 1); + + int num_tx_dst_ranks{0}; + int num_rx_src_ranks{0}; + for (int i = 0; i < comm_size; ++i) { + if (tx_counts[i] != 0) { + tx_counts[num_tx_dst_ranks] = tx_counts[i]; + tx_offsets[num_tx_dst_ranks] = tx_offsets[i]; + tx_dst_ranks[num_tx_dst_ranks] = tx_dst_ranks[i]; + ++num_tx_dst_ranks; + } + if (rx_counts[i] != 0) { + rx_counts[num_rx_src_ranks] = rx_counts[i]; + rx_offsets[num_rx_src_ranks] = rx_offsets[i]; + rx_src_ranks[num_rx_src_ranks] = rx_src_ranks[i]; + ++num_rx_src_ranks; + } + } + tx_counts.resize(num_tx_dst_ranks); + tx_offsets.resize(num_tx_dst_ranks); + tx_dst_ranks.resize(num_tx_dst_ranks); + rx_counts.resize(num_rx_src_ranks); + rx_offsets.resize(num_rx_src_ranks); + rx_src_ranks.resize(num_rx_src_ranks); + + return std::make_tuple(tx_counts, tx_offsets, tx_dst_ranks, rx_counts, rx_offsets, rx_src_ranks); +} + +} // namespace detail + +template +auto 
shuffle_values(raft::comms::comms_t const &comm, + TxValueIterator tx_value_first, + std::vector const &tx_value_counts, + cudaStream_t stream) +{ + auto const comm_size = comm.get_size(); + + rmm::device_uvector d_tx_value_counts(comm_size, stream); + raft::update_device(d_tx_value_counts.data(), tx_value_counts.data(), comm_size, stream); + + CUDA_TRY(cudaStreamSynchronize(stream)); // tx_value_counts should be up-to-date + + std::vector tx_counts{}; + std::vector tx_offsets{}; + std::vector tx_dst_ranks{}; + std::vector rx_counts{}; + std::vector rx_offsets{}; + std::vector rx_src_ranks{}; + std::tie(tx_counts, tx_offsets, tx_dst_ranks, rx_counts, rx_offsets, rx_src_ranks) = + detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, stream); + + auto rx_value_buffer = + allocate_dataframe_buffer::value_type>( + rx_offsets.size() > 0 ? rx_offsets.back() + rx_counts.back() : size_t{0}, stream); + + // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released + // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). 
+ device_multicast_sendrecv( + comm, + tx_value_first, + tx_counts, + tx_offsets, + tx_dst_ranks, + get_dataframe_buffer_begin::value_type>( + rx_value_buffer), + rx_counts, + rx_offsets, + rx_src_ranks, + stream); + + return std::make_tuple(std::move(rx_value_buffer), rx_counts); +} + +template +auto groupby_gpuid_and_shuffle_values(raft::comms::comms_t const &comm, + ValueIterator tx_value_first /* [INOUT */, + ValueIterator tx_value_last /* [INOUT */, + ValueToGPUIdOp value_to_gpu_id_op, + cudaStream_t stream) +{ + auto const comm_size = comm.get_size(); + + auto d_tx_value_counts = + detail::sort_and_count(comm, tx_value_first, tx_value_last, value_to_gpu_id_op, stream); + + std::vector tx_counts{}; + std::vector tx_offsets{}; + std::vector tx_dst_ranks{}; + std::vector rx_counts{}; + std::vector rx_offsets{}; + std::vector rx_src_ranks{}; + std::tie(tx_counts, tx_offsets, tx_dst_ranks, rx_counts, rx_offsets, rx_src_ranks) = + detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, stream); + + auto rx_value_buffer = + allocate_dataframe_buffer::value_type>( + rx_offsets.size() > 0 ? rx_offsets.back() + rx_counts.back() : size_t{0}, stream); + + // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released + // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). 
+ device_multicast_sendrecv( + comm, + tx_value_first, + tx_counts, + tx_offsets, + tx_dst_ranks, + get_dataframe_buffer_begin::value_type>( + rx_value_buffer), + rx_counts, + rx_offsets, + rx_src_ranks, + stream); + + return std::make_tuple(std::move(rx_value_buffer), rx_counts); +} + +template +auto groupby_gpuid_and_shuffle_kv_pairs(raft::comms::comms_t const &comm, + VertexIterator tx_key_first /* [INOUT */, + VertexIterator tx_key_last /* [INOUT */, + ValueIterator tx_value_first /* [INOUT */, + KeyToGPUIdOp key_to_gpu_id_op, + cudaStream_t stream) +{ + auto d_tx_value_counts = detail::sort_and_count( + comm, tx_key_first, tx_key_last, tx_value_first, key_to_gpu_id_op, stream); + + std::vector tx_counts{}; + std::vector tx_offsets{}; + std::vector tx_dst_ranks{}; + std::vector rx_counts{}; + std::vector rx_offsets{}; + std::vector rx_src_ranks{}; + std::tie(tx_counts, tx_offsets, tx_dst_ranks, rx_counts, rx_offsets, rx_src_ranks) = + detail::compute_tx_rx_counts_offsets_ranks(comm, d_tx_value_counts, stream); + + rmm::device_uvector::value_type> rx_keys( + rx_offsets.size() > 0 ? rx_offsets.back() + rx_counts.back() : size_t{0}, stream); + auto rx_value_buffer = + allocate_dataframe_buffer::value_type>( + rx_keys.size(), stream); + + // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released + // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). + device_multicast_sendrecv(comm, + tx_key_first, + tx_counts, + tx_offsets, + tx_dst_ranks, + rx_keys.begin(), + rx_counts, + rx_offsets, + rx_src_ranks, + stream); + + // FIXME: this needs to be replaced with AlltoAll once NCCL 2.8 is released + // (if num_tx_dst_ranks == num_rx_src_ranks == comm_size). 
+ device_multicast_sendrecv( + comm, + tx_value_first, + tx_counts, + tx_offsets, + tx_dst_ranks, + get_dataframe_buffer_begin::value_type>( + rx_value_buffer), + rx_counts, + rx_offsets, + rx_src_ranks, + stream); + + return std::make_tuple(std::move(rx_keys), std::move(rx_value_buffer), rx_counts); +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/centrality/betweenness_centrality.cu b/cpp/src/centrality/betweenness_centrality.cu index 8ff62f7ddb6..c0a34de5f70 100644 --- a/cpp/src/centrality/betweenness_centrality.cu +++ b/cpp/src/centrality/betweenness_centrality.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -116,7 +116,7 @@ void verify_betweenness_centrality_input(result_t *result, static_assert(std::is_same::value || std::is_same::value, "result_t should be float or double"); - CUGRAPH_EXPECTS(result != nullptr, "Invalid API parameter: betwenness pointer is NULL"); + CUGRAPH_EXPECTS(result != nullptr, "Invalid input argument: betwenness pointer is NULL"); CUGRAPH_EXPECTS(number_of_sources >= 0, "Number of sources must be positive or equal to 0."); if (number_of_sources != 0) { CUGRAPH_EXPECTS(sources != nullptr, diff --git a/cpp/src/community/dendrogram.cuh b/cpp/src/community/dendrogram.cuh new file mode 100644 index 00000000000..414f5f3854d --- /dev/null +++ b/cpp/src/community/dendrogram.cuh @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include +#include + +namespace cugraph { + +template +class Dendrogram { + public: + void add_level(vertex_t num_verts, + cudaStream_t stream = 0, + rmm::mr::device_memory_resource *mr = rmm::mr::get_current_device_resource()) + { + level_ptr_.push_back( + std::make_unique(num_verts * sizeof(vertex_t), stream, mr)); + level_size_.push_back(num_verts); + } + + size_t current_level() const { return level_size_.size() - 1; } + + size_t num_levels() const { return level_size_.size(); } + + vertex_t const *get_level_ptr_nocheck(size_t level) const + { + return static_cast(level_ptr_[level]->data()); + } + + vertex_t *get_level_ptr_nocheck(size_t level) + { + return static_cast(level_ptr_[level]->data()); + } + + vertex_t get_level_size_nocheck(size_t level) const { return level_size_[level]; } + + vertex_t const *current_level_begin() const { return get_level_ptr_nocheck(current_level()); } + + vertex_t const *current_level_end() const { return current_level_begin() + current_level_size(); } + + vertex_t *current_level_begin() { return get_level_ptr_nocheck(current_level()); } + + vertex_t *current_level_end() { return current_level_begin() + current_level_size(); } + + vertex_t current_level_size() const { return get_level_size_nocheck(current_level()); } + + private: + std::vector level_size_; + std::vector> level_ptr_; +}; + +} // namespace cugraph diff --git a/cpp/src/community/ECG.cu b/cpp/src/community/ecg.cu similarity index 70% rename from cpp/src/community/ECG.cu rename to cpp/src/community/ecg.cu index 
ce7e9dd1ad2..994204ecd32 100644 --- a/cpp/src/community/ECG.cu +++ b/cpp/src/community/ecg.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,13 +15,15 @@ */ #include +#include +#include +#include +#include #include #include -#include + #include -#include -#include "utilities/graph_utils.cuh" namespace { template @@ -41,26 +43,23 @@ binsearch_maxle(const IndexType *vec, const IndexType val, IndexType low, IndexT } } +// FIXME: This shouldn't need to be a custom kernel, this +// seems like it should just be a thrust::transform template -__global__ void match_check_kernel(IdxT size, - IdxT num_verts, - IdxT *offsets, - IdxT *indices, - IdxT *permutation, - IdxT *parts, - ValT *weights) +__global__ void match_check_kernel( + IdxT size, IdxT num_verts, IdxT *offsets, IdxT *indices, IdxT *parts, ValT *weights) { IdxT tid = blockIdx.x * blockDim.x + threadIdx.x; while (tid < size) { IdxT source = binsearch_maxle(offsets, tid, (IdxT)0, num_verts); IdxT dest = indices[tid]; - if (parts[permutation[source]] == parts[permutation[dest]]) weights[tid] += 1; + if (parts[source] == parts[dest]) weights[tid] += 1; tid += gridDim.x * blockDim.x; } } struct prg { - __host__ __device__ float operator()(int n) + __device__ float operator()(int n) { thrust::default_random_engine rng; thrust::uniform_real_distribution dist(0.0, 1.0); @@ -93,7 +92,7 @@ struct update_functor { template void get_permutation_vector(T size, T seed, T *permutation, cudaStream_t stream) { - rmm::device_vector randoms_v(size); + rmm::device_uvector randoms_v(size, stream); thrust::counting_iterator index(seed); thrust::transform( @@ -103,6 +102,31 @@ void get_permutation_vector(T size, T seed, T *permutation, cudaStream_t stream) rmm::exec_policy(stream)->on(stream), randoms_v.begin(), randoms_v.end(), 
permutation); } +template +class EcgLouvain : public cugraph::Louvain { + public: + using graph_t = graph_type; + using vertex_t = typename graph_type::vertex_type; + using edge_t = typename graph_type::edge_type; + using weight_t = typename graph_type::weight_type; + + EcgLouvain(raft::handle_t const &handle, graph_type const &graph, vertex_t seed) + : cugraph::Louvain(handle, graph), seed_(seed) + { + } + + void initialize_dendrogram_level(vertex_t num_vertices) override + { + this->dendrogram_->add_level(num_vertices); + + get_permutation_vector( + num_vertices, seed_, this->dendrogram_->current_level_begin(), this->stream_); + } + + private: + vertex_t seed_; +}; + } // anonymous namespace namespace cugraph { @@ -114,36 +138,34 @@ void ecg(raft::handle_t const &handle, vertex_t ensemble_size, vertex_t *clustering) { - CUGRAPH_EXPECTS(graph.edge_data != nullptr, "API error, louvain expects a weighted graph"); - CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is NULL"); + using graph_type = GraphCSRView; + + CUGRAPH_EXPECTS(graph.edge_data != nullptr, + "Invalid input argument: ecg expects a weighted graph"); + CUGRAPH_EXPECTS(clustering != nullptr, + "Invalid input argument: clustering is NULL, should be a device pointer to " + "memory for storing the result"); cudaStream_t stream{0}; - rmm::device_vector ecg_weights_v(graph.edge_data, - graph.edge_data + graph.number_of_edges); + rmm::device_uvector ecg_weights_v(graph.number_of_edges, handle.get_stream()); + + thrust::copy(rmm::exec_policy(stream)->on(stream), + graph.edge_data, + graph.edge_data + graph.number_of_edges, + ecg_weights_v.data()); vertex_t size{graph.number_of_vertices}; - vertex_t seed{1}; - auto permuted_graph = std::make_unique>( - size, graph.number_of_edges, graph.has_data()); + // FIXME: This seed should be a parameter + vertex_t seed{1}; // Iterate over each member of the ensemble for (vertex_t i = 0; i < ensemble_size; i++) { - // Take random permutation of the 
graph - rmm::device_vector permutation_v(size); - vertex_t *d_permutation = permutation_v.data().get(); - - get_permutation_vector(size, seed, d_permutation, stream); + EcgLouvain runner(handle, graph, seed); seed += size; - detail::permute_graph(graph, d_permutation, permuted_graph->view()); - - // Run one level of Louvain clustering on the random permutation - rmm::device_vector parts_v(size); - vertex_t *d_parts = parts_v.data().get(); - - cugraph::louvain(handle, permuted_graph->view(), d_parts, size_t{1}); + weight_t wt = runner(size_t{1}, weight_t{1}); // For each edge in the graph determine whether the endpoints are in the same partition // Keep a sum for each edge of the total number of times its endpoints are in the same partition @@ -154,17 +176,16 @@ void ecg(raft::handle_t const &handle, graph.number_of_vertices, graph.offsets, graph.indices, - permutation_v.data().get(), - d_parts, - ecg_weights_v.data().get()); + runner.get_dendrogram().get_level_ptr_nocheck(0), + ecg_weights_v.data()); } // Set weights = min_weight + (1 - min-weight)*sum/ensemble_size update_functor uf(min_weight, ensemble_size); thrust::transform(rmm::exec_policy(stream)->on(stream), - ecg_weights_v.data().get(), - ecg_weights_v.data().get() + graph.number_of_edges, - ecg_weights_v.data().get(), + ecg_weights_v.begin(), + ecg_weights_v.end(), + ecg_weights_v.begin(), uf); // Run Louvain on the original graph using the computed weights @@ -172,7 +193,7 @@ void ecg(raft::handle_t const &handle, GraphCSRView louvain_graph; louvain_graph.indices = graph.indices; louvain_graph.offsets = graph.offsets; - louvain_graph.edge_data = ecg_weights_v.data().get(); + louvain_graph.edge_data = ecg_weights_v.data(); louvain_graph.number_of_vertices = graph.number_of_vertices; louvain_graph.number_of_edges = graph.number_of_edges; diff --git a/cpp/src/community/egonet.cu b/cpp/src/community/egonet.cu new file mode 100644 index 00000000000..fa788aa307b --- /dev/null +++ b/cpp/src/community/egonet.cu 
@@ -0,0 +1,210 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +// Alex Fender afender@nvida.com +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include "experimental/graph.hpp" +#include "utilities/graph_utils.cuh" + +#include +#include + +namespace { + +/* +Description +Let the egonet graph of a node x be the subgraph that includes node x, the neighborhood of x, and +all edges between them. Naive algorithm +- Add center node x to the graph. +- Go through all the neighbors y of this center node x, add edge (x, y) to the graph. +- For each neighbor y of center node x, go through all the neighbors z of center node x, if there is +an edge between y and z in original graph, add edge (y, z) to our new graph. 
+ +Rather than doing custom one/two hops features, we propose a generic k-hops solution leveraging BFS +cutoff and subgraph extraction +*/ + +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract( + raft::handle_t const &handle, + cugraph::experimental::graph_view_t const &csr_view, + vertex_t *source_vertex, + vertex_t n_subgraphs, + vertex_t radius) +{ + auto v = csr_view.get_number_of_vertices(); + auto e = csr_view.get_number_of_edges(); + auto stream = handle.get_stream(); + float avg_degree = e / v; + rmm::device_vector neighbors_offsets(n_subgraphs + 1); + rmm::device_vector neighbors; + + // It is the right thing to accept device memory for source_vertex + // FIXME consider adding a device API to BFS (ie. accept source on the device) + std::vector h_source_vertex(n_subgraphs); + raft::update_host(&h_source_vertex[0], source_vertex, n_subgraphs, stream); + + // reserve some reasonable memory, but could grow larger than that + neighbors.reserve(v + avg_degree * n_subgraphs * radius); + neighbors_offsets[0] = 0; + // each source should be done concurently in the future + for (vertex_t i = 0; i < n_subgraphs; i++) { + // BFS with cutoff + rmm::device_vector reached(v); + rmm::device_vector predecessors(v); // not used + bool direction_optimizing = false; + cugraph::experimental::bfs(handle, + csr_view, + reached.data().get(), + predecessors.data().get(), + h_source_vertex[i], + direction_optimizing, + radius); + + // identify reached vertex ids from distance array + thrust::transform(rmm::exec_policy(stream)->on(stream), + thrust::make_counting_iterator(vertex_t{0}), + thrust::make_counting_iterator(v), + reached.begin(), + reached.begin(), + [sentinel = std::numeric_limits::max()] __device__( + auto id, auto val) { return val < sentinel ? 
id : sentinel; }); + + // removes unreached data + auto reached_end = thrust::remove(rmm::exec_policy(stream)->on(stream), + reached.begin(), + reached.end(), + std::numeric_limits::max()); + + // update extraction input + size_t n_reached = thrust::distance(reached.begin(), reached_end); + neighbors_offsets[i + 1] = neighbors_offsets[i] + n_reached; + if (neighbors_offsets[i + 1] > neighbors.capacity()) + neighbors.reserve(neighbors_offsets[i + 1] * 2); + neighbors.insert(neighbors.end(), reached.begin(), reached_end); + } + + // extract + return cugraph::experimental::extract_induced_subgraphs( + handle, csr_view, neighbors_offsets.data().get(), neighbors.data().get(), n_subgraphs); +} +} // namespace +namespace cugraph { +namespace experimental { +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_ego(raft::handle_t const &handle, + graph_view_t const &graph_view, + vertex_t *source_vertex, + vertex_t n_subgraphs, + vertex_t radius) +{ + if (multi_gpu) { + CUGRAPH_FAIL("Unimplemented."); + return std::make_tuple(rmm::device_uvector(0, handle.get_stream()), + rmm::device_uvector(0, handle.get_stream()), + rmm::device_uvector(0, handle.get_stream()), + rmm::device_uvector(0, handle.get_stream())); + } + CUGRAPH_EXPECTS(n_subgraphs > 0, "Need at least one source to extract the egonet from"); + CUGRAPH_EXPECTS(n_subgraphs < graph_view.get_number_of_vertices(), + "Can't have more sources to extract from than vertices in the graph"); + CUGRAPH_EXPECTS(radius > 0, "Radius should be at least 1"); + CUGRAPH_EXPECTS(radius < graph_view.get_number_of_vertices(), "radius is too large"); + // source_vertex range is checked in bfs. 
+ + return extract( + handle, graph_view, source_vertex, n_subgraphs, radius); +} + +// SG FP32 +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_ego(raft::handle_t const &, + graph_view_t const &, + int32_t *, + int32_t, + int32_t); +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_ego(raft::handle_t const &, + graph_view_t const &, + int32_t *, + int32_t, + int32_t); +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_ego(raft::handle_t const &, + graph_view_t const &, + int64_t *, + int64_t, + int64_t); + +// SG FP64 +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_ego(raft::handle_t const &, + graph_view_t const &, + int32_t *, + int32_t, + int32_t); +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_ego(raft::handle_t const &, + graph_view_t const &, + int32_t *, + int32_t, + int32_t); +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_ego(raft::handle_t const &, + graph_view_t const &, + int64_t *, + int64_t, + int64_t); +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/community/extract_subgraph_by_vertex.cu b/cpp/src/community/extract_subgraph_by_vertex.cu index c39b7f8ad0a..eb7b1d494a0 100644 --- a/cpp/src/community/extract_subgraph_by_vertex.cu +++ b/cpp/src/community/extract_subgraph_by_vertex.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -119,7 +119,7 @@ std::unique_ptr> extract_subgraph_vertex(GraphCOOView +#include + +#include +#include + +namespace cugraph { + +template +void partition_at_level(raft::handle_t const &handle, + Dendrogram const &dendrogram, + vertex_t const *d_vertex_ids, + vertex_t *d_partition, + size_t level) +{ + vertex_t local_num_verts = dendrogram.get_level_size_nocheck(0); + + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + d_vertex_ids, + d_vertex_ids + local_num_verts, + d_partition); + + std::for_each(thrust::make_counting_iterator(0), + thrust::make_counting_iterator(level), + [&handle, &dendrogram, d_vertex_ids, &d_partition, local_num_verts](size_t l) { + cugraph::experimental::relabel( + handle, + std::tuple( + d_vertex_ids, dendrogram.get_level_ptr_nocheck(l)), + dendrogram.get_level_size_nocheck(l), + d_partition, + local_num_verts); + }); +} + +} // namespace cugraph diff --git a/cpp/src/community/leiden.cu b/cpp/src/community/leiden.cu index 9e5a847cdf0..427e62d3286 100644 --- a/cpp/src/community/leiden.cu +++ b/cpp/src/community/leiden.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,8 +14,11 @@ * limitations under the License. 
*/ +#include #include +#include + namespace cugraph { template @@ -27,11 +30,29 @@ std::pair leiden(raft::handle_t const &handle, { CUGRAPH_EXPECTS(graph.edge_data != nullptr, "Invalid input argument: leiden expects a weighted graph"); - CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); + CUGRAPH_EXPECTS(clustering != nullptr, + "Invalid input argument: clustering is null, should be a device pointer to " + "memory for storing the result"); Leiden> runner(handle, graph); + weight_t wt = runner(max_level, resolution); + + rmm::device_uvector vertex_ids_v(graph.number_of_vertices, handle.get_stream()); + + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(0), // MNMG - base vertex id + thrust::make_counting_iterator( + graph.number_of_vertices), // MNMG - base vertex id + number_of_vertices + vertex_ids_v.begin()); + + partition_at_level(handle, + runner.get_dendrogram(), + vertex_ids_v.data(), + clustering, + runner.get_dendrogram().num_levels()); - return runner(clustering, max_level, resolution); + // FIXME: Consider returning the Dendrogram at some point + return std::make_pair(runner.get_dendrogram().num_levels(), wt); } // Explicit template instantations diff --git a/cpp/src/community/leiden.cuh b/cpp/src/community/leiden.cuh index f2f84433284..141f8beac40 100644 --- a/cpp/src/community/leiden.cuh +++ b/cpp/src/community/leiden.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -17,6 +17,8 @@ #include +#include + namespace cugraph { template @@ -28,7 +30,8 @@ class Leiden : public Louvain { using weight_t = typename graph_type::weight_type; Leiden(raft::handle_t const &handle, graph_type const &graph) - : Louvain(handle, graph), constraint_v_(graph.number_of_vertices) + : Louvain(handle, graph), + constraint_v_(graph.number_of_vertices, handle.get_stream()) { } @@ -38,22 +41,28 @@ class Leiden : public Louvain { { this->timer_start("update_clustering_constrained"); - rmm::device_vector next_cluster_v(this->cluster_v_); - rmm::device_vector delta_Q_v(graph.number_of_edges); - rmm::device_vector cluster_hash_v(graph.number_of_edges); - rmm::device_vector old_cluster_sum_v(graph.number_of_vertices); + rmm::device_uvector next_cluster_v(this->dendrogram_->current_level_size(), + this->stream_); + rmm::device_uvector delta_Q_v(graph.number_of_edges, this->stream_); + rmm::device_uvector cluster_hash_v(graph.number_of_edges, this->stream_); + rmm::device_uvector old_cluster_sum_v(graph.number_of_vertices, this->stream_); - vertex_t const *d_src_indices = this->src_indices_v_.data().get(); + vertex_t const *d_src_indices = this->src_indices_v_.data(); vertex_t const *d_dst_indices = graph.indices; - vertex_t *d_cluster_hash = cluster_hash_v.data().get(); - vertex_t *d_cluster = this->cluster_v_.data().get(); - weight_t const *d_vertex_weights = this->vertex_weights_v_.data().get(); - weight_t *d_cluster_weights = this->cluster_weights_v_.data().get(); - weight_t *d_delta_Q = delta_Q_v.data().get(); - vertex_t *d_constraint = constraint_v_.data().get(); + vertex_t *d_cluster_hash = cluster_hash_v.data(); + vertex_t *d_cluster = this->dendrogram_->current_level_begin(); + weight_t const *d_vertex_weights = this->vertex_weights_v_.data(); + weight_t *d_cluster_weights = this->cluster_weights_v_.data(); + weight_t *d_delta_Q = delta_Q_v.data(); + vertex_t *d_constraint = constraint_v_.data(); + + 
thrust::copy(rmm::exec_policy(this->stream_)->on(this->stream_), + this->dendrogram_->current_level_begin(), + this->dendrogram_->current_level_end(), + next_cluster_v.data()); - weight_t new_Q = - this->modularity(total_edge_weight, resolution, graph, this->cluster_v_.data().get()); + weight_t new_Q = this->modularity( + total_edge_weight, resolution, graph, this->dendrogram_->current_level_begin()); weight_t cur_Q = new_Q - 1; @@ -83,13 +92,13 @@ class Leiden : public Louvain { up_down = !up_down; - new_Q = this->modularity(total_edge_weight, resolution, graph, next_cluster_v.data().get()); + new_Q = this->modularity(total_edge_weight, resolution, graph, next_cluster_v.data()); if (new_Q > cur_Q) { thrust::copy(rmm::exec_policy(this->stream_)->on(this->stream_), next_cluster_v.begin(), next_cluster_v.end(), - this->cluster_v_.begin()); + this->dendrogram_->current_level_begin()); } } @@ -97,9 +106,7 @@ class Leiden : public Louvain { return cur_Q; } - std::pair operator()(vertex_t *d_cluster_vec, - size_t max_level, - weight_t resolution) + weight_t operator()(size_t max_level, weight_t resolution) override { size_t num_level{0}; @@ -109,57 +116,50 @@ class Leiden : public Louvain { weight_t best_modularity = weight_t{-1}; - // - // Initialize every cluster to reference each vertex to itself - // - thrust::sequence(rmm::exec_policy(this->stream_)->on(this->stream_), - this->cluster_v_.begin(), - this->cluster_v_.end()); - thrust::copy(rmm::exec_policy(this->stream_)->on(this->stream_), - this->cluster_v_.begin(), - this->cluster_v_.end(), - d_cluster_vec); - // // Our copy of the graph. Each iteration of the outer loop will // shrink this copy of the graph. 
// - GraphCSRView current_graph(this->offsets_v_.data().get(), - this->indices_v_.data().get(), - this->weights_v_.data().get(), + GraphCSRView current_graph(this->offsets_v_.data(), + this->indices_v_.data(), + this->weights_v_.data(), this->number_of_vertices_, this->number_of_edges_); - current_graph.get_source_indices(this->src_indices_v_.data().get()); + current_graph.get_source_indices(this->src_indices_v_.data()); while (num_level < max_level) { + // + // Initialize every cluster to reference each vertex to itself + // + this->dendrogram_->add_level(current_graph.number_of_vertices); + + thrust::sequence(rmm::exec_policy(this->stream_)->on(this->stream_), + this->dendrogram_->current_level_begin(), + this->dendrogram_->current_level_end()); + this->compute_vertex_and_cluster_weights(current_graph); weight_t new_Q = this->update_clustering(total_edge_weight, resolution, current_graph); - thrust::copy(rmm::exec_policy(this->stream_)->on(this->stream_), - this->cluster_v_.begin(), - this->cluster_v_.end(), - constraint_v_.begin()); - new_Q = update_clustering_constrained(total_edge_weight, resolution, current_graph); if (new_Q <= best_modularity) { break; } best_modularity = new_Q; - this->shrink_graph(current_graph, d_cluster_vec); + this->shrink_graph(current_graph); num_level++; } this->timer_display(std::cout); - return std::make_pair(num_level, best_modularity); + return best_modularity; } private: - rmm::device_vector constraint_v_; + rmm::device_uvector constraint_v_; }; } // namespace cugraph diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index 81a68a31663..a851777ad93 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -14,10 +14,13 @@ * limitations under the License. */ +#include #include #include #include +#include + namespace cugraph { namespace detail { @@ -31,10 +34,28 @@ std::pair louvain(raft::handle_t const &handle, { CUGRAPH_EXPECTS(graph_view.edge_data != nullptr, "Invalid input argument: louvain expects a weighted graph"); - CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); + CUGRAPH_EXPECTS(clustering != nullptr, + "Invalid input argument: clustering is null, should be a device pointer to " + "memory for storing the result"); Louvain> runner(handle, graph_view); - return runner(clustering, max_level, resolution); + weight_t wt = runner(max_level, resolution); + + rmm::device_uvector vertex_ids_v(graph_view.number_of_vertices, handle.get_stream()); + + thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_ids_v.begin(), + vertex_ids_v.end(), + vertex_t{0}); + + partition_at_level(handle, + runner.get_dendrogram(), + vertex_ids_v.data(), + clustering, + runner.get_dendrogram().num_levels()); + + // FIXME: Consider returning the Dendrogram at some point + return std::make_pair(runner.get_dendrogram().num_levels(), wt); } template @@ -45,7 +66,9 @@ std::pair louvain( size_t max_level, weight_t resolution) { - CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); + CUGRAPH_EXPECTS(clustering != nullptr, + "Invalid input argument: clustering is null, should be a device pointer to " + "memory for storing the result"); // "FIXME": remove this check and the guards below // @@ -61,7 +84,25 @@ std::pair louvain( } else { experimental::Louvain> runner(handle, graph_view); - return runner(clustering, max_level, resolution); + + weight_t wt = runner(max_level, resolution); + + rmm::device_uvector vertex_ids_v(graph_view.get_number_of_vertices(), + handle.get_stream()); + + thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_ids_v.begin(), 
+ vertex_ids_v.end(), + graph_view.get_local_vertex_first()); + + partition_at_level(handle, + runner.get_dendrogram(), + vertex_ids_v.data(), + clustering, + runner.get_dendrogram().num_levels()); + + // FIXME: Consider returning the Dendrogram at some point + return std::make_pair(runner.get_dendrogram().num_levels(), wt); } } diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index 7ca3638f42b..e28f0f1746d 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,11 +17,13 @@ #include -#include - #include #include +#include + +#include + //#define TIMING #ifdef TIMING @@ -44,26 +46,42 @@ class Louvain { hr_timer_(), #endif handle_(handle), + dendrogram_(std::make_unique>()), // FIXME: Don't really need to copy here but would need // to change the logic to populate this properly // in generate_superverticies_graph. 
// - offsets_v_(graph.offsets, graph.offsets + graph.number_of_vertices + 1), - indices_v_(graph.indices, graph.indices + graph.number_of_edges), - weights_v_(graph.edge_data, graph.edge_data + graph.number_of_edges), - src_indices_v_(graph.number_of_edges), - vertex_weights_v_(graph.number_of_vertices), - cluster_weights_v_(graph.number_of_vertices), - cluster_v_(graph.number_of_vertices), - tmp_arr_v_(graph.number_of_vertices), - cluster_inverse_v_(graph.number_of_vertices), + offsets_v_(graph.number_of_vertices + 1, handle.get_stream()), + indices_v_(graph.number_of_edges, handle.get_stream()), + weights_v_(graph.number_of_edges, handle.get_stream()), + src_indices_v_(graph.number_of_edges, handle.get_stream()), + vertex_weights_v_(graph.number_of_vertices, handle.get_stream()), + cluster_weights_v_(graph.number_of_vertices, handle.get_stream()), + tmp_arr_v_(graph.number_of_vertices, handle.get_stream()), + cluster_inverse_v_(graph.number_of_vertices, handle.get_stream()), number_of_vertices_(graph.number_of_vertices), number_of_edges_(graph.number_of_edges), stream_(handle.get_stream()) { + thrust::copy(rmm::exec_policy(stream_)->on(stream_), + graph.offsets, + graph.offsets + graph.number_of_vertices + 1, + offsets_v_.begin()); + + thrust::copy(rmm::exec_policy(stream_)->on(stream_), + graph.indices, + graph.indices + graph.number_of_edges, + indices_v_.begin()); + + thrust::copy(rmm::exec_policy(stream_)->on(stream_), + graph.edge_data, + graph.edge_data + graph.number_of_edges, + weights_v_.begin()); } + virtual ~Louvain() {} + weight_t modularity(weight_t total_edge_weight, weight_t resolution, graph_t const &graph, @@ -71,43 +89,45 @@ class Louvain { { vertex_t n_verts = graph.number_of_vertices; - rmm::device_vector inc(n_verts, weight_t{0.0}); - rmm::device_vector deg(n_verts, weight_t{0.0}); + rmm::device_uvector inc(n_verts, stream_); + rmm::device_uvector deg(n_verts, stream_); - edge_t const *d_offsets = graph.offsets; - vertex_t const *d_indices = 
graph.indices; - weight_t const *d_weights = graph.edge_data; - weight_t *d_inc = inc.data().get(); - weight_t *d_deg = deg.data().get(); + thrust::fill(rmm::exec_policy(stream_)->on(stream_), inc.begin(), inc.end(), weight_t{0.0}); + thrust::fill(rmm::exec_policy(stream_)->on(stream_), deg.begin(), deg.end(), weight_t{0.0}); // FIXME: Already have weighted degree computed in main loop, // could pass that in rather than computing d_deg... which // would save an atomicAdd (synchronization) // - thrust::for_each( - rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(graph.number_of_vertices), - [d_inc, d_deg, d_offsets, d_indices, d_weights, d_cluster] __device__(vertex_t v) { - vertex_t community = d_cluster[v]; - weight_t increase{0.0}; - weight_t degree{0.0}; - - for (edge_t loc = d_offsets[v]; loc < d_offsets[v + 1]; ++loc) { - vertex_t neighbor = d_indices[loc]; - degree += d_weights[loc]; - if (d_cluster[neighbor] == community) { increase += d_weights[loc]; } - } + thrust::for_each(rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(graph.number_of_vertices), + [d_inc = inc.data(), + d_deg = deg.data(), + d_offsets = graph.offsets, + d_indices = graph.indices, + d_weights = graph.edge_data, + d_cluster] __device__(vertex_t v) { + vertex_t community = d_cluster[v]; + weight_t increase{0.0}; + weight_t degree{0.0}; + + for (edge_t loc = d_offsets[v]; loc < d_offsets[v + 1]; ++loc) { + vertex_t neighbor = d_indices[loc]; + degree += d_weights[loc]; + if (d_cluster[neighbor] == community) { increase += d_weights[loc]; } + } - if (degree > weight_t{0.0}) atomicAdd(d_deg + community, degree); - if (increase > weight_t{0.0}) atomicAdd(d_inc + community, increase); - }); + if (degree > weight_t{0.0}) atomicAdd(d_deg + community, degree); + if (increase > weight_t{0.0}) atomicAdd(d_inc + community, increase); + }); weight_t Q = thrust::transform_reduce( 
rmm::exec_policy(stream_)->on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_vertices), - [d_deg, d_inc, total_edge_weight, resolution] __device__(vertex_t community) { + [d_deg = deg.data(), d_inc = inc.data(), total_edge_weight, resolution] __device__( + vertex_t community) { return ((d_inc[community] / total_edge_weight) - resolution * (d_deg[community] * d_deg[community]) / (total_edge_weight * total_edge_weight)); @@ -118,37 +138,35 @@ class Louvain { return Q; } - virtual std::pair operator()(vertex_t *d_cluster_vec, - size_t max_level, - weight_t resolution) - { - size_t num_level{0}; + Dendrogram &get_dendrogram() const { return *dendrogram_; } + std::unique_ptr> move_dendrogram() { return dendrogram_; } + + virtual weight_t operator()(size_t max_level, weight_t resolution) + { weight_t total_edge_weight = thrust::reduce(rmm::exec_policy(stream_)->on(stream_), weights_v_.begin(), weights_v_.end()); weight_t best_modularity = weight_t{-1}; - // - // Initialize every cluster to reference each vertex to itself - // - thrust::sequence(rmm::exec_policy(stream_)->on(stream_), cluster_v_.begin(), cluster_v_.end()); - thrust::copy( - rmm::exec_policy(stream_)->on(stream_), cluster_v_.begin(), cluster_v_.end(), d_cluster_vec); - // // Our copy of the graph. Each iteration of the outer loop will // shrink this copy of the graph. 
// - GraphCSRView current_graph(offsets_v_.data().get(), - indices_v_.data().get(), - weights_v_.data().get(), + GraphCSRView current_graph(offsets_v_.data(), + indices_v_.data(), + weights_v_.data(), number_of_vertices_, number_of_edges_); - current_graph.get_source_indices(src_indices_v_.data().get()); + current_graph.get_source_indices(src_indices_v_.data()); + + while (dendrogram_->num_levels() < max_level) { + // + // Initialize every cluster to reference each vertex to itself + // + initialize_dendrogram_level(current_graph.number_of_vertices); - while (num_level < max_level) { compute_vertex_and_cluster_weights(current_graph); weight_t new_Q = update_clustering(total_edge_weight, resolution, current_graph); @@ -157,14 +175,12 @@ class Louvain { best_modularity = new_Q; - shrink_graph(current_graph, d_cluster_vec); - - num_level++; + shrink_graph(current_graph); } timer_display(std::cout); - return std::make_pair(num_level, best_modularity); + return best_modularity; } protected: @@ -190,6 +206,15 @@ class Louvain { #endif } + virtual void initialize_dendrogram_level(vertex_t num_vertices) + { + dendrogram_->add_level(num_vertices); + + thrust::sequence(rmm::exec_policy(stream_)->on(stream_), + dendrogram_->current_level_begin(), + dendrogram_->current_level_end()); + } + public: void compute_vertex_and_cluster_weights(graph_type const &graph) { @@ -198,8 +223,8 @@ class Louvain { edge_t const *d_offsets = graph.offsets; vertex_t const *d_indices = graph.indices; weight_t const *d_weights = graph.edge_data; - weight_t *d_vertex_weights = vertex_weights_v_.data().get(); - weight_t *d_cluster_weights = cluster_weights_v_.data().get(); + weight_t *d_vertex_weights = vertex_weights_v_.data(); + weight_t *d_cluster_weights = cluster_weights_v_.data(); // // MNMG: copy_v_transform_reduce_out_nbr, then copy @@ -226,21 +251,23 @@ class Louvain { { timer_start("update_clustering"); - // - // MNMG: This is the hard one, see writeup - // - rmm::device_vector 
next_cluster_v(cluster_v_); - rmm::device_vector delta_Q_v(graph.number_of_edges); - rmm::device_vector cluster_hash_v(graph.number_of_edges); - rmm::device_vector old_cluster_sum_v(graph.number_of_vertices); + rmm::device_uvector next_cluster_v(dendrogram_->current_level_size(), stream_); + rmm::device_uvector delta_Q_v(graph.number_of_edges, stream_); + rmm::device_uvector cluster_hash_v(graph.number_of_edges, stream_); + rmm::device_uvector old_cluster_sum_v(graph.number_of_vertices, stream_); + + vertex_t *d_cluster = dendrogram_->current_level_begin(); + weight_t const *d_vertex_weights = vertex_weights_v_.data(); + weight_t *d_cluster_weights = cluster_weights_v_.data(); + weight_t *d_delta_Q = delta_Q_v.data(); - vertex_t *d_cluster_hash = cluster_hash_v.data().get(); - vertex_t *d_cluster = cluster_v_.data().get(); - weight_t const *d_vertex_weights = vertex_weights_v_.data().get(); - weight_t *d_cluster_weights = cluster_weights_v_.data().get(); - weight_t *d_delta_Q = delta_Q_v.data().get(); + thrust::copy(rmm::exec_policy(stream_)->on(stream_), + dendrogram_->current_level_begin(), + dendrogram_->current_level_end(), + next_cluster_v.data()); - weight_t new_Q = modularity(total_edge_weight, resolution, graph, cluster_v_.data().get()); + weight_t new_Q = + modularity(total_edge_weight, resolution, graph, dendrogram_->current_level_begin()); weight_t cur_Q = new_Q - 1; @@ -259,13 +286,13 @@ class Louvain { up_down = !up_down; - new_Q = modularity(total_edge_weight, resolution, graph, next_cluster_v.data().get()); + new_Q = modularity(total_edge_weight, resolution, graph, next_cluster_v.data()); if (new_Q > cur_Q) { thrust::copy(rmm::exec_policy(stream_)->on(stream_), next_cluster_v.begin(), next_cluster_v.end(), - cluster_v_.begin()); + dendrogram_->current_level_begin()); } } @@ -276,45 +303,37 @@ class Louvain { void compute_delta_modularity(weight_t total_edge_weight, weight_t resolution, graph_type const &graph, - rmm::device_vector &cluster_hash_v, - 
rmm::device_vector &old_cluster_sum_v, - rmm::device_vector &delta_Q_v) + rmm::device_uvector &cluster_hash_v, + rmm::device_uvector &old_cluster_sum_v, + rmm::device_uvector &delta_Q_v) { - vertex_t const *d_src_indices = src_indices_v_.data().get(); - vertex_t const *d_dst_indices = graph.indices; edge_t const *d_offsets = graph.offsets; weight_t const *d_weights = graph.edge_data; - vertex_t const *d_cluster = cluster_v_.data().get(); - weight_t const *d_vertex_weights = vertex_weights_v_.data().get(); - weight_t const *d_cluster_weights = cluster_weights_v_.data().get(); + vertex_t const *d_cluster = dendrogram_->current_level_begin(); + weight_t const *d_vertex_weights = vertex_weights_v_.data(); + weight_t const *d_cluster_weights = cluster_weights_v_.data(); - vertex_t *d_cluster_hash = cluster_hash_v.data().get(); - weight_t *d_delta_Q = delta_Q_v.data().get(); - weight_t *d_old_cluster_sum = old_cluster_sum_v.data().get(); + vertex_t *d_cluster_hash = cluster_hash_v.data(); + weight_t *d_delta_Q = delta_Q_v.data(); + weight_t *d_old_cluster_sum = old_cluster_sum_v.data(); weight_t *d_new_cluster_sum = d_delta_Q; - thrust::fill(cluster_hash_v.begin(), cluster_hash_v.end(), vertex_t{-1}); - thrust::fill(delta_Q_v.begin(), delta_Q_v.end(), weight_t{0.0}); - thrust::fill(old_cluster_sum_v.begin(), old_cluster_sum_v.end(), weight_t{0.0}); + thrust::fill(rmm::exec_policy(stream_)->on(stream_), + cluster_hash_v.begin(), + cluster_hash_v.end(), + vertex_t{-1}); + thrust::fill( + rmm::exec_policy(stream_)->on(stream_), delta_Q_v.begin(), delta_Q_v.end(), weight_t{0.0}); + thrust::fill(rmm::exec_policy(stream_)->on(stream_), + old_cluster_sum_v.begin(), + old_cluster_sum_v.end(), + weight_t{0.0}); - // MNMG: New technique using reduce_by_key. Would require a segmented sort - // or a pair of sorts on each node, so probably slower than what's here. - // This might still be faster even in MNMG... 
- // - // - // FIXME: Eventually this should use cuCollections concurrent map - // implementation, but that won't be available for a while. - // - // For each source vertex, we're going to build a hash - // table to the destination cluster ids. We can use - // the offsets ranges to define the bounds of the hash - // table. - // thrust::for_each(rmm::exec_policy(stream_)->on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_edges), - [d_src_indices, - d_dst_indices, + [d_src_indices = src_indices_v_.data(), + d_dst_indices = graph.indices, d_cluster, d_offsets, d_cluster_hash, @@ -355,7 +374,7 @@ class Louvain { [total_edge_weight, resolution, d_cluster_hash, - d_src_indices, + d_src_indices = src_indices_v_.data(), d_cluster, d_vertex_weights, d_delta_Q, @@ -383,33 +402,37 @@ class Louvain { } void assign_nodes(graph_type const &graph, - rmm::device_vector &cluster_hash_v, - rmm::device_vector &next_cluster_v, - rmm::device_vector &delta_Q_v, + rmm::device_uvector &cluster_hash_v, + rmm::device_uvector &next_cluster_v, + rmm::device_uvector &delta_Q_v, bool up_down) { - rmm::device_vector temp_vertices_v(graph.number_of_vertices); - rmm::device_vector temp_cluster_v(graph.number_of_vertices, vertex_t{-1}); - rmm::device_vector temp_delta_Q_v(graph.number_of_vertices, weight_t{0.0}); + rmm::device_uvector temp_vertices_v(graph.number_of_vertices, stream_); + rmm::device_uvector temp_cluster_v(graph.number_of_vertices, stream_); + rmm::device_uvector temp_delta_Q_v(graph.number_of_vertices, stream_); + + thrust::fill(rmm::exec_policy(stream_)->on(stream_), + temp_cluster_v.begin(), + temp_cluster_v.end(), + vertex_t{-1}); - weight_t *d_delta_Q = delta_Q_v.data().get(); - vertex_t *d_next_cluster = next_cluster_v.data().get(); - vertex_t *d_cluster_hash = cluster_hash_v.data().get(); - weight_t const *d_vertex_weights = vertex_weights_v_.data().get(); - weight_t *d_cluster_weights = cluster_weights_v_.data().get(); + 
thrust::fill(rmm::exec_policy(stream_)->on(stream_), + temp_delta_Q_v.begin(), + temp_delta_Q_v.end(), + weight_t{0}); auto cluster_reduce_iterator = - thrust::make_zip_iterator(thrust::make_tuple(d_cluster_hash, d_delta_Q)); + thrust::make_zip_iterator(thrust::make_tuple(cluster_hash_v.begin(), delta_Q_v.begin())); - auto output_edge_iterator2 = thrust::make_zip_iterator( - thrust::make_tuple(temp_cluster_v.data().get(), temp_delta_Q_v.data().get())); + auto output_edge_iterator2 = + thrust::make_zip_iterator(thrust::make_tuple(temp_cluster_v.begin(), temp_delta_Q_v.begin())); auto cluster_reduce_end = thrust::reduce_by_key(rmm::exec_policy(stream_)->on(stream_), src_indices_v_.begin(), src_indices_v_.end(), cluster_reduce_iterator, - temp_vertices_v.data().get(), + temp_vertices_v.data(), output_edge_iterator2, thrust::equal_to(), [] __device__(auto pair1, auto pair2) { @@ -422,22 +445,18 @@ class Louvain { return pair2; }); - vertex_t final_size = thrust::distance(temp_vertices_v.data().get(), cluster_reduce_end.first); - - vertex_t *d_temp_vertices = temp_vertices_v.data().get(); - vertex_t *d_temp_clusters = temp_cluster_v.data().get(); - weight_t *d_temp_delta_Q = temp_delta_Q_v.data().get(); + vertex_t final_size = thrust::distance(temp_vertices_v.data(), cluster_reduce_end.first); thrust::for_each(rmm::exec_policy(stream_)->on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(final_size), - [d_temp_delta_Q, - up_down, - d_next_cluster, - d_temp_vertices, - d_vertex_weights, - d_temp_clusters, - d_cluster_weights] __device__(vertex_t id) { + [up_down, + d_temp_delta_Q = temp_delta_Q_v.data(), + d_next_cluster = next_cluster_v.data(), + d_temp_vertices = temp_vertices_v.data(), + d_vertex_weights = vertex_weights_v_.data(), + d_temp_clusters = temp_cluster_v.data(), + d_cluster_weights = cluster_weights_v_.data()] __device__(vertex_t id) { if ((d_temp_clusters[id] >= 0) && (d_temp_delta_Q[id] > weight_t{0.0})) { vertex_t new_cluster 
= d_temp_clusters[id]; vertex_t old_cluster = d_next_cluster[d_temp_vertices[id]]; @@ -453,38 +472,38 @@ class Louvain { }); } - void shrink_graph(graph_t &graph, vertex_t *d_cluster_vec) + void shrink_graph(graph_t &graph) { timer_start("shrinking graph"); // renumber the clusters to the range 0..(num_clusters-1) - vertex_t num_clusters = renumber_clusters(d_cluster_vec); - cluster_weights_v_.resize(num_clusters); + vertex_t num_clusters = renumber_clusters(); + cluster_weights_v_.resize(num_clusters, stream_); // shrink our graph to represent the graph of supervertices generate_superverticies_graph(graph, num_clusters); - // assign each new vertex to its own cluster - thrust::sequence(rmm::exec_policy(stream_)->on(stream_), cluster_v_.begin(), cluster_v_.end()); - timer_stop(stream_); } - vertex_t renumber_clusters(vertex_t *d_cluster_vec) + vertex_t renumber_clusters() { - vertex_t *d_tmp_array = tmp_arr_v_.data().get(); - vertex_t *d_cluster_inverse = cluster_inverse_v_.data().get(); - vertex_t *d_cluster = cluster_v_.data().get(); + vertex_t *d_tmp_array = tmp_arr_v_.data(); + vertex_t *d_cluster_inverse = cluster_inverse_v_.data(); + vertex_t *d_cluster = dendrogram_->current_level_begin(); - vertex_t old_num_clusters = cluster_v_.size(); + vertex_t old_num_clusters = dendrogram_->current_level_size(); // // New technique. 
Initialize cluster_inverse_v_ to 0 // - thrust::fill(cluster_inverse_v_.begin(), cluster_inverse_v_.end(), vertex_t{0}); + thrust::fill(rmm::exec_policy(stream_)->on(stream_), + cluster_inverse_v_.begin(), + cluster_inverse_v_.end(), + vertex_t{0}); // - // Iterate over every element c in cluster_v_ and set cluster_inverse_v to 1 + // Iterate over every element c in the current clustering and set cluster_inverse_v to 1 // auto first_1 = thrust::make_constant_iterator(1); auto last_1 = first_1 + old_num_clusters; @@ -492,7 +511,7 @@ class Louvain { thrust::scatter(rmm::exec_policy(stream_)->on(stream_), first_1, last_1, - cluster_v_.begin(), + dendrogram_->current_level_begin(), cluster_inverse_v_.begin()); // @@ -506,7 +525,7 @@ class Louvain { [d_cluster_inverse] __device__(const vertex_t idx) { return d_cluster_inverse[idx] == 1; }); vertex_t new_num_clusters = thrust::distance(tmp_arr_v_.begin(), copy_end); - tmp_arr_v_.resize(new_num_clusters); + tmp_arr_v_.resize(new_num_clusters, stream_); // // Now we can set each value in cluster_inverse of a cluster to its index @@ -525,32 +544,16 @@ class Louvain { d_cluster[i] = d_cluster_inverse[d_cluster[i]]; }); - thrust::for_each(rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(number_of_vertices_), - [d_cluster_vec, d_cluster] __device__(vertex_t i) { - d_cluster_vec[i] = d_cluster[d_cluster_vec[i]]; - }); - - cluster_inverse_v_.resize(new_num_clusters); - cluster_v_.resize(new_num_clusters); + cluster_inverse_v_.resize(new_num_clusters, stream_); return new_num_clusters; } void generate_superverticies_graph(graph_t &graph, vertex_t num_clusters) { - rmm::device_vector new_src_v(graph.number_of_edges); - rmm::device_vector new_dst_v(graph.number_of_edges); - rmm::device_vector new_weight_v(graph.number_of_edges); - - vertex_t *d_old_src = src_indices_v_.data().get(); - vertex_t *d_old_dst = graph.indices; - weight_t *d_old_weight = graph.edge_data; - 
vertex_t *d_new_src = new_src_v.data().get(); - vertex_t *d_new_dst = new_dst_v.data().get(); - vertex_t *d_clusters = cluster_v_.data().get(); - weight_t *d_new_weight = new_weight_v.data().get(); + rmm::device_uvector new_src_v(graph.number_of_edges, stream_); + rmm::device_uvector new_dst_v(graph.number_of_edges, stream_); + rmm::device_uvector new_weight_v(graph.number_of_edges, stream_); // // Renumber the COO @@ -558,13 +561,13 @@ class Louvain { thrust::for_each(rmm::exec_policy(stream_)->on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(graph.number_of_edges), - [d_old_src, - d_old_dst, - d_old_weight, - d_new_src, - d_new_dst, - d_new_weight, - d_clusters] __device__(edge_t e) { + [d_old_src = src_indices_v_.data(), + d_old_dst = graph.indices, + d_old_weight = graph.edge_data, + d_new_src = new_src_v.data(), + d_new_dst = new_dst_v.data(), + d_new_weight = new_weight_v.data(), + d_clusters = dendrogram_->current_level_begin()] __device__(edge_t e) { d_new_src[e] = d_clusters[d_old_src[e]]; d_new_dst[e] = d_clusters[d_old_dst[e]]; d_new_weight[e] = d_old_weight[e]; @@ -572,39 +575,42 @@ class Louvain { thrust::stable_sort_by_key( rmm::exec_policy(stream_)->on(stream_), - d_new_dst, - d_new_dst + graph.number_of_edges, - thrust::make_zip_iterator(thrust::make_tuple(d_new_src, d_new_weight))); + new_dst_v.begin(), + new_dst_v.end(), + thrust::make_zip_iterator(thrust::make_tuple(new_src_v.begin(), new_weight_v.begin()))); thrust::stable_sort_by_key( rmm::exec_policy(stream_)->on(stream_), - d_new_src, - d_new_src + graph.number_of_edges, - thrust::make_zip_iterator(thrust::make_tuple(d_new_dst, d_new_weight))); + new_src_v.begin(), + new_src_v.end(), + thrust::make_zip_iterator(thrust::make_tuple(new_dst_v.begin(), new_weight_v.begin()))); // // Now we reduce by key to combine the weights of duplicate // edges. 
// - auto start = thrust::make_zip_iterator(thrust::make_tuple(d_new_src, d_new_dst)); - auto new_start = thrust::make_zip_iterator(thrust::make_tuple(d_old_src, d_old_dst)); - auto new_end = thrust::reduce_by_key(rmm::exec_policy(stream_)->on(stream_), + auto start = + thrust::make_zip_iterator(thrust::make_tuple(new_src_v.begin(), new_dst_v.begin())); + auto new_start = + thrust::make_zip_iterator(thrust::make_tuple(src_indices_v_.data(), graph.indices)); + auto new_end = thrust::reduce_by_key(rmm::exec_policy(stream_)->on(stream_), start, start + graph.number_of_edges, - d_new_weight, + new_weight_v.begin(), new_start, - d_old_weight, + graph.edge_data, thrust::equal_to>(), thrust::plus()); graph.number_of_edges = thrust::distance(new_start, new_end.first); graph.number_of_vertices = num_clusters; - detail::fill_offset(d_old_src, graph.offsets, num_clusters, graph.number_of_edges, stream_); + detail::fill_offset( + src_indices_v_.data(), graph.offsets, num_clusters, graph.number_of_edges, stream_); CHECK_CUDA(stream_); - src_indices_v_.resize(graph.number_of_edges); - indices_v_.resize(graph.number_of_edges); - weights_v_.resize(graph.number_of_edges); + src_indices_v_.resize(graph.number_of_edges, stream_); + indices_v_.resize(graph.number_of_edges, stream_); + weights_v_.resize(graph.number_of_edges, stream_); } protected: @@ -613,27 +619,28 @@ class Louvain { edge_t number_of_edges_; cudaStream_t stream_; + std::unique_ptr> dendrogram_; + // // Copy of graph // - rmm::device_vector offsets_v_; - rmm::device_vector indices_v_; - rmm::device_vector weights_v_; - rmm::device_vector src_indices_v_; + rmm::device_uvector offsets_v_; + rmm::device_uvector indices_v_; + rmm::device_uvector weights_v_; + rmm::device_uvector src_indices_v_; // // Weights and clustering across iterations of algorithm // - rmm::device_vector vertex_weights_v_; - rmm::device_vector cluster_weights_v_; - rmm::device_vector cluster_v_; + rmm::device_uvector vertex_weights_v_; + 
rmm::device_uvector cluster_weights_v_; // // Temporaries used within kernels. Each iteration uses less // of this memory // - rmm::device_vector tmp_arr_v_; - rmm::device_vector cluster_inverse_v_; + rmm::device_uvector tmp_arr_v_; + rmm::device_uvector cluster_inverse_v_; #ifdef TIMING HighResTimer hr_timer_; diff --git a/cpp/src/components/connectivity.cu b/cpp/src/components/connectivity.cu index 2cc1da017a9..f4c7bf1d35c 100644 --- a/cpp/src/components/connectivity.cu +++ b/cpp/src/components/connectivity.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -64,8 +64,8 @@ std::enable_if_t::value> connected_components_impl( { using ByteT = unsigned char; // minimum addressable unit - CUGRAPH_EXPECTS(graph.offsets != nullptr, "Invalid API parameter: graph.offsets is nullptr"); - CUGRAPH_EXPECTS(graph.indices != nullptr, "Invalid API parameter: graph.indices is nullptr"); + CUGRAPH_EXPECTS(graph.offsets != nullptr, "Invalid input argument: graph.offsets is nullptr"); + CUGRAPH_EXPECTS(graph.indices != nullptr, "Invalid input argument: graph.indices is nullptr"); VT nrows = graph.number_of_vertices; @@ -90,7 +90,7 @@ void connected_components(GraphCSRView const &graph, { cudaStream_t stream{nullptr}; - CUGRAPH_EXPECTS(labels != nullptr, "Invalid API parameter: labels parameter is NULL"); + CUGRAPH_EXPECTS(labels != nullptr, "Invalid input argument: labels parameter is NULL"); return detail::connected_components_impl(graph, connectivity_type, labels, stream); } diff --git a/cpp/src/cores/core_number.cu b/cpp/src/cores/core_number.cu index cd2b928a81e..091ba07ccc6 100644 --- a/cpp/src/cores/core_number.cu +++ b/cpp/src/cores/core_number.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -151,9 +151,9 @@ std::unique_ptr> k_core(GraphCOOView const &in_ VT num_vertex_ids, rmm::mr::device_memory_resource *mr) { - CUGRAPH_EXPECTS(vertex_id != nullptr, "Invalid API parameter: vertex_id is NULL"); - CUGRAPH_EXPECTS(core_number != nullptr, "Invalid API parameter: core_number is NULL"); - CUGRAPH_EXPECTS(k >= 0, "Invalid API parameter: k must be >= 0"); + CUGRAPH_EXPECTS(vertex_id != nullptr, "Invalid input argument: vertex_id is NULL"); + CUGRAPH_EXPECTS(core_number != nullptr, "Invalid input argument: core_number is NULL"); + CUGRAPH_EXPECTS(k >= 0, "Invalid input argument: k must be >= 0"); return detail::extract_subgraph(in_graph, vertex_id, core_number, k, num_vertex_ids, mr); } diff --git a/cpp/src/experimental/bfs.cu b/cpp/src/experimental/bfs.cu index f297587a1d6..7adfbd7fbd7 100644 --- a/cpp/src/experimental/bfs.cu +++ b/cpp/src/experimental/bfs.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -165,8 +165,6 @@ void bfs(raft::handle_t const &handle, handle.get_stream())); // this is as necessary vertex_frontier will become out-of-scope once // this function returns (FIXME: should I stream sync in VertexFrontier // destructor?) - - return; } } // namespace detail diff --git a/cpp/src/experimental/coarsen_graph.cu b/cpp/src/experimental/coarsen_graph.cu new file mode 100644 index 00000000000..0cd551b0d73 --- /dev/null +++ b/cpp/src/experimental/coarsen_graph.cu @@ -0,0 +1,713 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { +namespace detail { + +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + compressed_sparse_to_edgelist(edge_t const *compressed_sparse_offsets, + vertex_t const *compressed_sparse_indices, + weight_t const *compressed_sparse_weights, + vertex_t major_first, + vertex_t major_last, + cudaStream_t stream) +{ + edge_t number_of_edges{0}; + raft::update_host( + &number_of_edges, compressed_sparse_offsets + (major_last - major_first), 1, stream); + CUDA_TRY(cudaStreamSynchronize(stream)); + rmm::device_uvector edgelist_major_vertices(number_of_edges, stream); + rmm::device_uvector edgelist_minor_vertices(number_of_edges, stream); + rmm::device_uvector edgelist_weights( + compressed_sparse_weights != nullptr ? 
number_of_edges : 0, stream); + + // FIXME: this is highly inefficient for very high-degree vertices, for better performance, we can + // fill high-degree vertices using one CUDA block per vertex, mid-degree vertices using one CUDA + // warp per vertex, and low-degree vertices using one CUDA thread per vertex + thrust::for_each(rmm::exec_policy(stream)->on(stream), + thrust::make_counting_iterator(major_first), + thrust::make_counting_iterator(major_last), + [compressed_sparse_offsets, + major_first, + p_majors = edgelist_major_vertices.begin()] __device__(auto v) { + auto first = compressed_sparse_offsets[v - major_first]; + auto last = compressed_sparse_offsets[v - major_first + 1]; + thrust::fill(thrust::seq, p_majors + first, p_majors + last, v); + }); + thrust::copy(rmm::exec_policy(stream)->on(stream), + compressed_sparse_indices, + compressed_sparse_indices + number_of_edges, + edgelist_minor_vertices.begin()); + if (compressed_sparse_weights != nullptr) { + thrust::copy(rmm::exec_policy(stream)->on(stream), + compressed_sparse_weights, + compressed_sparse_weights + number_of_edges, + edgelist_weights.data()); + } + + return std::make_tuple(std::move(edgelist_major_vertices), + std::move(edgelist_minor_vertices), + std::move(edgelist_weights)); +} + +template +void sort_and_coarsen_edgelist(rmm::device_uvector &edgelist_major_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_minor_vertices /* [INOUT] */, + rmm::device_uvector &edgelist_weights /* [INOUT] */, + cudaStream_t stream) +{ + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + + size_t number_of_edges{0}; + if (edgelist_weights.size() > 0) { + thrust::sort_by_key(rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size(), + edgelist_weights.begin()); + + rmm::device_uvector tmp_edgelist_major_vertices(edgelist_major_vertices.size(), + stream); + rmm::device_uvector
tmp_edgelist_minor_vertices(tmp_edgelist_major_vertices.size(), + stream); + rmm::device_uvector tmp_edgelist_weights(tmp_edgelist_major_vertices.size(), stream); + auto it = thrust::reduce_by_key( + rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size(), + edgelist_weights.begin(), + thrust::make_zip_iterator(thrust::make_tuple(tmp_edgelist_major_vertices.begin(), + tmp_edgelist_minor_vertices.begin())), + tmp_edgelist_weights.begin()); + number_of_edges = thrust::distance(tmp_edgelist_weights.begin(), thrust::get<1>(it)); + + edgelist_major_vertices = std::move(tmp_edgelist_major_vertices); + edgelist_minor_vertices = std::move(tmp_edgelist_minor_vertices); + edgelist_weights = std::move(tmp_edgelist_weights); + } else { + thrust::sort(rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size()); + auto it = thrust::unique(rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size()); + number_of_edges = thrust::distance(pair_first, it); + } + + edgelist_major_vertices.resize(number_of_edges, stream); + edgelist_minor_vertices.resize(number_of_edges, stream); + edgelist_weights.resize(number_of_edges, stream); + edgelist_major_vertices.shrink_to_fit(stream); + edgelist_minor_vertices.shrink_to_fit(stream); + edgelist_weights.shrink_to_fit(stream); +} + +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + edge_t const *compressed_sparse_offsets, + vertex_t const *compressed_sparse_indices, + weight_t const *compressed_sparse_weights, + vertex_t const *p_major_labels, + vertex_t const *p_minor_labels, + vertex_t major_first, + vertex_t major_last, + vertex_t minor_first, + vertex_t minor_last, + cudaStream_t stream) +{ + // FIXME: it might be possible to directly create relabeled & coarsened edgelist from the + // compressed sparse format to save memory + +
rmm::device_uvector edgelist_major_vertices(0, stream); + rmm::device_uvector edgelist_minor_vertices(0, stream); + rmm::device_uvector edgelist_weights(0, stream); + std::tie(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights) = + compressed_sparse_to_edgelist(compressed_sparse_offsets, + compressed_sparse_indices, + compressed_sparse_weights, + major_first, + major_last, + stream); + + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + thrust::transform( + rmm::exec_policy(stream)->on(stream), + pair_first, + pair_first + edgelist_major_vertices.size(), + pair_first, + [p_major_labels, p_minor_labels, major_first, minor_first] __device__(auto val) { + return thrust::make_tuple(p_major_labels[thrust::get<0>(val) - major_first], + p_minor_labels[thrust::get<1>(val) - minor_first]); + }); + + sort_and_coarsen_edgelist( + edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights, stream); + + return std::make_tuple(std::move(edgelist_major_vertices), + std::move(edgelist_minor_vertices), + std::move(edgelist_weights)); +} + +// multi-GPU version +template +std::enable_if_t< + multi_gpu, + std::tuple>, + rmm::device_uvector>> +coarsen_graph( + raft::handle_t const &handle, + graph_view_t const &graph_view, + vertex_t const *labels, + bool do_expensive_check) +{ + auto &comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + auto &row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto const row_comm_rank = row_comm.get_rank(); + auto &col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + auto const col_comm_rank = col_comm.get_rank(); + + if (do_expensive_check) { + // currently, nothing to do + } + + // 1. 
locally construct coarsened edge list + + // FIXME: we don't need adj_matrix_major_labels if we apply the same partitioning scheme + // regardless of hypergraph partitioning is applied or not + rmm::device_uvector adj_matrix_major_labels( + store_transposed ? graph_view.get_number_of_local_adj_matrix_partition_cols() + : graph_view.get_number_of_local_adj_matrix_partition_rows(), + handle.get_stream()); + rmm::device_uvector adj_matrix_minor_labels( + store_transposed ? graph_view.get_number_of_local_adj_matrix_partition_rows() + : graph_view.get_number_of_local_adj_matrix_partition_cols(), + handle.get_stream()); + if (store_transposed) { + copy_to_adj_matrix_col(handle, graph_view, labels, adj_matrix_major_labels.data()); + copy_to_adj_matrix_row(handle, graph_view, labels, adj_matrix_minor_labels.data()); + } else { + copy_to_adj_matrix_row(handle, graph_view, labels, adj_matrix_major_labels.data()); + copy_to_adj_matrix_col(handle, graph_view, labels, adj_matrix_minor_labels.data()); + } + + rmm::device_uvector coarsened_edgelist_major_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_minor_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_weights(0, handle.get_stream()); + // FIXME: we may compare performance/memory footprint with the hash_based approach especially when + // cuco::dynamic_map becomes available (so we don't need to preallocate memory assuming the worst + // case). We may be able to limit the memory requirement close to the final coarsened edgelist + // with the hash based approach. 
+ for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + // get edge list + + rmm::device_uvector edgelist_major_vertices(0, handle.get_stream()); + rmm::device_uvector edgelist_minor_vertices(0, handle.get_stream()); + rmm::device_uvector edgelist_weights(0, handle.get_stream()); + std::tie(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights) = + compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + graph_view.offsets(i), + graph_view.indices(i), + graph_view.weights(i), + adj_matrix_major_labels.begin() + + (store_transposed ? graph_view.get_local_adj_matrix_partition_col_value_start_offset(i) + : graph_view.get_local_adj_matrix_partition_row_value_start_offset(i)), + adj_matrix_minor_labels.begin(), + store_transposed ? graph_view.get_local_adj_matrix_partition_col_first(i) + : graph_view.get_local_adj_matrix_partition_row_first(i), + store_transposed ? graph_view.get_local_adj_matrix_partition_col_last(i) + : graph_view.get_local_adj_matrix_partition_row_last(i), + store_transposed ? graph_view.get_local_adj_matrix_partition_row_first(i) + : graph_view.get_local_adj_matrix_partition_col_first(i), + store_transposed ? graph_view.get_local_adj_matrix_partition_row_last(i) + : graph_view.get_local_adj_matrix_partition_col_last(i), + handle.get_stream()); + + auto cur_size = coarsened_edgelist_major_vertices.size(); + // FIXME: this can lead to frequent costly reallocation; we may be able to avoid this if we can + // reserve address space to avoid expensive reallocation. + // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management + coarsened_edgelist_major_vertices.resize(cur_size + edgelist_major_vertices.size(), + handle.get_stream()); + coarsened_edgelist_minor_vertices.resize(coarsened_edgelist_major_vertices.size(), + handle.get_stream()); + coarsened_edgelist_weights.resize( + graph_view.is_weighted() ? 
coarsened_edgelist_major_vertices.size() : 0, handle.get_stream()); + + if (graph_view.is_weighted()) { + auto src_edge_first = + thrust::make_zip_iterator(thrust::make_tuple(edgelist_major_vertices.begin(), + edgelist_minor_vertices.begin(), + edgelist_weights.begin())); + auto dst_edge_first = + thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), + coarsened_edgelist_minor_vertices.begin(), + coarsened_edgelist_weights.begin())) + + cur_size; + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + src_edge_first, + src_edge_first + edgelist_major_vertices.size(), + dst_edge_first); + } else { + auto src_edge_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices.begin(), edgelist_minor_vertices.begin())); + auto dst_edge_first = + thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), + coarsened_edgelist_minor_vertices.begin())) + + cur_size; + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + src_edge_first, + src_edge_first + edgelist_major_vertices.size(), + dst_edge_first); + } + } + + sort_and_coarsen_edgelist(coarsened_edgelist_major_vertices, + coarsened_edgelist_minor_vertices, + coarsened_edgelist_weights, + handle.get_stream()); + + // 2. 
globally shuffle edge list and re-coarsen + + { + auto edge_first = + thrust::make_zip_iterator(thrust::make_tuple(coarsened_edgelist_major_vertices.begin(), + coarsened_edgelist_minor_vertices.begin(), + coarsened_edgelist_weights.begin())); + rmm::device_uvector rx_edgelist_major_vertices(0, handle.get_stream()); + rmm::device_uvector rx_edgelist_minor_vertices(0, handle.get_stream()); + rmm::device_uvector rx_edgelist_weights(0, handle.get_stream()); + std::forward_as_tuple( + std::tie(rx_edgelist_major_vertices, rx_edgelist_minor_vertices, rx_edgelist_weights), + std::ignore) = + groupby_gpuid_and_shuffle_values( + handle.get_comms(), + edge_first, + edge_first + coarsened_edgelist_major_vertices.size(), + [key_func = + detail::compute_gpu_id_from_edge_t{graph_view.is_hypergraph_partitioned(), + comm.get_size(), + row_comm.get_size(), + col_comm.get_size()}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + + sort_and_coarsen_edgelist(rx_edgelist_major_vertices, + rx_edgelist_minor_vertices, + rx_edgelist_weights, + handle.get_stream()); + + coarsened_edgelist_major_vertices = std::move(rx_edgelist_major_vertices); + coarsened_edgelist_minor_vertices = std::move(rx_edgelist_minor_vertices); + coarsened_edgelist_weights = std::move(rx_edgelist_weights); + } + + // 3. 
find unique labels for this GPU + + rmm::device_uvector unique_labels(graph_view.get_number_of_local_vertices(), + handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels, + labels + unique_labels.size(), + unique_labels.begin()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_labels.begin(), + unique_labels.end()); + unique_labels.resize( + thrust::distance(unique_labels.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_labels.begin(), + unique_labels.end())), + handle.get_stream()); + + rmm::device_uvector rx_unique_labels(0, handle.get_stream()); + std::tie(rx_unique_labels, std::ignore) = groupby_gpuid_and_shuffle_values( + handle.get_comms(), + unique_labels.begin(), + unique_labels.end(), + [key_func = detail::compute_gpu_id_from_vertex_t{comm.get_size()}] __device__( + auto val) { return key_func(val); }, + handle.get_stream()); + + unique_labels = std::move(rx_unique_labels); + + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_labels.begin(), + unique_labels.end()); + unique_labels.resize( + thrust::distance(unique_labels.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_labels.begin(), + unique_labels.end())), + handle.get_stream()); + + // 4. 
renumber + + rmm::device_uvector renumber_map_labels(0, handle.get_stream()); + partition_t partition(std::vector(comm_size + 1, 0), + graph_view.is_hypergraph_partitioned(), + row_comm_size, + col_comm_size, + row_comm_rank, + col_comm_rank); + vertex_t number_of_vertices{}; + edge_t number_of_edges{}; + std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = + renumber_edgelist( + handle, + unique_labels.data(), + static_cast(unique_labels.size()), + coarsened_edgelist_major_vertices.data(), + coarsened_edgelist_minor_vertices.data(), + static_cast(coarsened_edgelist_major_vertices.size()), + graph_view.is_hypergraph_partitioned(), + do_expensive_check); + + // 5. build a graph + + std::vector> edgelists{}; + if (graph_view.is_hypergraph_partitioned()) { + CUGRAPH_FAIL("unimplemented."); + } else { + edgelists.resize(1); + edgelists[0].p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() + : coarsened_edgelist_major_vertices.data(); + edgelists[0].p_dst_vertices = store_transposed ? 
coarsened_edgelist_major_vertices.data() + : coarsened_edgelist_minor_vertices.data(); + edgelists[0].p_edge_weights = coarsened_edgelist_weights.data(); + edgelists[0].number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); + } + + return std::make_tuple( + std::make_unique>( + handle, + edgelists, + partition, + number_of_vertices, + number_of_edges, + graph_properties_t{graph_view.is_symmetric(), false}, + true), + std::move(renumber_map_labels)); +} + +// single-GPU version +template +std::enable_if_t< + !multi_gpu, + std::tuple>, + rmm::device_uvector>> +coarsen_graph( + raft::handle_t const &handle, + graph_view_t const &graph_view, + vertex_t const *labels, + bool do_expensive_check) +{ + if (do_expensive_check) { + // currently, nothing to do + } + + rmm::device_uvector coarsened_edgelist_major_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_minor_vertices(0, handle.get_stream()); + rmm::device_uvector coarsened_edgelist_weights(0, handle.get_stream()); + std::tie(coarsened_edgelist_major_vertices, + coarsened_edgelist_minor_vertices, + coarsened_edgelist_weights) = + compressed_sparse_to_relabeled_and_sorted_and_coarsened_edgelist( + graph_view.offsets(), + graph_view.indices(), + graph_view.weights(), + labels, + labels, + vertex_t{0}, + graph_view.get_number_of_vertices(), + vertex_t{0}, + graph_view.get_number_of_vertices(), + handle.get_stream()); + + rmm::device_uvector unique_labels(graph_view.get_number_of_vertices(), + handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels, + labels + unique_labels.size(), + unique_labels.begin()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_labels.begin(), + unique_labels.end()); + unique_labels.resize( + thrust::distance(unique_labels.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_labels.begin(), + unique_labels.end())), 
+ handle.get_stream()); + + auto renumber_map_labels = renumber_edgelist( + handle, + unique_labels.data(), + static_cast(unique_labels.size()), + coarsened_edgelist_major_vertices.data(), + coarsened_edgelist_minor_vertices.data(), + static_cast(coarsened_edgelist_major_vertices.size()), + do_expensive_check); + + edgelist_t edgelist{}; + edgelist.p_src_vertices = store_transposed ? coarsened_edgelist_minor_vertices.data() + : coarsened_edgelist_major_vertices.data(); + edgelist.p_dst_vertices = store_transposed ? coarsened_edgelist_major_vertices.data() + : coarsened_edgelist_minor_vertices.data(); + edgelist.p_edge_weights = coarsened_edgelist_weights.data(); + edgelist.number_of_edges = static_cast(coarsened_edgelist_major_vertices.size()); + + return std::make_tuple( + std::make_unique>( + handle, + edgelist, + static_cast(renumber_map_labels.size()), + graph_properties_t{graph_view.is_symmetric(), false}, + true), + std::move(renumber_map_labels)); +} + +} // namespace detail + +template +std::tuple>, + rmm::device_uvector> +coarsen_graph( + raft::handle_t const &handle, + graph_view_t const &graph_view, + vertex_t const *labels, + bool do_expensive_check) +{ + return detail::coarsen_graph(handle, graph_view, labels, do_expensive_check); +} + +// explicit instantiation + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool 
do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool 
do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int32_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t const *labels, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector> +coarsen_graph(raft::handle_t const &handle, + graph_view_t const &graph_view, + int64_t const *labels, + bool do_expensive_check); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 3a2b7126d22..5cf393bfce4 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,8 +17,8 @@ #include #include #include -#include #include +#include #include #include @@ -62,25 +62,19 @@ struct out_of_range_t { template std:: tuple, rmm::device_uvector, rmm::device_uvector> - edge_list_to_compressed_sparse(raft::handle_t const &handle, - edgelist_t const &edgelist, - vertex_t major_first, - vertex_t major_last, - vertex_t minor_first, - vertex_t minor_last) + edgelist_to_compressed_sparse(edgelist_t const &edgelist, + vertex_t major_first, + vertex_t major_last, + vertex_t minor_first, + vertex_t minor_last, + cudaStream_t stream) { - rmm::device_uvector offsets((major_last - major_first) + 1, handle.get_stream()); - rmm::device_uvector indices(edgelist.number_of_edges, handle.get_stream()); + rmm::device_uvector offsets((major_last - major_first) + 1, stream); + rmm::device_uvector indices(edgelist.number_of_edges, stream); rmm::device_uvector weights( - edgelist.p_edge_weights != nullptr ? edgelist.number_of_edges : 0, handle.get_stream()); - thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - offsets.begin(), - offsets.end(), - edge_t{0}); - thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - indices.begin(), - indices.end(), - vertex_t{0}); + edgelist.p_edge_weights != nullptr ? edgelist.number_of_edges : 0, stream); + thrust::fill(rmm::exec_policy(stream)->on(stream), offsets.begin(), offsets.end(), edge_t{0}); + thrust::fill(rmm::exec_policy(stream)->on(stream), indices.begin(), indices.end(), vertex_t{0}); // FIXME: need to performance test this code with R-mat graphs having highly-skewed degree // distribution. If there is a small number of vertices with very large degrees, atomicAdd can @@ -98,7 +92,7 @@ std:: auto p_weights = edgelist.p_edge_weights != nullptr ? 
weights.data() : static_cast(nullptr); - thrust::for_each(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::for_each(rmm::exec_policy(stream)->on(stream), store_transposed ? edgelist.p_dst_vertices : edgelist.p_src_vertices, store_transposed ? edgelist.p_dst_vertices + edgelist.number_of_edges : edgelist.p_src_vertices + edgelist.number_of_edges, @@ -106,15 +100,13 @@ std:: atomicAdd(p_offsets + (v - major_first), edge_t{1}); }); - thrust::exclusive_scan(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - offsets.begin(), - offsets.end(), - offsets.begin()); + thrust::exclusive_scan( + rmm::exec_policy(stream)->on(stream), offsets.begin(), offsets.end(), offsets.begin()); if (edgelist.p_edge_weights != nullptr) { auto edge_first = thrust::make_zip_iterator(thrust::make_tuple( edgelist.p_src_vertices, edgelist.p_dst_vertices, edgelist.p_edge_weights)); - thrust::for_each(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::for_each(rmm::exec_policy(stream)->on(stream), edge_first, edge_first + edgelist.number_of_edges, [p_offsets, p_indices, p_weights, major_first] __device__(auto e) { @@ -137,7 +129,7 @@ std:: } else { auto edge_first = thrust::make_zip_iterator( thrust::make_tuple(edgelist.p_src_vertices, edgelist.p_dst_vertices)); - thrust::for_each(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::for_each(rmm::exec_policy(stream)->on(stream), edge_first, edge_first + edgelist.number_of_edges, [p_offsets, p_indices, p_weights, major_first] __device__(auto e) { @@ -162,42 +154,6 @@ std:: return std::make_tuple(std::move(offsets), std::move(indices), std::move(weights)); } -template -std::vector segment_degree_sorted_vertex_partition(raft::handle_t const &handle, - DegreeIterator degree_first, - DegreeIterator degree_last, - ThresholdIterator threshold_first, - ThresholdIterator threshold_last) -{ - auto num_elements = thrust::distance(degree_first, degree_last); - auto num_segments 
= thrust::distance(threshold_first, threshold_last) + 1; - - std::vector h_segment_offsets(num_segments + 1); - h_segment_offsets[0] = 0; - h_segment_offsets.back() = num_elements; - - rmm::device_uvector d_segment_offsets(num_segments - 1, handle.get_stream()); - - thrust::upper_bound(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - degree_first, - degree_last, - threshold_first, - threshold_last, - d_segment_offsets.begin()); - - raft::update_host(h_segment_offsets.begin() + 1, - d_segment_offsets.begin(), - d_segment_offsets.size(), - handle.get_stream()); - - CUDA_TRY(cudaStreamSynchronize( - handle.get_stream())); // this is necessary as d_segment_offsets will become out-of-scope once - // this function returns and this function returns a host variable which - // can be used right after return. - - return h_segment_offsets; -} - } // namespace template get_handle_ptr()->get_stream(); CUGRAPH_EXPECTS(edgelists.size() > 0, - "Invalid API parameter: edgelists.size() should be non-zero."); + "Invalid input argument: edgelists.size() should be non-zero."); bool is_weighted = edgelists[0].p_edge_weights != nullptr; @@ -246,14 +202,14 @@ graph_t(col_comm_size))) || (!(partition.is_hypergraph_partitioned()) && (edgelists.size() == 1)), - "Invalid API parameter: errneous edgelists.size()."); + "Invalid input argument: errneous edgelists.size()."); // optional expensive checks (part 1/3) @@ -278,17 +234,17 @@ graph_t{ major_first, major_last, minor_first, minor_last}) == 0, - "Invalid API parameter: edgelists[] have out-of-range values."); + "Invalid input argument: edgelists[] have out-of-range values."); } number_of_local_edges_sum = host_scalar_allreduce(comm, number_of_local_edges_sum, default_stream); - CUGRAPH_EXPECTS(number_of_local_edges_sum == this->get_number_of_edges(), - "Invalid API parameter: the sum of local edges doe counts not match with " - "number_of_local_edges."); + CUGRAPH_EXPECTS( + number_of_local_edges_sum == 
this->get_number_of_edges(), + "Invalid input argument: the sum of local edge counts does not match with number_of_edges."); CUGRAPH_EXPECTS( partition.get_vertex_partition_last(comm_size - 1) == number_of_vertices, - "Invalid API parameter: vertex partition should cover [0, number_of_vertices)."); + "Invalid input argument: vertex partition should cover [0, number_of_vertices)."); } // convert edge list (COO) to compressed sparse format (CSR or CSC) @@ -307,8 +263,13 @@ graph_t offsets(0, default_stream); rmm::device_uvector indices(0, default_stream); rmm::device_uvector weights(0, default_stream); - std::tie(offsets, indices, weights) = edge_list_to_compressed_sparse( - *(this->get_handle_ptr()), edgelists[i], major_first, major_last, minor_first, minor_last); + std::tie(offsets, indices, weights) = + edgelist_to_compressed_sparse(edgelists[i], + major_first, + major_last, + minor_first, + minor_last, + this->get_handle_ptr()->get_stream()); adj_matrix_partition_offsets_.push_back(std::move(offsets)); adj_matrix_partition_indices_.push_back(std::move(indices)); if (is_weighted) { adj_matrix_partition_weights_.push_back(std::move(weights)); } @@ -327,7 +288,7 @@ graph_t{}), - "Invalid API parameter: sorted_by_global_degree_within_vertex_partition is " + "Invalid input argument: sorted_by_global_degree_within_vertex_partition is " "set to true, but degrees are not non-ascending."); } @@ -427,7 +388,7 @@ graph_t{ 0, this->get_number_of_vertices(), 0, this->get_number_of_vertices()}) == 0, - "Invalid API parameter: edgelist have out-of-range values."); + "Invalid input argument: edgelist have out-of-range values."); // FIXME: check for symmetricity may better be implemetned with transpose(). 
if (this->is_symmetric()) {} @@ -455,12 +416,12 @@ graph_t(*(this->get_handle_ptr()), - edgelist, - vertex_t{0}, - this->get_number_of_vertices(), - vertex_t{0}, - this->get_number_of_vertices()); + edgelist_to_compressed_sparse(edgelist, + vertex_t{0}, + this->get_number_of_vertices(), + vertex_t{0}, + this->get_number_of_vertices(), + this->get_handle_ptr()->get_stream()); // update degree-based segment offsets (to be used for graph analytics kernel optimization) @@ -472,12 +433,13 @@ graph_ton(default_stream), - degree_first, - degree_first + this->get_number_of_vertices(), - thrust::greater{}), - "Invalid API parameter: sorted_by_degree is set to true, but degrees are not " - "non-ascending."); + CUGRAPH_EXPECTS( + thrust::is_sorted(rmm::exec_policy(default_stream)->on(default_stream), + degree_first, + degree_first + this->get_number_of_vertices(), + thrust::greater{}), + "Invalid input argument: sorted_by_degree is set to true, but degrees are not " + "non-ascending."); } static_assert(detail::num_segments_per_vertex_partition == 3); @@ -508,9 +470,7 @@ graph_t #include #include -#include #include +#include #include #include @@ -49,6 +49,27 @@ struct out_of_range_t { __device__ bool operator()(vertex_t v) { return (v < min) || (v >= max); } }; +template +std::vector update_adj_matrix_partition_edge_counts( + std::vector const& adj_matrix_partition_offsets, + partition_t const& partition, + cudaStream_t stream) +{ + std::vector adj_matrix_partition_edge_counts(partition.get_number_of_matrix_partitions(), + 0); + for (size_t i = 0; i < adj_matrix_partition_offsets.size(); ++i) { + vertex_t major_first{}; + vertex_t major_last{}; + std::tie(major_first, major_last) = partition.get_matrix_partition_major_range(i); + raft::update_host(&(adj_matrix_partition_edge_counts[i]), + adj_matrix_partition_offsets[i] + (major_last - major_first), + 1, + stream); + } + CUDA_TRY(cudaStreamSynchronize(stream)); + return adj_matrix_partition_edge_counts; +} + } // namespace 
template (row_comm_size))) || (!(partition.is_hypergraph_partitioned()) && (adj_matrix_partition_offsets.size() == 1)), - "Invalid API parameter: errneous adj_matrix_partition_offsets.size()."); + "Internal Error: erroneous adj_matrix_partition_offsets.size()."); CUGRAPH_EXPECTS((sorted_by_global_degree_within_vertex_partition && (vertex_partition_segment_offsets.size() == @@ -106,7 +129,7 @@ graph_view_ton(default_stream), adj_matrix_partition_offsets[i], adj_matrix_partition_offsets[i] + (major_last - major_first + 1)), - "Invalid API parameter: adj_matrix_partition_offsets[] is not sorted."); + "Internal Error: adj_matrix_partition_offsets[] is not sorted."); edge_t number_of_local_edges{}; raft::update_host(&number_of_local_edges, adj_matrix_partition_offsets[i] + (major_last - major_first), @@ -148,22 +171,23 @@ graph_view_t{minor_first, minor_last}) == 0, - "Invalid API parameter: adj_matrix_partition_indices[] have out-of-range vertex IDs."); + "Internal Error: adj_matrix_partition_indices[] have out-of-range vertex IDs."); } number_of_local_edges_sum = host_scalar_allreduce( this->get_handle_ptr()->get_comms(), number_of_local_edges_sum, default_stream); CUGRAPH_EXPECTS(number_of_local_edges_sum == this->get_number_of_edges(), - "Invalid API parameter: the sum of local edges doe counts not match with " + "Internal Error: the sum of local edges counts does not match with " "number_of_local_edges."); if (sorted_by_global_degree_within_vertex_partition) { auto degrees = detail::compute_major_degree(handle, adj_matrix_partition_offsets, partition); - CUGRAPH_EXPECTS(thrust::is_sorted(rmm::exec_policy(default_stream)->on(default_stream), - degrees.begin(), - degrees.end(), - thrust::greater{}), - "Invalid API parameter: sorted_by_global_degree_within_vertex_partition is " - "set to true, but degrees are not non-ascending."); + CUGRAPH_EXPECTS( + thrust::is_sorted(rmm::exec_policy(default_stream)->on(default_stream), + degrees.begin(), + degrees.end(), + 
thrust::greater{}), + "Invalid Invalid input argument: sorted_by_global_degree_within_vertex_partition is " + "set to true, but degrees are not non-ascending."); for (int i = 0; i < (partition.is_hypergraph_partitioned() ? col_comm_size : row_comm_size); ++i) { @@ -171,11 +195,11 @@ graph_view_tis_symmetric()) {} @@ -229,11 +252,10 @@ graph_view_ton(default_stream), offsets, offsets + (this->get_number_of_vertices() + 1)), - "Invalid API parameter: offsets is not sorted."); + "Internal Error: offsets is not sorted."); // better use thrust::any_of once https://github.com/thrust/thrust/issues/1016 is resolved CUGRAPH_EXPECTS( @@ -251,7 +273,7 @@ graph_view_tget_number_of_edges(), out_of_range_t{0, this->get_number_of_vertices()}) == 0, - "Invalid API parameter: adj_matrix_partition_indices[] have out-of-range vertex IDs."); + "Internal Error: adj_matrix_partition_indices[] have out-of-range vertex IDs."); if (sorted_by_degree) { auto degree_first = @@ -261,14 +283,14 @@ graph_view_tget_number_of_vertices(), thrust::greater{}), - "Invalid API parameter: sorted_by_degree is set to true, but degrees are not " - "non-ascending."); + "Internal Error: sorted_by_degree is set to true, but degrees are not " + "in ascending order."); CUGRAPH_EXPECTS(std::is_sorted(segment_offsets.begin(), segment_offsets.end()), - "Invalid API parameter: erroneous segment_offsets."); - CUGRAPH_EXPECTS(segment_offsets[0] == 0, "Invalid API parameter: segment_offsets."); + "Internal Error: erroneous segment_offsets."); + CUGRAPH_EXPECTS(segment_offsets[0] == 0, "Invalid input argument segment_offsets."); CUGRAPH_EXPECTS(segment_offsets.back() == this->get_number_of_vertices(), - "Invalid API parameter: segment_offsets."); + "Invalid input argument: segment_offsets."); } // FIXME: check for symmetricity may better be implemetned with transpose(). 
diff --git a/cpp/src/experimental/include_cuco_static_map.cuh b/cpp/src/experimental/include_cuco_static_map.cuh new file mode 100644 index 00000000000..9e54acef72c --- /dev/null +++ b/cpp/src/experimental/include_cuco_static_map.cuh @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +// "FIXME": remove the guards below and references to CUCO_STATIC_MAP_DEFINED +// +// cuco/static_map.cuh depends on features not supported on or before Pascal. +// +// If we build for sm_60 or before, the inclusion of cuco/static_map.cuh wil +// result in compilation errors. +// +// If we're Pascal or before we do nothing here and will suppress including +// some code below. If we are later than Pascal we define CUCO_STATIC_MAP_DEFINED +// which will result in the full implementation being pulled in. +// +#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700 +#else +#define CUCO_STATIC_MAP_DEFINED +#include +#endif diff --git a/cpp/src/experimental/induced_subgraph.cu b/cpp/src/experimental/induced_subgraph.cu new file mode 100644 index 00000000000..a88adf76ef4 --- /dev/null +++ b/cpp/src/experimental/induced_subgraph.cu @@ -0,0 +1,390 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +namespace cugraph { +namespace experimental { + +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs( + raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets /* size == num_subgraphs + 1 */, + vertex_t const *subgraph_vertices /* size == subgraph_offsets[num_subgraphs] */, + size_t num_subgraphs, + bool do_expensive_check) +{ + // FIXME: this code is inefficient for the vertices with their local degrees much larger than the + // number of vertices in the subgraphs (in this case, searching that the subgraph vertices are + // included in the local neighbors is more efficient than searching the local neighbors are + // included in the subgraph vertices). We may later add additional code to handle such cases. + // FIXME: we may consider the performance (speed & memory footprint, hash based approach uses + // extra-memory) of hash table based and binary search based approaches + + // 1. 
check input arguments + + if (do_expensive_check) { + size_t should_be_zero{std::numeric_limits::max()}; + size_t num_aggregate_subgraph_vertices{}; + raft::update_host(&should_be_zero, subgraph_offsets, 1, handle.get_stream()); + raft::update_host( + &num_aggregate_subgraph_vertices, subgraph_offsets + num_subgraphs, 1, handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + CUGRAPH_EXPECTS(should_be_zero == 0, + "Invalid input argument: subgraph_offsets[0] should be 0."); + + CUGRAPH_EXPECTS( + thrust::is_sorted(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + subgraph_offsets, + subgraph_offsets + (num_subgraphs + 1)), + "Invalid input argument: subgraph_offsets is not sorted."); + vertex_partition_device_t> + vertex_partition(graph_view); + CUGRAPH_EXPECTS(thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + subgraph_vertices, + subgraph_vertices + num_aggregate_subgraph_vertices, + [vertex_partition] __device__(auto v) { + return !vertex_partition.is_valid_vertex(v) || + !vertex_partition.is_local_vertex_nocheck(v); + }) == 0, + "Invalid input argument: subgraph_vertices has invalid vertex IDs."); + + CUGRAPH_EXPECTS( + thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(num_subgraphs), + [subgraph_offsets, subgraph_vertices] __device__(auto i) { + // vertices are sorted and unique + return !thrust::is_sorted(thrust::seq, + subgraph_vertices + subgraph_offsets[i], + subgraph_vertices + subgraph_offsets[i + 1]) || + (thrust::count_if( + thrust::seq, + thrust::make_counting_iterator(subgraph_offsets[i]), + thrust::make_counting_iterator(subgraph_offsets[i + 1]), + [subgraph_vertices, last = subgraph_offsets[i + 1] - 1] __device__(auto i) { + return (i != last) && (subgraph_vertices[i] == subgraph_vertices[i + 1]); + }) != 0); + }) == 0, + "Invalid input argument: subgraph_vertices for 
each subgraph idx should be sorted in " + "ascending order and unique."); + } + + // 2. extract induced subgraphs + + if (multi_gpu) { + CUGRAPH_FAIL("Unimplemented."); + return std::make_tuple(rmm::device_uvector(0, handle.get_stream()), + rmm::device_uvector(0, handle.get_stream()), + rmm::device_uvector(0, handle.get_stream()), + rmm::device_uvector(0, handle.get_stream())); + } else { + // 2-1. Phase 1: calculate memory requirements + + size_t num_aggregate_subgraph_vertices{}; + raft::update_host( + &num_aggregate_subgraph_vertices, subgraph_offsets + num_subgraphs, 1, handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + rmm::device_uvector subgraph_vertex_output_offsets( + num_aggregate_subgraph_vertices + 1, + handle.get_stream()); // for each element of subgraph_vertices + + matrix_partition_device_t> + matrix_partition(graph_view, 0); + // count the numbers of the induced subgraph edges for each vertex in the aggregate subgraph + // vertex list. + thrust::transform( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(num_aggregate_subgraph_vertices), + subgraph_vertex_output_offsets.begin(), + [subgraph_offsets, subgraph_vertices, num_subgraphs, matrix_partition] __device__(auto i) { + auto subgraph_idx = thrust::distance( + subgraph_offsets + 1, + thrust::upper_bound(thrust::seq, subgraph_offsets, subgraph_offsets + num_subgraphs, i)); + vertex_t const *indices{nullptr}; + weight_t const *weights{nullptr}; + edge_t local_degree{}; + auto major_offset = + matrix_partition.get_major_offset_from_major_nocheck(subgraph_vertices[i]); + thrust::tie(indices, weights, local_degree) = + matrix_partition.get_local_edges(major_offset); + // FIXME: this is inefficient for high local degree vertices + return thrust::count_if( + thrust::seq, + indices, + indices + local_degree, + [vertex_first = subgraph_vertices + subgraph_offsets[subgraph_idx], + 
vertex_last = + subgraph_vertices + subgraph_offsets[subgraph_idx + 1]] __device__(auto nbr) { + return thrust::binary_search(thrust::seq, vertex_first, vertex_last, nbr); + }); + }); + thrust::exclusive_scan(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + subgraph_vertex_output_offsets.begin(), + subgraph_vertex_output_offsets.end(), + subgraph_vertex_output_offsets.begin()); + + size_t num_aggregate_edges{}; + raft::update_host(&num_aggregate_edges, + subgraph_vertex_output_offsets.data() + num_aggregate_subgraph_vertices, + 1, + handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + // 2-2. Phase 2: find the edges in the induced subgraphs + + rmm::device_uvector edge_majors(num_aggregate_edges, handle.get_stream()); + rmm::device_uvector edge_minors(num_aggregate_edges, handle.get_stream()); + rmm::device_uvector edge_weights( + graph_view.is_weighted() ? num_aggregate_edges : size_t{0}, handle.get_stream()); + + // fill the edge list buffer (to be returned) for each vertex in the aggregate subgraph vertex + // list (use the offsets computed in the Phase 1) + thrust::for_each( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(num_aggregate_subgraph_vertices), + [subgraph_offsets, + subgraph_vertices, + num_subgraphs, + matrix_partition, + subgraph_vertex_output_offsets = subgraph_vertex_output_offsets.data(), + edge_majors = edge_majors.data(), + edge_minors = edge_minors.data(), + edge_weights = edge_weights.data()] __device__(auto i) { + auto subgraph_idx = thrust::distance( + subgraph_offsets + 1, + thrust::upper_bound( + thrust::seq, subgraph_offsets, subgraph_offsets + num_subgraphs, size_t{i})); + vertex_t const *indices{nullptr}; + weight_t const *weights{nullptr}; + edge_t local_degree{}; + auto major_offset = + matrix_partition.get_major_offset_from_major_nocheck(subgraph_vertices[i]); + thrust::tie(indices, weights,
local_degree) = + matrix_partition.get_local_edges(major_offset); + if (weights != nullptr) { + auto triplet_first = thrust::make_zip_iterator(thrust::make_tuple( + thrust::make_constant_iterator(subgraph_vertices[i]), indices, weights)); + // FIXME: this is inefficient for high local degree vertices + thrust::copy_if( + thrust::seq, + triplet_first, + triplet_first + local_degree, + thrust::make_zip_iterator(thrust::make_tuple(edge_majors, edge_minors, edge_weights)) + + subgraph_vertex_output_offsets[i], + [vertex_first = subgraph_vertices + subgraph_offsets[subgraph_idx], + vertex_last = + subgraph_vertices + subgraph_offsets[subgraph_idx + 1]] __device__(auto t) { + return thrust::binary_search( + thrust::seq, vertex_first, vertex_last, thrust::get<1>(t)); + }); + } else { + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(thrust::make_constant_iterator(subgraph_vertices[i]), indices)); + // FIXME: this is inefficient for high local degree vertices + thrust::copy_if(thrust::seq, + pair_first, + pair_first + local_degree, + thrust::make_zip_iterator(thrust::make_tuple(edge_majors, edge_minors)) + + subgraph_vertex_output_offsets[i], + [vertex_first = subgraph_vertices + subgraph_offsets[subgraph_idx], + vertex_last = subgraph_vertices + + subgraph_offsets[subgraph_idx + 1]] __device__(auto t) { + return thrust::binary_search( + thrust::seq, vertex_first, vertex_last, thrust::get<1>(t)); + }); + } + }); + + rmm::device_uvector subgraph_edge_offsets(num_subgraphs + 1, handle.get_stream()); + thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + subgraph_offsets, + subgraph_offsets + (num_subgraphs + 1), + subgraph_vertex_output_offsets.begin(), + subgraph_edge_offsets.begin()); + + return std::make_tuple(std::move(edge_majors), + std::move(edge_minors), + std::move(edge_weights), + std::move(subgraph_edge_offsets)); + } +} + +// explicit instantiation + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, 
+ rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int32_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int32_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int32_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int32_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int32_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int32_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int32_t const 
*subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int32_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int64_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int64_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int64_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + rmm::device_uvector> +extract_induced_subgraphs(raft::handle_t const &handle, + graph_view_t const &graph_view, + size_t const *subgraph_offsets, + int64_t const *subgraph_vertices, + size_t num_subgraphs, + bool do_expensive_check); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/katz_centrality.cu b/cpp/src/experimental/katz_centrality.cu index 587011da817..1ab824f1c91 100644 --- a/cpp/src/experimental/katz_centrality.cu +++ b/cpp/src/experimental/katz_centrality.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -166,8 +166,6 @@ void katz_centrality(raft::handle_t const &handle, katz_centralities, [l2_norm] __device__(auto val) { return val / l2_norm; }); } - - return; } } // namespace detail diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index 08e52092362..f162cd17a61 100644 --- a/cpp/src/experimental/louvain.cuh +++ b/cpp/src/experimental/louvain.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,32 +22,21 @@ #include #include #include -#include #include #include +#include #include #include #include #include -// "FIXME": remove the guards below and references to CUCO_STATIC_MAP_DEFINED -// -// cuco/static_map.cuh depends on features not supported on or before Pascal. -// -// If we build for sm_60 or before, the inclusion of cuco/static_map.cuh wil -// result in compilation errors. -// -// If we're Pascal or before we do nothing here and will suppress including -// some code below. If we are later than Pascal we define CUCO_STATIC_MAP_DEFINED -// which will result in the full implementation being pulled in. -// -#if defined(__CUDA_ARCH__) && __CUDA_ARCH__ < 700 -#else -#define CUCO_STATIC_MAP_DEFINED -#include -#endif +#include + +#include + +#include //#define TIMING @@ -390,9 +379,9 @@ create_graph(raft::handle_t const &handle, // as above would allow us to eventually run the single GPU version of single level Louvain // on the contracted graphs - which should be more efficient. // -// FIXME: We should return the dendogram and let the python layer clean it up (or have a -// separate C++ function to flatten the dendogram). 
There are customers that might -// like the dendogram and the implementation would be a bit cleaner if we did the +// FIXME: We should return the dendrogram and let the python layer clean it up (or have a +// separate C++ function to flatten the dendrogram). There are customers that might +// like the dendrogram and the implementation would be a bit cleaner if we did the // collapsing as a separate step // template @@ -414,6 +403,7 @@ class Louvain { hr_timer_(), #endif handle_(handle), + dendrogram_(std::make_unique>()), current_graph_view_(graph_view), compute_partition_(graph_view), local_num_vertices_(graph_view.get_number_of_local_vertices()), @@ -422,7 +412,6 @@ class Louvain { local_num_edges_(graph_view.get_number_of_edges()), vertex_weights_v_(graph_view.get_number_of_local_vertices()), cluster_weights_v_(graph_view.get_number_of_local_vertices()), - cluster_v_(graph_view.get_number_of_local_vertices()), number_of_vertices_(graph_view.get_number_of_local_vertices()), stream_(handle.get_stream()) { @@ -432,11 +421,16 @@ class Louvain { base_src_vertex_id_ = graph_view.get_local_adj_matrix_partition_row_first(0); base_dst_vertex_id_ = graph_view.get_local_adj_matrix_partition_col_first(0); - raft::copy(&local_num_edges_, - graph_view.offsets() + graph_view.get_local_adj_matrix_partition_row_last(0) - - graph_view.get_local_adj_matrix_partition_row_first(0), - 1, - stream_); + local_num_edges_ = thrust::transform_reduce( + thrust::host, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator( + graph_view.get_number_of_local_adj_matrix_partitions()), + [&graph_view](auto indx) { + return graph_view.get_number_of_local_adj_matrix_partition_edges(indx); + }, + size_t{0}, + thrust::plus()); CUDA_TRY(cudaStreamSynchronize(stream_)); } @@ -456,11 +450,12 @@ class Louvain { } } - virtual std::pair operator()(vertex_t *d_cluster_vec, - size_t max_level, - weight_t resolution) + Dendrogram &get_dendrogram() const { return *dendrogram_; } + + 
std::unique_ptr> move_dendrogram() { return dendrogram_; } + + virtual weight_t operator()(size_t max_level, weight_t resolution) { - size_t num_level{0}; weight_t best_modularity = weight_t{-1}; #ifdef CUCO_STATIC_MAP_DEFINED @@ -473,17 +468,12 @@ class Louvain { [] __device__(auto, auto, weight_t wt, auto, auto) { return wt; }, weight_t{0}); - // - // Initialize every cluster to reference each vertex to itself - // - thrust::sequence(rmm::exec_policy(stream_)->on(stream_), - cluster_v_.begin(), - cluster_v_.end(), - base_vertex_id_); - thrust::copy( - rmm::exec_policy(stream_)->on(stream_), cluster_v_.begin(), cluster_v_.end(), d_cluster_vec); + while (dendrogram_->num_levels() < max_level) { + // + // Initialize every cluster to reference each vertex to itself + // + initialize_dendrogram_level(current_graph_view_.get_number_of_local_vertices()); - while (num_level < max_level) { compute_vertex_and_cluster_weights(); weight_t new_Q = update_clustering(total_edge_weight, resolution); @@ -492,15 +482,13 @@ class Louvain { best_modularity = new_Q; - shrink_graph(d_cluster_vec); - - num_level++; + shrink_graph(); } timer_display(std::cout); #endif - return std::make_pair(num_level, best_modularity); + return best_modularity; } protected: @@ -528,6 +516,17 @@ class Louvain { #endif } + protected: + void initialize_dendrogram_level(vertex_t num_vertices) + { + dendrogram_->add_level(num_vertices); + + thrust::sequence(rmm::exec_policy(stream_)->on(stream_), + dendrogram_->current_level_begin(), + dendrogram_->current_level_end(), + base_vertex_id_); + } + public: weight_t modularity(weight_t total_edge_weight, weight_t resolution) { @@ -577,23 +576,16 @@ class Louvain { cluster_weights_v_.begin()); cache_vertex_properties( - vertex_weights_v_, src_vertex_weights_cache_v_, dst_vertex_weights_cache_v_); + vertex_weights_v_.begin(), src_vertex_weights_cache_v_, dst_vertex_weights_cache_v_); cache_vertex_properties( - cluster_weights_v_, src_cluster_weights_cache_v_, 
dst_cluster_weights_cache_v_); + cluster_weights_v_.begin(), src_cluster_weights_cache_v_, dst_cluster_weights_cache_v_); timer_stop(stream_); } - // - // FIXME: Consider returning d_src_cache and d_dst_cache - // (as a pair). This would be a nice optimization - // for single GPU, as we wouldn't need to make 3 copies - // of the data, could return a pair of device pointers to - // local_input_v. - // - template - void cache_vertex_properties(rmm::device_vector const &local_input_v, + template + void cache_vertex_properties(iterator_t const &local_input_iterator, rmm::device_vector &src_cache_v, rmm::device_vector &dst_cache_v, bool src = true, @@ -602,13 +594,13 @@ class Louvain { if (src) { src_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_rows()); copy_to_adj_matrix_row( - handle_, current_graph_view_, local_input_v.begin(), src_cache_v.begin()); + handle_, current_graph_view_, local_input_iterator, src_cache_v.begin()); } if (dst) { dst_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_cols()); copy_to_adj_matrix_col( - handle_, current_graph_view_, local_input_v.begin(), dst_cache_v.begin()); + handle_, current_graph_view_, local_input_iterator, dst_cache_v.begin()); } } @@ -617,9 +609,10 @@ class Louvain { { timer_start("update_clustering"); - rmm::device_vector next_cluster_v(cluster_v_); + rmm::device_vector next_cluster_v(dendrogram_->current_level_begin(), + dendrogram_->current_level_end()); - cache_vertex_properties(next_cluster_v, src_cluster_cache_v_, dst_cluster_cache_v_); + cache_vertex_properties(next_cluster_v.begin(), src_cluster_cache_v_, dst_cluster_cache_v_); weight_t new_Q = modularity(total_edge_weight, resolution); weight_t cur_Q = new_Q - 1; @@ -636,7 +629,7 @@ class Louvain { up_down = !up_down; - cache_vertex_properties(next_cluster_v, src_cluster_cache_v_, dst_cluster_cache_v_); + cache_vertex_properties(next_cluster_v.begin(), src_cluster_cache_v_, dst_cluster_cache_v_); new_Q = 
modularity(total_edge_weight, resolution); @@ -644,12 +637,13 @@ class Louvain { thrust::copy(rmm::exec_policy(stream_)->on(stream_), next_cluster_v.begin(), next_cluster_v.end(), - cluster_v_.begin()); + dendrogram_->current_level_begin()); } } // cache the final clustering locally on each cpu - cache_vertex_properties(cluster_v_, src_cluster_cache_v_, dst_cluster_cache_v_); + cache_vertex_properties( + dendrogram_->current_level_begin(), src_cluster_cache_v_, dst_cluster_cache_v_); timer_stop(stream_); return cur_Q; @@ -678,7 +672,7 @@ class Louvain { old_cluster_sum_v.begin()); cache_vertex_properties( - old_cluster_sum_v, src_old_cluster_sum_cache_v, empty_cache_weight_v_, true, false); + old_cluster_sum_v.begin(), src_old_cluster_sum_cache_v, empty_cache_weight_v_, true, false); detail::src_cluster_equality_comparator_t compare( src_indices_v_.data().get(), @@ -1134,7 +1128,7 @@ class Louvain { }); cache_vertex_properties( - cluster_weights_v_, src_cluster_weights_cache_v_, dst_cluster_weights_cache_v_); + cluster_weights_v_.begin(), src_cluster_weights_cache_v_, dst_cluster_weights_cache_v_); } template @@ -1219,475 +1213,63 @@ class Louvain { return std::make_pair(relevant_edges_v, relevant_edge_weights_v); } +#endif - void shrink_graph(vertex_t *d_cluster_vec) + void shrink_graph() { timer_start("shrinking graph"); - std::size_t capacity{static_cast((local_num_rows_ + local_num_cols_) / 0.7)}; - - cuco::static_map hash_map( - capacity, std::numeric_limits::max(), std::numeric_limits::max()); - - // renumber the clusters to the range 0..(num_clusters-1) - vertex_t num_clusters = renumber_clusters(hash_map); - - renumber_result(hash_map, d_cluster_vec, num_clusters); - - // shrink our graph to represent the graph of supervertices - generate_supervertices_graph(hash_map, num_clusters); - - // assign each new vertex to its own cluster - // MNMG: This can be done locally with no communication required - thrust::sequence(rmm::exec_policy(stream_)->on(stream_), - 
cluster_v_.begin(), - cluster_v_.end(), - base_vertex_id_); - - timer_stop(stream_); - } + rmm::device_uvector numbering_map(0, stream_); - vertex_t renumber_clusters(cuco::static_map &hash_map) - { - rmm::device_vector cluster_inverse_v(local_num_vertices_, vertex_t{0}); - - // - // FIXME: Faster to iterate from graph_.get_vertex_partition_first() - // to graph_.get_vertex_partition_last()? That would potentially - // result in adding a cluster that isn't used on this GPU, - // although I don't think it would break the result in any way. - // - // This would also eliminate this use of src_indices_v_. - // - auto it_src = thrust::make_transform_iterator( - src_indices_v_.begin(), - [base_src_vertex_id = base_src_vertex_id_, - d_src_cluster_cache = src_cluster_cache_v_.data().get()] __device__(auto idx) { - return detail::create_cuco_pair_t()( - d_src_cluster_cache[idx - base_src_vertex_id]); - }); - - auto it_dst = thrust::make_transform_iterator( - current_graph_view_.indices(), - [base_dst_vertex_id = base_dst_vertex_id_, - d_dst_cluster_cache = dst_cluster_cache_v_.data().get()] __device__(auto idx) { - return detail::create_cuco_pair_t()( - d_dst_cluster_cache[idx - base_dst_vertex_id]); - }); - - hash_map.insert(it_src, it_src + local_num_edges_); - hash_map.insert(it_dst, it_dst + local_num_edges_); - - // Now I need to get the keys into an array and shuffle them - rmm::device_vector used_cluster_ids_v(hash_map.get_size()); - - auto transform_iter = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - [d_hash_map = hash_map.get_device_view()] __device__(std::size_t idx) { - return d_hash_map.begin_slot()[idx].first.load(); - }); - - used_cluster_ids_v = detail::remove_elements_from_vector( - used_cluster_ids_v, - transform_iter, - transform_iter + hash_map.get_capacity(), - [vmax = std::numeric_limits::max()] __device__(vertex_t cluster) { - return cluster != vmax; - }, - stream_); - - auto partition_cluster_ids_iter = 
thrust::make_transform_iterator( - used_cluster_ids_v.begin(), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - }); - - rmm::device_vector original_gpus_v; - rmm::device_vector my_cluster_ids_v = - variable_shuffle( - handle_, used_cluster_ids_v.size(), used_cluster_ids_v.begin(), partition_cluster_ids_iter); - - if (graph_view_t::is_multi_gpu) { - original_gpus_v = variable_shuffle( - handle_, - used_cluster_ids_v.size(), - thrust::make_constant_iterator(rank_), - partition_cluster_ids_iter); - } - - // - // Now my_cluster_ids contains the cluster ids that this gpu is - // responsible for. I'm going to set cluster_inverse_v to one - // for each cluster in this list. - // - thrust::for_each( - rmm::exec_policy(stream_)->on(stream_), - my_cluster_ids_v.begin(), - my_cluster_ids_v.end(), - [base_vertex_id = base_vertex_id_, - d_cluster_inverse = cluster_inverse_v.data().get()] __device__(vertex_t cluster) { - d_cluster_inverse[cluster - base_vertex_id] = 1; - }); - - rmm::device_vector my_cluster_ids_deduped_v = detail::remove_elements_from_vector( - my_cluster_ids_v, - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(cluster_inverse_v.size()), - [d_cluster_inverse = cluster_inverse_v.data().get()] __device__(auto idx) { - return d_cluster_inverse[idx] == 1; - }, - stream_); - - // - // Need to gather everything to be able to compute base addresses - // - vertex_t base_address{0}; - - if (graph_view_t::is_multi_gpu) { - int num_gpus{1}; - rmm::device_vector sizes_v(num_gpus + 1, my_cluster_ids_deduped_v.size()); - - handle_.get_comms().allgather( - sizes_v.data().get() + num_gpus, sizes_v.data().get(), num_gpus, stream_); - - base_address = thrust::reduce(rmm::exec_policy(stream_)->on(stream_), - sizes_v.begin(), - sizes_v.begin() + rank_, - vertex_t{0}); - } - - // - // Now let's update cluster_inverse_v to contain - // the mapping of old cluster id to new vertex id - 
// - thrust::fill( - cluster_inverse_v.begin(), cluster_inverse_v.end(), std::numeric_limits::max()); - - thrust::for_each_n(rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - my_cluster_ids_deduped_v.size(), - [base_address, - d_my_cluster_ids_deduped = my_cluster_ids_deduped_v.data().get(), - d_cluster_inverse = cluster_inverse_v.data().get()] __device__(auto idx) { - d_cluster_inverse[d_my_cluster_ids_deduped[idx]] = idx + base_address; - }); - - // - // Now I need to shuffle back to original gpus the - // subset of my mapping that is required - // - rmm::device_vector new_vertex_ids_v = - variable_shuffle( - handle_, - my_cluster_ids_v.size(), - thrust::make_transform_iterator(my_cluster_ids_v.begin(), - [d_cluster_inverse = cluster_inverse_v.data().get(), - base_vertex_id = base_vertex_id_] __device__(auto v) { - return d_cluster_inverse[v - base_vertex_id]; - }), - original_gpus_v.begin()); - - if (graph_view_t::is_multi_gpu) { - my_cluster_ids_v = variable_shuffle( - handle_, my_cluster_ids_v.size(), my_cluster_ids_v.begin(), original_gpus_v.begin()); - } - - // - // Now update the hash map with the new vertex id - // - thrust::for_each_n(rmm::exec_policy(stream_)->on(stream_), - thrust::make_zip_iterator( - thrust::make_tuple(my_cluster_ids_v.begin(), new_vertex_ids_v.begin())), - my_cluster_ids_v.size(), - [d_hash_map = hash_map.get_device_view()] __device__(auto p) mutable { - auto pos = d_hash_map.find(thrust::get<0>(p)); - pos->second.store(thrust::get<1>(p)); - }); - - // - // At this point we have a renumbered COO that is - // improperly distributed around the cluster, which - // will be fixed by generate_supervertices_graph - // - if (graph_t::is_multi_gpu) { - return host_scalar_allreduce( - handle_.get_comms(), static_cast(my_cluster_ids_deduped_v.size()), stream_); - } else { - return static_cast(my_cluster_ids_deduped_v.size()); - } - } - - void renumber_result(cuco::static_map const &hash_map, - vertex_t 
*d_cluster_vec, - vertex_t num_clusters) - { - if (graph_view_t::is_multi_gpu) { - // - // FIXME: Perhaps there's a general purpose function hidden here... - // Given a set of vertex_t values, and a distributed set of - // vertex properties, go to the proper node and retrieve - // the vertex properties and return them to this gpu. - // - std::size_t capacity{static_cast((local_num_vertices_) / 0.7)}; - cuco::static_map result_hash_map( - capacity, std::numeric_limits::max(), std::numeric_limits::max()); - - auto cluster_iter = thrust::make_transform_iterator(d_cluster_vec, [] __device__(vertex_t c) { - return detail::create_cuco_pair_t()(c); - }); - - result_hash_map.insert(cluster_iter, cluster_iter + local_num_vertices_); - - rmm::device_vector used_cluster_ids_v(result_hash_map.get_size()); - - auto transform_iter = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), - [d_result_hash_map = result_hash_map.get_device_view()] __device__(std::size_t idx) { - return d_result_hash_map.begin_slot()[idx].first.load(); - }); - - used_cluster_ids_v = detail::remove_elements_from_vector( - used_cluster_ids_v, - transform_iter, - transform_iter + result_hash_map.get_capacity(), - [vmax = std::numeric_limits::max()] __device__(vertex_t cluster) { - return cluster != vmax; - }, - stream_); - - auto partition_cluster_ids_iter = thrust::make_transform_iterator( - used_cluster_ids_v.begin(), - [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { - return d_vertex_device_view(v); - }); - - rmm::device_vector old_cluster_ids_v = - variable_shuffle(handle_, - used_cluster_ids_v.size(), - used_cluster_ids_v.begin(), - partition_cluster_ids_iter); - - rmm::device_vector original_gpus_v = - variable_shuffle( - handle_, - used_cluster_ids_v.size(), - thrust::make_constant_iterator(rank_), - partition_cluster_ids_iter); - - // Now each GPU has old cluster ids, let's compute new cluster ids - rmm::device_vector 
new_cluster_ids_v(old_cluster_ids_v.size()); - - thrust::transform(rmm::exec_policy(stream_)->on(stream_), - old_cluster_ids_v.begin(), - old_cluster_ids_v.end(), - new_cluster_ids_v.begin(), - [base_vertex_id = base_vertex_id_, - d_cluster = cluster_v_.data().get(), - d_hash_map = hash_map.get_device_view()] __device__(vertex_t cluster_id) { - vertex_t c = d_cluster[cluster_id - base_vertex_id]; - auto pos = d_hash_map.find(c); - return pos->second.load(); - }); - - // Shuffle everything back - old_cluster_ids_v = variable_shuffle( - handle_, old_cluster_ids_v.size(), old_cluster_ids_v.begin(), original_gpus_v.begin()); - new_cluster_ids_v = variable_shuffle( - handle_, new_cluster_ids_v.size(), new_cluster_ids_v.begin(), original_gpus_v.begin()); - - // Update result_hash_map - thrust::for_each_n( - rmm::exec_policy(stream_)->on(stream_), - thrust::make_zip_iterator( - thrust::make_tuple(old_cluster_ids_v.begin(), new_cluster_ids_v.begin())), - old_cluster_ids_v.size(), - [d_result_hash_map = result_hash_map.get_device_view()] __device__(auto pair) mutable { - auto pos = d_result_hash_map.find(thrust::get<0>(pair)); - pos->second.store(thrust::get<1>(pair)); - }); - - thrust::transform( - rmm::exec_policy(stream_)->on(stream_), - d_cluster_vec, - d_cluster_vec + number_of_vertices_, - d_cluster_vec, - [d_result_hash_map = result_hash_map.get_device_view()] __device__(vertex_t c) { - auto pos = d_result_hash_map.find(c); - return pos->second.load(); - }); - - } else { - thrust::transform(rmm::exec_policy(stream_)->on(stream_), - d_cluster_vec, - d_cluster_vec + number_of_vertices_, - d_cluster_vec, - [d_hash_map = hash_map.get_device_view(), - d_dst_cluster = dst_cluster_cache_v_.data()] __device__(vertex_t v) { - vertex_t c = d_dst_cluster[v]; - auto pos = d_hash_map.find(c); - return pos->second.load(); - }); - } - } - - void generate_supervertices_graph(cuco::static_map const &hash_map, - vertex_t num_clusters) - { - rmm::device_vector 
new_src_v(local_num_edges_); - rmm::device_vector new_dst_v(local_num_edges_); - rmm::device_vector new_weight_v(current_graph_view_.weights(), - current_graph_view_.weights() + local_num_edges_); - - thrust::transform(rmm::exec_policy(stream_)->on(stream_), - src_indices_v_.begin(), - src_indices_v_.end(), - new_src_v.begin(), - [base_src_vertex_id = base_src_vertex_id_, - d_src_cluster = src_cluster_cache_v_.data().get(), - d_hash_map = hash_map.get_device_view()] __device__(vertex_t v) { - vertex_t c = d_src_cluster[v - base_src_vertex_id]; - auto pos = d_hash_map.find(c); - return pos->second.load(); - }); - - thrust::transform(rmm::exec_policy(stream_)->on(stream_), - current_graph_view_.indices(), - current_graph_view_.indices() + local_num_edges_, - new_dst_v.begin(), - [base_dst_vertex_id = base_dst_vertex_id_, - d_dst_cluster = dst_cluster_cache_v_.data().get(), - d_hash_map = hash_map.get_device_view()] __device__(vertex_t v) { - vertex_t c = d_dst_cluster[v - base_dst_vertex_id]; - auto pos = d_hash_map.find(c); - return pos->second.load(); - }); - - // Combine common edges on local gpu - std::tie(new_src_v, new_dst_v, new_weight_v) = - combine_local_edges(new_src_v, new_dst_v, new_weight_v); - - if (graph_view_t::is_multi_gpu) { - // - // Shuffle the data to the proper GPU - // FIXME: This needs some performance exploration. It is - // possible (likely?) that the shrunken graph is - // more dense than the original graph. Perhaps that - // changes the dynamic of partitioning efficiently. - // - // For now, we're going to keep the partitioning the same, - // but because we've renumbered to lower numbers, fewer - // partitions will actually have data. 
- // - rmm::device_vector partition_v(new_src_v.size()); - - thrust::transform( - rmm::exec_policy(stream_)->on(stream_), - thrust::make_zip_iterator(thrust::make_tuple(new_src_v.begin(), new_dst_v.begin())), - thrust::make_zip_iterator(thrust::make_tuple(new_src_v.end(), new_dst_v.end())), - partition_v.begin(), - [d_edge_device_view = compute_partition_.edge_device_view()] __device__( - thrust::tuple tuple) { - return d_edge_device_view(thrust::get<0>(tuple), thrust::get<1>(tuple)); - }); - - new_src_v = variable_shuffle( - handle_, partition_v.size(), new_src_v.begin(), partition_v.begin()); - - new_dst_v = variable_shuffle( - handle_, partition_v.size(), new_dst_v.begin(), partition_v.begin()); - - new_weight_v = variable_shuffle( - handle_, partition_v.size(), new_weight_v.begin(), partition_v.begin()); - - // - // Now everything is on the correct node, again combine like edges - // - std::tie(new_src_v, new_dst_v, new_weight_v) = - combine_local_edges(new_src_v, new_dst_v, new_weight_v); - } - - // - // Now I have a COO of the new graph, distributed according to the - // original clustering (eventually this likely fits on one GPU and - // everything else is empty). 
- // - current_graph_ = - detail::create_graph(handle_, - new_src_v, - new_dst_v, - new_weight_v, - num_clusters, - experimental::graph_properties_t{true, true}, - current_graph_view_); + std::tie(current_graph_, numbering_map) = + coarsen_graph(handle_, current_graph_view_, dendrogram_->current_level_begin()); current_graph_view_ = current_graph_->view(); - src_indices_v_.resize(new_src_v.size()); - local_num_vertices_ = current_graph_view_.get_number_of_local_vertices(); local_num_rows_ = current_graph_view_.get_number_of_local_adj_matrix_partition_rows(); local_num_cols_ = current_graph_view_.get_number_of_local_adj_matrix_partition_cols(); - local_num_edges_ = new_src_v.size(); + base_vertex_id_ = current_graph_view_.get_local_vertex_first(); + + local_num_edges_ = thrust::transform_reduce( + thrust::host, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator( + current_graph_view_.get_number_of_local_adj_matrix_partitions()), + [this](auto indx) { + return current_graph_view_.get_number_of_local_adj_matrix_partition_edges(indx); + }, + size_t{0}, + thrust::plus()); + + src_indices_v_.resize(local_num_edges_); cugraph::detail::offsets_to_indices( current_graph_view_.offsets(), local_num_rows_, src_indices_v_.data().get()); - } -#endif - std:: - tuple, rmm::device_vector, rmm::device_vector> - combine_local_edges(rmm::device_vector &src_v, - rmm::device_vector &dst_v, - rmm::device_vector &weight_v) - { - thrust::stable_sort_by_key( - rmm::exec_policy(stream_)->on(stream_), - dst_v.begin(), - dst_v.end(), - thrust::make_zip_iterator(thrust::make_tuple(src_v.begin(), weight_v.begin()))); - thrust::stable_sort_by_key( - rmm::exec_policy(stream_)->on(stream_), - src_v.begin(), - src_v.end(), - thrust::make_zip_iterator(thrust::make_tuple(dst_v.begin(), weight_v.begin()))); + rmm::device_uvector numbering_indices(numbering_map.size(), stream_); + thrust::sequence(rmm::exec_policy(stream_)->on(stream_), + numbering_indices.begin(), + 
numbering_indices.end(), + base_vertex_id_); - rmm::device_vector combined_src_v(src_v.size()); - rmm::device_vector combined_dst_v(src_v.size()); - rmm::device_vector combined_weight_v(src_v.size()); + relabel( + handle_, + std::make_tuple(static_cast(numbering_map.begin()), + static_cast(numbering_indices.begin())), + local_num_vertices_, + dendrogram_->current_level_begin(), + dendrogram_->current_level_size()); - // - // Now we reduce by key to combine the weights of duplicate - // edges. - // - auto start = thrust::make_zip_iterator(thrust::make_tuple(src_v.begin(), dst_v.begin())); - auto new_start = - thrust::make_zip_iterator(thrust::make_tuple(combined_src_v.begin(), combined_dst_v.begin())); - auto new_end = thrust::reduce_by_key(rmm::exec_policy(stream_)->on(stream_), - start, - start + src_v.size(), - weight_v.begin(), - new_start, - combined_weight_v.begin(), - thrust::equal_to>(), - thrust::plus()); - - auto num_edges = thrust::distance(new_start, new_end.first); - - combined_src_v.resize(num_edges); - combined_dst_v.resize(num_edges); - combined_weight_v.resize(num_edges); - - return std::make_tuple(combined_src_v, combined_dst_v, combined_weight_v); + timer_stop(stream_); } protected: raft::handle_t const &handle_; cudaStream_t stream_; + std::unique_ptr> dendrogram_; + vertex_t number_of_vertices_; vertex_t base_vertex_id_{0}; vertex_t base_src_vertex_id_{0}; @@ -1723,7 +1305,6 @@ class Louvain { rmm::device_vector src_cluster_weights_cache_v_{}; rmm::device_vector dst_cluster_weights_cache_v_{}; - rmm::device_vector cluster_v_; rmm::device_vector src_cluster_cache_v_{}; rmm::device_vector dst_cluster_cache_v_{}; diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index 1aa7f37fa6b..058cbfe5966 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -286,8 +286,6 @@ void pagerank(raft::handle_t const& handle, CUGRAPH_FAIL("PageRank failed to converge."); } } - - return; } } // namespace detail diff --git a/cpp/src/experimental/relabel.cu b/cpp/src/experimental/relabel.cu new file mode 100644 index 00000000000..62bd6951f71 --- /dev/null +++ b/cpp/src/experimental/relabel.cu @@ -0,0 +1,243 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { + +template +void relabel(raft::handle_t const& handle, + std::tuple old_new_label_pairs, + vertex_t num_label_pairs, + vertex_t* labels /* [INOUT] */, + vertex_t num_labels, + bool do_expensive_check) +{ + double constexpr load_factor = 0.7; + + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "Relabel not supported on Pascal and older architectures."); + +#ifdef CUCO_STATIC_MAP_DEFINED + if (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + auto key_func = detail::compute_gpu_id_from_vertex_t{comm_size}; + + // find unique old labels (to be relabeled) + + rmm::device_uvector unique_old_labels(num_labels, handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels, + labels + num_labels, + unique_old_labels.data()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_old_labels.begin(), + unique_old_labels.end()); + unique_old_labels.resize( + thrust::distance( + unique_old_labels.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_old_labels.begin(), + unique_old_labels.end())), + handle.get_stream()); + unique_old_labels.shrink_to_fit(handle.get_stream()); + + // collect new labels for the unique old labels + + rmm::device_uvector new_labels_for_unique_old_labels(0, handle.get_stream()); + { + // shuffle the old_new_label_pairs based on applying the compute_gpu_id_from_vertex_t functor + // to the old labels + + rmm::device_uvector rx_label_pair_old_labels(0, handle.get_stream()); + rmm::device_uvector rx_label_pair_new_labels(0, handle.get_stream()); + { + 
rmm::device_uvector label_pair_old_labels(num_label_pairs, handle.get_stream()); + rmm::device_uvector label_pair_new_labels(num_label_pairs, handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + std::get<0>(old_new_label_pairs), + std::get<0>(old_new_label_pairs) + num_label_pairs, + label_pair_old_labels.begin()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + std::get<1>(old_new_label_pairs), + std::get<1>(old_new_label_pairs) + num_label_pairs, + label_pair_new_labels.begin()); + auto pair_first = thrust::make_zip_iterator( + thrust::make_tuple(label_pair_old_labels.begin(), label_pair_new_labels.begin())); + std::forward_as_tuple(std::tie(rx_label_pair_old_labels, rx_label_pair_new_labels), + std::ignore) = + groupby_gpuid_and_shuffle_values( + handle.get_comms(), + pair_first, + pair_first + num_label_pairs, + [key_func] __device__(auto val) { return key_func(thrust::get<0>(val)); }, + handle.get_stream()); + } + + // update intermediate relabel map + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream + + cuco::static_map relabel_map{ + static_cast(static_cast(rx_label_pair_old_labels.size()) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value}; + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(rx_label_pair_old_labels.begin(), rx_label_pair_new_labels.begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + relabel_map.insert(pair_first, pair_first + rx_label_pair_old_labels.size()); + + rx_label_pair_old_labels.resize(0, handle.get_stream()); + rx_label_pair_new_labels.resize(0, handle.get_stream()); + rx_label_pair_old_labels.shrink_to_fit(handle.get_stream()); + rx_label_pair_new_labels.shrink_to_fit(handle.get_stream()); + + // shuffle unique_old_labels, relabel using the 
intermediate relabel map, and shuffle back + + { + rmm::device_uvector rx_unique_old_labels(0, handle.get_stream()); + std::vector rx_value_counts{}; + std::tie(rx_unique_old_labels, rx_value_counts) = groupby_gpuid_and_shuffle_values( + handle.get_comms(), + unique_old_labels.begin(), + unique_old_labels.end(), + [key_func] __device__(auto val) { return key_func(val); }, + handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream + + relabel_map.find( + rx_unique_old_labels.begin(), + rx_unique_old_labels.end(), + rx_unique_old_labels + .begin()); // now rx_unique_old_labels holds new labels for the corresponding old labels + + std::tie(new_labels_for_unique_old_labels, std::ignore) = shuffle_values( + handle.get_comms(), rx_unique_old_labels.begin(), rx_value_counts, handle.get_stream()); + } + } + + cuco::static_map relabel_map( + static_cast(static_cast(unique_old_labels.size()) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value); + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(unique_old_labels.begin(), new_labels_for_unique_old_labels.begin())), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + + relabel_map.insert(pair_first, pair_first + unique_old_labels.size()); + relabel_map.find(labels, labels + num_labels, labels); + } else { + cuco::static_map relabel_map( + static_cast(static_cast(num_label_pairs) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value); + + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(std::get<0>(old_new_label_pairs), std::get<1>(old_new_label_pairs))), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + + relabel_map.insert(pair_first, pair_first + num_label_pairs); + relabel_map.find(labels, labels + 
num_labels, labels); + } + + if (do_expensive_check) { + CUGRAPH_EXPECTS( + thrust::count(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + labels, + labels + num_labels, + invalid_vertex_id::value) == 0, + "Invalid input argument: labels include old label values missing in old_new_label_pairs."); + } +#endif + + return; +} + +// explicit instantiation + +template void relabel(raft::handle_t const& handle, + std::tuple old_new_label_pairs, + int32_t num_label_pairs, + int32_t* labels, + int32_t num_labels, + bool do_expensive_check); + +template void relabel( + raft::handle_t const& handle, + std::tuple old_new_label_pairs, + int32_t num_label_pairs, + int32_t* labels, + int32_t num_labels, + bool do_expensive_check); + +template void relabel(raft::handle_t const& handle, + std::tuple old_new_label_pairs, + int64_t num_label_pairs, + int64_t* labels, + int64_t num_labels, + bool do_expensive_check); + +template void relabel( + raft::handle_t const& handle, + std::tuple old_new_label_pairs, + int64_t num_label_pairs, + int64_t* labels, + int64_t num_labels, + bool do_expensive_check); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu new file mode 100644 index 00000000000..6a5a1c732c2 --- /dev/null +++ b/cpp/src/experimental/renumber_edgelist.cu @@ -0,0 +1,821 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { +namespace detail { + +#ifdef CUCO_STATIC_MAP_DEFINED +template +rmm::device_uvector compute_renumber_map( + raft::handle_t const& handle, + vertex_t const* vertices, + vertex_t num_local_vertices /* relevant only if vertices != nullptr */, + vertex_t const* edgelist_major_vertices, + vertex_t const* edgelist_minor_vertices, + edge_t num_edgelist_edges) +{ + // FIXME: compare this sort based approach with hash based approach in both speed and memory + // footprint + + // 1. acquire (unique major label, count) pairs + + rmm::device_uvector tmp_labels(num_edgelist_edges, handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_major_vertices, + edgelist_major_vertices + num_edgelist_edges, + tmp_labels.begin()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + tmp_labels.begin(), + tmp_labels.end()); + rmm::device_uvector major_labels(tmp_labels.size(), handle.get_stream()); + rmm::device_uvector major_counts(major_labels.size(), handle.get_stream()); + auto major_pair_it = + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + tmp_labels.begin(), + tmp_labels.end(), + thrust::make_constant_iterator(edge_t{1}), + major_labels.begin(), + major_counts.begin()); + tmp_labels.resize(0, handle.get_stream()); + tmp_labels.shrink_to_fit(handle.get_stream()); + major_labels.resize(thrust::distance(major_labels.begin(), thrust::get<0>(major_pair_it)), + handle.get_stream()); + major_counts.resize(major_labels.size(), handle.get_stream()); + 
major_labels.shrink_to_fit(handle.get_stream()); + major_counts.shrink_to_fit(handle.get_stream()); + + // 2. acquire unique minor labels + + rmm::device_uvector minor_labels(num_edgelist_edges, handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_minor_vertices, + edgelist_minor_vertices + num_edgelist_edges, + minor_labels.begin()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + minor_labels.begin(), + minor_labels.end()); + auto minor_label_it = + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + minor_labels.begin(), + minor_labels.end()); + minor_labels.resize(thrust::distance(minor_labels.begin(), minor_label_it), handle.get_stream()); + minor_labels.shrink_to_fit(handle.get_stream()); + + // 3. merge major and minor labels and vertex labels + + rmm::device_uvector merged_labels(major_labels.size() + minor_labels.size(), + handle.get_stream()); + + rmm::device_uvector merged_counts(merged_labels.size(), handle.get_stream()); + thrust::merge_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + major_labels.begin(), + major_labels.end(), + minor_labels.begin(), + minor_labels.end(), + major_counts.begin(), + thrust::make_constant_iterator(edge_t{0}), + merged_labels.begin(), + merged_counts.begin()); + + major_labels.resize(0, handle.get_stream()); + major_counts.resize(0, handle.get_stream()); + minor_labels.resize(0, handle.get_stream()); + major_labels.shrink_to_fit(handle.get_stream()); + major_counts.shrink_to_fit(handle.get_stream()); + minor_labels.shrink_to_fit(handle.get_stream()); + + rmm::device_uvector labels(merged_labels.size(), handle.get_stream()); + rmm::device_uvector counts(labels.size(), handle.get_stream()); + auto pair_it = + thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + merged_labels.begin(), + merged_labels.end(), + merged_counts.begin(), + 
labels.begin(), + counts.begin()); + merged_labels.resize(0, handle.get_stream()); + merged_counts.resize(0, handle.get_stream()); + merged_labels.shrink_to_fit(handle.get_stream()); + merged_counts.shrink_to_fit(handle.get_stream()); + labels.resize(thrust::distance(labels.begin(), thrust::get<0>(pair_it)), handle.get_stream()); + counts.resize(labels.size(), handle.get_stream()); + labels.shrink_to_fit(handle.get_stream()); + counts.shrink_to_fit(handle.get_stream()); + + // 4. if multi-GPU, shuffle and reduce (label, count) pairs + + if (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + + auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(labels.begin(), counts.begin())); + rmm::device_uvector rx_labels(0, handle.get_stream()); + rmm::device_uvector rx_counts(0, handle.get_stream()); + std::forward_as_tuple(std::tie(rx_labels, rx_counts), std::ignore) = + groupby_gpuid_and_shuffle_values( + comm, + pair_first, + pair_first + labels.size(), + [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__( + auto val) { return key_func(thrust::get<0>(val)); }, + handle.get_stream()); + + labels.resize(rx_labels.size(), handle.get_stream()); + counts.resize(labels.size(), handle.get_stream()); + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_labels.begin(), + rx_labels.end(), + rx_counts.begin()); + pair_it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_labels.begin(), + rx_labels.end(), + rx_counts.begin(), + labels.begin(), + counts.begin()); + rx_labels.resize(0, handle.get_stream()); + rx_counts.resize(0, handle.get_stream()); + rx_labels.shrink_to_fit(handle.get_stream()); + rx_counts.shrink_to_fit(handle.get_stream()); + labels.resize(thrust::distance(labels.begin(), thrust::get<0>(pair_it)), handle.get_stream()); + counts.resize(labels.size(), handle.get_stream()); + labels.shrink_to_fit(handle.get_stream()); 
+ counts.shrink_to_fit(handle.get_stream()); + } + + // 5. if vertices != nullptr, add isolated vertices + + rmm::device_uvector isolated_vertices(0, handle.get_stream()); + if (vertices != nullptr) { + auto num_isolated_vertices = thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_local_vertices, + [label_first = labels.begin(), label_last = labels.end()] __device__(auto v) { + return !thrust::binary_search(thrust::seq, label_first, label_last, v); + }); + isolated_vertices.resize(num_isolated_vertices, handle.get_stream()); + thrust::copy_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertices, + vertices + num_local_vertices, + isolated_vertices.begin(), + [label_first = labels.begin(), label_last = labels.end()] __device__(auto v) { + return !thrust::binary_search(thrust::seq, label_first, label_last, v); + }); + } + + if (isolated_vertices.size() > 0) { + labels.resize(labels.size() + isolated_vertices.size(), handle.get_stream()); + counts.resize(labels.size(), handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + isolated_vertices.begin(), + isolated_vertices.end(), + labels.end() - isolated_vertices.size()); + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + counts.end() - isolated_vertices.size(), + counts.end(), + edge_t{0}); + } + + // 6. 
sort by degree + + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + counts.begin(), + counts.end(), + labels.begin(), + thrust::greater()); + + return std::move(labels); +} + +template +void expensive_check_edgelist( + raft::handle_t const& handle, + vertex_t const* local_vertices, + vertex_t num_local_vertices /* relevant only if local_vertices != nullptr */, + vertex_t const* edgelist_major_vertices, + vertex_t const* edgelist_minor_vertices, + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned /* relevant only if multi_gpu == true */) +{ + rmm::device_uvector sorted_local_vertices( + local_vertices != nullptr ? num_local_vertices : vertex_t{0}, handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + local_vertices, + local_vertices + num_local_vertices, + sorted_local_vertices.begin()); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + sorted_local_vertices.begin(), + sorted_local_vertices.end()); + + if (multi_gpu) { + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + + CUGRAPH_EXPECTS( + thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + local_vertices, + local_vertices + num_local_vertices, + [comm_rank, + key_func = + detail::compute_gpu_id_from_vertex_t{comm_size}] __device__(auto val) { + return key_func(val) != comm_rank; + }) == 0, + "Invalid input argument: local_vertices should be pre-shuffled."); + + auto edge_first = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices, edgelist_minor_vertices)); + CUGRAPH_EXPECTS( + thrust::count_if( 
+ rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edge_first, + edge_first + num_edgelist_edges, + [comm_rank, + key_func = + detail::compute_gpu_id_from_edge_t{is_hypergraph_partitioned, + comm_size, + row_comm_size, + col_comm_size}] __device__(auto edge) { + return key_func(thrust::get<0>(edge), thrust::get<1>(edge)) != comm_rank; + }) == 0, + "Invalid input argument: edgelist_major_vertices & edgelist_minor_vertices should be " + "pre-shuffled."); + + if (local_vertices != nullptr) { + rmm::device_uvector unique_edge_vertices(num_edgelist_edges * 2, + handle.get_stream()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_major_vertices, + edgelist_major_vertices + num_edgelist_edges, + unique_edge_vertices.begin()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_minor_vertices, + edgelist_minor_vertices + num_edgelist_edges, + unique_edge_vertices.begin() + num_edgelist_edges); + thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_edge_vertices.begin(), + unique_edge_vertices.end()); + unique_edge_vertices.resize( + thrust::distance( + unique_edge_vertices.begin(), + thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_edge_vertices.begin(), + unique_edge_vertices.end())), + handle.get_stream()); + + rmm::device_uvector rx_unique_edge_vertices(0, handle.get_stream()); + std::tie(rx_unique_edge_vertices, std::ignore) = groupby_gpuid_and_shuffle_values( + handle.get_comms(), + unique_edge_vertices.begin(), + unique_edge_vertices.end(), + [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__( + auto val) { return key_func(val); }, + handle.get_stream()); + + unique_edge_vertices = std::move(rx_unique_edge_vertices); + + CUGRAPH_EXPECTS( + thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + unique_edge_vertices.begin(), + 
unique_edge_vertices.end(), + [num_local_vertices, + sorted_local_vertices = sorted_local_vertices.data()] __device__(auto v) { + return !thrust::binary_search( + thrust::seq, sorted_local_vertices, sorted_local_vertices + num_local_vertices, v); + }) == 0, + "Invalid input argument: edgelist_major_vertices and/or edgelist_minor_vertices have " + "invalid vertex ID(s)."); + } + } else { + if (local_vertices != nullptr) { + CUGRAPH_EXPECTS( + thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_major_vertices, + edgelist_major_vertices + num_edgelist_edges, + [num_local_vertices, + sorted_local_vertices = sorted_local_vertices.data()] __device__(auto v) { + return !thrust::binary_search( + thrust::seq, sorted_local_vertices, sorted_local_vertices + num_local_vertices, v); + }) == 0, + "Invalid input argument: edgelist_major_vertices has invalid vertex ID(s)."); + + CUGRAPH_EXPECTS( + thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_minor_vertices, + edgelist_minor_vertices + num_edgelist_edges, + [num_local_vertices, + sorted_local_vertices = sorted_local_vertices.data()] __device__(auto v) { + return !thrust::binary_search( + thrust::seq, sorted_local_vertices, sorted_local_vertices + num_local_vertices, v); + }) == 0, + "Invalid input argument: edgelist_minor_vertices has invalid vertex ID(s)."); + } + } +} +#endif + +template +std::enable_if_t, partition_t, vertex_t, edge_t>> +renumber_edgelist(raft::handle_t const& handle, + vertex_t const* local_vertices, + vertex_t num_local_vertices /* relevant only if local_vertices != nullptr */, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check) +{ + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "Relabel not supported on 
Pascal and older architectures."); + +#ifdef CUCO_STATIC_MAP_DEFINED + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto const row_comm_rank = row_comm.get_rank(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_size = col_comm.get_size(); + auto const col_comm_rank = col_comm.get_rank(); + + if (do_expensive_check) { + expensive_check_edgelist(handle, + local_vertices, + num_local_vertices, + edgelist_major_vertices, + edgelist_minor_vertices, + num_edgelist_edges, + is_hypergraph_partitioned); + } + + // 1. compute renumber map + + auto renumber_map_labels = + detail::compute_renumber_map(handle, + local_vertices, + num_local_vertices, + edgelist_major_vertices, + edgelist_minor_vertices, + num_edgelist_edges); + + // 2. initialize partition_t object, number_of_vertices, and number_of_edges for the coarsened + // graph + + auto vertex_partition_counts = host_scalar_allgather( + comm, static_cast(renumber_map_labels.size()), handle.get_stream()); + std::vector vertex_partition_offsets(comm_size + 1, 0); + std::partial_sum(vertex_partition_counts.begin(), + vertex_partition_counts.end(), + vertex_partition_offsets.begin() + 1); + + partition_t partition(vertex_partition_offsets, + is_hypergraph_partitioned, + row_comm_size, + col_comm_size, + row_comm_rank, + col_comm_rank); + + auto number_of_vertices = vertex_partition_offsets.back(); + auto number_of_edges = host_scalar_allreduce(comm, num_edgelist_edges, handle.get_stream()); + + // 3. 
renumber edges + + if (is_hypergraph_partitioned) { + CUGRAPH_FAIL("unimplemented."); + } else { + double constexpr load_factor = 0.7; + + // FIXME: compare this hash based approach with a binary search based approach in both memory + // footprint and execution time + + { + vertex_t major_first{}; + vertex_t major_last{}; + std::tie(major_first, major_last) = partition.get_matrix_partition_major_range(0); + rmm::device_uvector renumber_map_major_labels(major_last - major_first, + handle.get_stream()); + std::vector recvcounts(row_comm_size); + for (int i = 0; i < row_comm_size; ++i) { + recvcounts[i] = partition.get_vertex_partition_size(col_comm_rank * row_comm_size + i); + } + std::vector displacements(row_comm_size, 0); + std::partial_sum(recvcounts.begin(), recvcounts.end() - 1, displacements.begin() + 1); + device_allgatherv(row_comm, + renumber_map_labels.begin(), + renumber_map_major_labels.begin(), + recvcounts, + displacements, + handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream + + cuco::static_map renumber_map{ + static_cast(static_cast(renumber_map_major_labels.size()) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value}; + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple(renumber_map_major_labels.begin(), + thrust::make_counting_iterator(major_first))), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + renumber_map.insert(pair_first, pair_first + renumber_map_major_labels.size()); + renumber_map.find(edgelist_major_vertices, + edgelist_major_vertices + num_edgelist_edges, + edgelist_major_vertices); + } + + { + vertex_t minor_first{}; + vertex_t minor_last{}; + std::tie(minor_first, minor_last) = partition.get_matrix_partition_minor_range(); + rmm::device_uvector renumber_map_minor_labels(minor_last - minor_first, + handle.get_stream()); + + // 
FIXME: this P2P is unnecessary if we apply the partitioning scheme used with hypergraph + // partitioning + auto comm_src_rank = row_comm_rank * col_comm_size + col_comm_rank; + auto comm_dst_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; + // FIXME: this branch may be no longer necessary with NCCL backend + if (comm_src_rank == comm_rank) { + assert(comm_dst_rank == comm_rank); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + renumber_map_labels.begin(), + renumber_map_labels.end(), + renumber_map_minor_labels.begin() + + (partition.get_vertex_partition_first(comm_src_rank) - + partition.get_vertex_partition_first(row_comm_rank * col_comm_size))); + } else { + device_sendrecv(comm, + renumber_map_labels.begin(), + renumber_map_labels.size(), + comm_dst_rank, + renumber_map_minor_labels.begin() + + (partition.get_vertex_partition_first(comm_src_rank) - + partition.get_vertex_partition_first(row_comm_rank * col_comm_size)), + static_cast(partition.get_vertex_partition_size(comm_src_rank)), + comm_src_rank, + handle.get_stream()); + } + + // FIXME: these broadcast operations can be placed between ncclGroupStart() and + // ncclGroupEnd() + for (int i = 0; i < col_comm_size; ++i) { + auto offset = partition.get_vertex_partition_first(row_comm_rank * col_comm_size + i) - + partition.get_vertex_partition_first(row_comm_rank * col_comm_size); + auto count = partition.get_vertex_partition_size(row_comm_rank * col_comm_size + i); + device_bcast(col_comm, + renumber_map_minor_labels.begin() + offset, + renumber_map_minor_labels.begin() + offset, + count, + i, + handle.get_stream()); + } + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // cuco::static_map currently does not take stream + + cuco::static_map renumber_map{ + static_cast(static_cast(renumber_map_minor_labels.size()) / load_factor), + invalid_vertex_id::value, + invalid_vertex_id::value}; + auto pair_first = 
thrust::make_transform_iterator( + thrust::make_zip_iterator(thrust::make_tuple(renumber_map_minor_labels.begin(), + thrust::make_counting_iterator(minor_first))), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + renumber_map.insert(pair_first, pair_first + renumber_map_minor_labels.size()); + renumber_map.find(edgelist_minor_vertices, + edgelist_minor_vertices + num_edgelist_edges, + edgelist_minor_vertices); + } + } + + return std::make_tuple( + std::move(renumber_map_labels), partition, number_of_vertices, number_of_edges); +#else + return std::make_tuple( + rmm::device_uvector(0, handle.get_stream()), + partition_t(std::vector(), false, int{0}, int{0}, int{0}, int{0}), + vertex_t{0}, + edge_t{0}); +#endif +} + +template +std::enable_if_t> renumber_edgelist( + raft::handle_t const& handle, + vertex_t const* vertices, + vertex_t num_vertices /* relevant only if vertices != nullptr */, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool do_expensive_check) +{ + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "Relabel not supported on Pascal and older architectures."); + +#ifdef CUCO_STATIC_MAP_DEFINED + if (do_expensive_check) { + expensive_check_edgelist(handle, + vertices, + num_vertices, + edgelist_major_vertices, + edgelist_minor_vertices, + num_edgelist_edges, + false); + } + + auto renumber_map_labels = + detail::compute_renumber_map(handle, + vertices, + num_vertices, + edgelist_major_vertices, + edgelist_minor_vertices, + num_edgelist_edges); + + double constexpr load_factor = 0.7; + + // FIXME: compare this hash based approach with a binary search based approach in both memory + // footprint and execution time + + cuco::static_map renumber_map{ + static_cast(static_cast(renumber_map_labels.size()) / load_factor), + invalid_vertex_id::value, + 
invalid_vertex_id::value}; + auto pair_first = thrust::make_transform_iterator( + thrust::make_zip_iterator( + thrust::make_tuple(renumber_map_labels.begin(), thrust::make_counting_iterator(vertex_t{0}))), + [] __device__(auto val) { + return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val)); + }); + renumber_map.insert(pair_first, pair_first + renumber_map_labels.size()); + renumber_map.find( + edgelist_major_vertices, edgelist_major_vertices + num_edgelist_edges, edgelist_major_vertices); + renumber_map.find( + edgelist_minor_vertices, edgelist_minor_vertices + num_edgelist_edges, edgelist_minor_vertices); + + return std::move(renumber_map_labels); +#else + return rmm::device_uvector(0, handle.get_stream()); +#endif +} + +} // namespace detail + +template +std::enable_if_t, partition_t, vertex_t, edge_t>> +renumber_edgelist(raft::handle_t const& handle, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check) +{ + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "Relabel not supported on Pascal and older architectures."); + return detail::renumber_edgelist(handle, + static_cast(nullptr), + vertex_t{0}, + edgelist_major_vertices, + edgelist_minor_vertices, + num_edgelist_edges, + is_hypergraph_partitioned, + do_expensive_check); +} + +template +std::enable_if_t> renumber_edgelist( + raft::handle_t const& handle, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool do_expensive_check) +{ + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "Relabel not supported on Pascal and older architectures."); + return detail::renumber_edgelist(handle, + static_cast(nullptr), + vertex_t{0} /* dummy */, + 
edgelist_major_vertices, + edgelist_minor_vertices, + num_edgelist_edges, + do_expensive_check); +} + +template +std::enable_if_t, partition_t, vertex_t, edge_t>> +renumber_edgelist(raft::handle_t const& handle, + vertex_t const* local_vertices, + vertex_t num_local_vertices, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check) +{ + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "Relabel not supported on Pascal and older architectures."); + return detail::renumber_edgelist(handle, + local_vertices, + num_local_vertices, + edgelist_major_vertices, + edgelist_minor_vertices, + num_edgelist_edges, + is_hypergraph_partitioned, + do_expensive_check); +} + +template +std::enable_if_t> renumber_edgelist( + raft::handle_t const& handle, + vertex_t const* vertices, + vertex_t num_vertices, + vertex_t* edgelist_major_vertices /* [INOUT] */, + vertex_t* edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool do_expensive_check) +{ + // FIXME: remove this check once we drop Pascal support + CUGRAPH_EXPECTS(handle.get_device_properties().major >= 7, + "Relabel not supported on Pascal and older architectures."); + return detail::renumber_edgelist(handle, + vertices, + num_vertices, + edgelist_major_vertices, + edgelist_minor_vertices, + num_edgelist_edges, + do_expensive_check); +} + +// explicit instantiation directives (EIDir's): +// +// instantiations for +// +template std::tuple, partition_t, int32_t, int32_t> +renumber_edgelist(raft::handle_t const& handle, + int32_t* edgelist_major_vertices /* [INOUT] */, + int32_t* edgelist_minor_vertices /* [INOUT] */, + int32_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check); + +template rmm::device_uvector renumber_edgelist( + raft::handle_t const& handle, + int32_t* 
edgelist_major_vertices /* [INOUT] */, + int32_t* edgelist_minor_vertices /* [INOUT] */, + int32_t num_edgelist_edges, + bool do_expensive_check); + +template std::tuple, partition_t, int32_t, int32_t> +renumber_edgelist(raft::handle_t const& handle, + int32_t const* local_vertices, + int32_t num_local_vertices, + int32_t* edgelist_major_vertices /* [INOUT] */, + int32_t* edgelist_minor_vertices /* [INOUT] */, + int32_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check); + +template rmm::device_uvector renumber_edgelist( + raft::handle_t const& handle, + int32_t const* vertices, + int32_t num_vertices, + int32_t* edgelist_major_vertices /* [INOUT] */, + int32_t* edgelist_minor_vertices /* [INOUT] */, + int32_t num_edgelist_edges, + bool do_expensive_check); + +// instantiations for +// +template std::tuple, partition_t, int32_t, int64_t> +renumber_edgelist(raft::handle_t const& handle, + int32_t* edgelist_major_vertices /* [INOUT] */, + int32_t* edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check); + +template rmm::device_uvector renumber_edgelist( + raft::handle_t const& handle, + int32_t* edgelist_major_vertices /* [INOUT] */, + int32_t* edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool do_expensive_check); + +template std::tuple, partition_t, int32_t, int64_t> +renumber_edgelist(raft::handle_t const& handle, + int32_t const* local_vertices, + int32_t num_local_vertices, + int32_t* edgelist_major_vertices /* [INOUT] */, + int32_t* edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check); + +template rmm::device_uvector renumber_edgelist( + raft::handle_t const& handle, + int32_t const* vertices, + int32_t num_vertices, + int32_t* edgelist_major_vertices /* [INOUT] */, + int32_t* edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool 
do_expensive_check); + +// instantiations for +// +template std::tuple, partition_t, int64_t, int64_t> +renumber_edgelist(raft::handle_t const& handle, + int64_t* edgelist_major_vertices /* [INOUT] */, + int64_t* edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check); + +template rmm::device_uvector renumber_edgelist( + raft::handle_t const& handle, + int64_t* edgelist_major_vertices /* [INOUT] */, + int64_t* edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool do_expensive_check); + +template std::tuple, partition_t, int64_t, int64_t> +renumber_edgelist(raft::handle_t const& handle, + int64_t const* local_vertices, + int64_t num_local_vertices, + int64_t* edgelist_major_vertices /* [INOUT] */, + int64_t* edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check); + +template rmm::device_uvector renumber_edgelist( + raft::handle_t const& handle, + int64_t const* vertices, + int64_t num_vertices, + int64_t* edgelist_major_vertices /* [INOUT] */, + int64_t* edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool do_expensive_check); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/sssp.cu b/cpp/src/experimental/sssp.cu index ebcde1b1444..4996b3734cb 100644 --- a/cpp/src/experimental/sssp.cu +++ b/cpp/src/experimental/sssp.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -249,8 +249,6 @@ void sssp(raft::handle_t const &handle, handle.get_stream())); // this is as necessary vertex_frontier will become out-of-scope once // this function returns (FIXME: should I stream sync in VertexFrontier // destructor?) 
- - return; } } // namespace detail diff --git a/cpp/src/layout/force_atlas2.cu b/cpp/src/layout/force_atlas2.cu index ef00f504d86..6da9b77b45d 100644 --- a/cpp/src/layout/force_atlas2.cu +++ b/cpp/src/layout/force_atlas2.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -38,7 +38,7 @@ void force_atlas2(GraphCOOView &graph, bool verbose, internals::GraphBasedDimRedCallback *callback) { - CUGRAPH_EXPECTS(pos != nullptr, "Invalid API parameter: pos array should be of size 2 * V"); + CUGRAPH_EXPECTS(pos != nullptr, "Invalid input argument: pos array should be of size 2 * V"); CUGRAPH_EXPECTS(graph.number_of_vertices != 0, "Invalid input: Graph is empty"); if (!barnes_hut_optimize) { diff --git a/cpp/src/linear_assignment/README-hungarian.md b/cpp/src/linear_assignment/README-hungarian.md new file mode 100644 index 00000000000..42dabd7cfbc --- /dev/null +++ b/cpp/src/linear_assignment/README-hungarian.md @@ -0,0 +1,36 @@ +# LAP +Implementation of ***O(n^3) Alternating Tree Variant*** of Hungarian Algorithm on NVIDIA CUDA-enabled GPU. + +This implementation solves a batch of ***k*** **Linear Assignment Problems (LAP)**, each with ***nxn*** matrix of single floating point cost values. At optimality, the algorithm produces an assignment with ***minimum*** cost. + +The API can be used to query optimal primal and dual costs, optimal assignment vector, and optimal row/column dual vectors for each subproblem in the batch. + +cuGraph exposes the Hungarian algorithm, the actual implementation is contained in the RAFT library which contains some common tools and kernels shared between cuGraph and cuML. + +Following parameters can be used to tune the performance of algorithm: + +1. 
epsilon: (in raft/lap/lap_kernels.cuh) This parameter controls the tolerance on the floating point precision. Setting this too small will result in increased solution time because the algorithm will search for precise solutions. Setting it too high may cause some inaccuracies. + +2. BLOCKDIMX, BLOCKDIMY: (in raft/lap/lap_functions.cuh) These parameters control threads_per_block to be used along the given dimension. Set these according to the device specifications and occupancy calculation. + +***This library is licensed under Apache License 2.0. Please cite our paper, if this library helps you in your research.*** + +- Harvard citation style + + Date, K. and Nagi, R., 2016. GPU-accelerated Hungarian algorithms for the Linear Assignment Problem. Parallel Computing, 57, pp.52-72. + +- BibTeX Citation block to be used in LaTeX bibliography file: + +``` +@article{date2016gpu, + title={GPU-accelerated Hungarian algorithms for the Linear Assignment Problem}, + author={Date, Ketan and Nagi, Rakesh}, + journal={Parallel Computing}, + volume={57}, + pages={52--72}, + year={2016}, + publisher={Elsevier} +} +``` + +The paper is available online on [ScienceDirect](https://www.sciencedirect.com/science/article/abs/pii/S016781911630045X). diff --git a/cpp/src/linear_assignment/hungarian.cu b/cpp/src/linear_assignment/hungarian.cu index 164a386c6dd..40f7be52c90 100644 --- a/cpp/src/linear_assignment/hungarian.cu +++ b/cpp/src/linear_assignment/hungarian.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -67,9 +67,9 @@ weight_t hungarian_sparse(raft::handle_t const &handle, vertex_t *assignment, cudaStream_t stream) { - CUGRAPH_EXPECTS(assignment != nullptr, "Invalid API parameter: assignment pointer is NULL"); + CUGRAPH_EXPECTS(assignment != nullptr, "Invalid input argument: assignment pointer is NULL"); CUGRAPH_EXPECTS(graph.edge_data != nullptr, - "Invalid API parameter: graph must have edge data (costs)"); + "Invalid input argument: graph must have edge data (costs)"); #ifdef TIMING HighResTimer hr_timer; diff --git a/cpp/src/link_analysis/gunrock_hits.cpp b/cpp/src/link_analysis/gunrock_hits.cpp index 8662c3bea79..5ffaacfe7a6 100644 --- a/cpp/src/link_analysis/gunrock_hits.cpp +++ b/cpp/src/link_analysis/gunrock_hits.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -42,9 +42,9 @@ void hits(cugraph::GraphCSRView const &graph, weight_t *hubs, weight_t *authorities) { - CUGRAPH_EXPECTS(hubs != nullptr, "Invalid API parameter: hubs array should be of size V"); + CUGRAPH_EXPECTS(hubs != nullptr, "Invalid input argument: hubs array should be of size V"); CUGRAPH_EXPECTS(authorities != nullptr, - "Invalid API parameter: authorities array should be of size V"); + "Invalid input argument: authorities array should be of size V"); // // NOTE: gunrock doesn't support passing a starting value diff --git a/cpp/src/link_analysis/pagerank.cu b/cpp/src/link_analysis/pagerank.cu deleted file mode 100644 index e5da24e328d..00000000000 --- a/cpp/src/link_analysis/pagerank.cu +++ /dev/null @@ -1,432 +0,0 @@ -/* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. - * - * NVIDIA CORPORATION and its licensors retain all intellectual property - * and proprietary rights in and to this software, related documentation - * and any modifications thereto. 
Any use, reproduction, disclosure or - * distribution of this software and related documentation without an express - * license agreement from NVIDIA CORPORATION is strictly prohibited. - * - */ - -// Pagerank solver -// Author: Alex Fender afender@nvidia.com - -#include -#include -#include -#include -#include -#include -#include -#include -#include "cub/cub.cuh" - -#include -#include -#include - -#include -#include "pagerank_1D.cuh" -#include "utilities/graph_utils.cuh" - -#include - -namespace cugraph { -namespace detail { - -#ifdef DEBUG -#define PR_VERBOSE -#endif - -template -bool pagerankIteration(raft::handle_t const &handle, - IndexType n, - IndexType e, - IndexType const *cscPtr, - IndexType const *cscInd, - ValueType *cscVal, - ValueType alpha, - ValueType *a, - ValueType *b, - float tolerance, - int iter, - int max_iter, - ValueType *&tmp, - void *cub_d_temp_storage, - size_t cub_temp_storage_bytes, - ValueType *&pr, - ValueType *residual) -{ - ValueType dot_res; -//#if defined(CUDART_VERSION) and CUDART_VERSION >= 11000 -#if 1 - { - raft::matrix::sparse_matrix_t const r_csr_m{ - handle, cscPtr, cscInd, cscVal, n, e}; - r_csr_m.mv(1.0, tmp, 0.0, pr); - } -#else - CUDA_TRY(cub::DeviceSpmv::CsrMV(cub_d_temp_storage, - cub_temp_storage_bytes, - cscVal, - (IndexType *)cscPtr, - (IndexType *)cscInd, - tmp, - pr, - n, - n, - e)); -#endif - scal(n, alpha, pr); - dot_res = dot(n, a, tmp); - axpy(n, dot_res, b, pr); - scal(n, (ValueType)1.0 / nrm2(n, pr), pr); - axpy(n, (ValueType)-1.0, pr, tmp); - *residual = nrm2(n, tmp); - if (*residual < tolerance) { - scal(n, (ValueType)1.0 / nrm1(n, pr), pr); - return true; - } else { - if (iter < max_iter) { - // FIXME: Copy the pagerank vector results to the tmp vector, since there - // are still raw pointers in pagerank pointing to tmp vector locations - // that were std::swapped out in the solver. A thrust::swap would - // probably be more efficent if the vectors were passed everywhere instead - // of pointers. 
std::swap is unsafe though. Just copying for now, as this - // may soon be replaced by the pattern accelerator. - copy(n, pr, tmp); - } else { - scal(n, (ValueType)1.0 / nrm1(n, pr), pr); - } - return false; - } -} - -template -int pagerankSolver(raft::handle_t const &handle, - IndexType n, - IndexType e, - IndexType const *cscPtr, - IndexType const *cscInd, - ValueType *cscVal, - IndexType *prsVtx, - ValueType *prsVal, - IndexType prsLen, - bool has_personalization, - ValueType alpha, - ValueType *a, - bool has_guess, - float tolerance, - int max_iter, - ValueType *&pagerank_vector, - ValueType *&residual) -{ - int max_it, i = 0; - float tol; - bool converged = false; - ValueType randomProbability = static_cast(1.0 / n); - ValueType *tmp_d{nullptr}; - ValueType *b_d{nullptr}; - void *cub_d_temp_storage = NULL; - size_t cub_temp_storage_bytes = 0; - - if (max_iter > 0) - max_it = max_iter; - else - max_it = 500; - - if (tolerance == 0.0f) - tol = 1.0E-6f; - else if (tolerance < 1.0f && tolerance > 0.0f) - tol = tolerance; - else - return -1; - - if (alpha <= 0.0f || alpha >= 1.0f) return -1; - - rmm::device_vector b(n); - b_d = b.data().get(); - -#if 1 /* temporary solution till https://github.com/NVlabs/cub/issues/162 is resolved */ - thrust::device_vector tmp(n); - tmp_d = tmp.data().get(); -#else - rmm::device_vector tmp(n); - tmp_d = pr.data().get(); -#endif - // FIXME: this should take a passed CUDA strema instead of default nullptr - CHECK_CUDA(nullptr); - - if (!has_guess) { - fill(n, pagerank_vector, randomProbability); - fill(n, tmp_d, randomProbability); - } else { - copy(n, pagerank_vector, tmp_d); - } - - if (has_personalization) { - ValueType sum = nrm1(prsLen, prsVal); - if (static_cast(0) == sum) { - fill(n, b_d, randomProbability); - } else { - scal(n, static_cast(1.0 / sum), prsVal); - fill(n, b_d, static_cast(0)); - scatter(prsLen, prsVal, b_d, prsVtx); - } - } else { - fill(n, b_d, randomProbability); - } - update_dangling_nodes(n, a, alpha); - 
-//#if defined(CUDART_VERSION) and CUDART_VERSION >= 11000 -#if 1 - { - raft::matrix::sparse_matrix_t const r_csr_m{ - handle, cscPtr, cscInd, cscVal, n, e}; - r_csr_m.mv(1.0, tmp_d, 0.0, pagerank_vector); - } -#else - CUDA_TRY(cub::DeviceSpmv::CsrMV(cub_d_temp_storage, - cub_temp_storage_bytes, - cscVal, - (IndexType *)cscPtr, - (IndexType *)cscInd, - tmp_d, - pagerank_vector, - n, - n, - e)); -#endif - // Allocate temporary storage - rmm::device_buffer cub_temp_storage(cub_temp_storage_bytes); - cub_d_temp_storage = cub_temp_storage.data(); - -#ifdef PR_VERBOSE - std::stringstream ss; - ss.str(std::string()); - ss << " ------------------PageRank------------------" << std::endl; - ss << " --------------------------------------------" << std::endl; - ss << std::setw(10) << "Iteration" << std::setw(15) << "Residual" << std::endl; - ss << " --------------------------------------------" << std::endl; - std::cout << ss.str(); -#endif - - while (!converged && i < max_it) { - i++; - converged = pagerankIteration(handle, - n, - e, - cscPtr, - cscInd, - cscVal, - alpha, - a, - b_d, - tol, - i, - max_it, - tmp_d, - cub_d_temp_storage, - cub_temp_storage_bytes, - pagerank_vector, - residual); -#ifdef PR_VERBOSE - ss.str(std::string()); - ss << std::setw(10) << i; - ss.precision(3); - ss << std::setw(15) << std::scientific << *residual << std::endl; - std::cout << ss.str(); -#endif - } -#ifdef PR_VERBOSE - std::cout << " --------------------------------------------" << std::endl; -#endif - - return converged ? 
0 : 1; -} - -// template int pagerankSolver ( int n, int e, int *cscPtr, int *cscInd,half *cscVal, -// half alpha, half *a, bool has_guess, float tolerance, int max_iter, half * &pagerank_vector, half -// * &residual); -template int pagerankSolver(raft::handle_t const &handle, - int n, - int e, - int const *cscPtr, - int const *cscInd, - float *cscVal, - int *prsVtx, - float *prsVal, - int prsLen, - bool has_personalization, - float alpha, - float *a, - bool has_guess, - float tolerance, - int max_iter, - float *&pagerank_vector, - float *&residual); -template int pagerankSolver(raft::handle_t const &handle, - int n, - int e, - const int *cscPtr, - int const *cscInd, - double *cscVal, - int *prsVtx, - double *prsVal, - int prsLen, - bool has_personalization, - double alpha, - double *a, - bool has_guess, - float tolerance, - int max_iter, - double *&pagerank_vector, - double *&residual); - -template -void pagerank_impl(raft::handle_t const &handle, - GraphCSCView const &graph, - WT *pagerank, - VT personalization_subset_size = 0, - VT *personalization_subset = nullptr, - WT *personalization_values = nullptr, - double alpha = 0.85, - double tolerance = 1e-5, - int64_t max_iter = 100, - bool has_guess = false) -{ - bool has_personalization = false; - int prsLen = 0; - VT m = graph.number_of_vertices; - ET nnz = graph.number_of_edges; - int status{0}; - WT *d_pr{nullptr}, *d_val{nullptr}, *d_leaf_vector{nullptr}; - WT res = 1.0; - WT *residual = &res; - - if (personalization_subset_size != 0) { - CUGRAPH_EXPECTS(personalization_subset != nullptr, - "Invalid API parameter: personalization_subset array should be of size " - "personalization_subset_size"); - CUGRAPH_EXPECTS(personalization_values != nullptr, - "Invalid API parameter: personalization_values array should be of size " - "personalization_subset_size"); - CUGRAPH_EXPECTS(personalization_subset_size <= m, - "Personalization size should be smaller than V"); - has_personalization = true; - prsLen = 
static_cast(personalization_subset_size); - } - -#if 1 /* temporary solution till https://github.com/NVlabs/cub/issues/162 is resolved */ - thrust::device_vector pr(m); - d_pr = pr.data().get(); -#else - rmm::device_vector pr(m); - d_pr = pr.data().get(); -#endif - - rmm::device_vector leaf_vector(m); - rmm::device_vector val(nnz); - - d_leaf_vector = leaf_vector.data().get(); - d_val = val.data().get(); - - // The templating for HT_matrix_csc_coo assumes that m, nnz and data are all the same type - HT_matrix_csc_coo(m, nnz, graph.offsets, graph.indices, d_val, d_leaf_vector); - - if (has_guess) { copy(m, (WT *)pagerank, d_pr); } - - status = pagerankSolver(handle, - m, - nnz, - graph.offsets, - graph.indices, - d_val, - personalization_subset, - personalization_values, - prsLen, - has_personalization, - alpha, - d_leaf_vector, - has_guess, - tolerance, - max_iter, - d_pr, - residual); - - switch (status) { - case 0: break; - case -1: CUGRAPH_FAIL("Error : bad parameters in Pagerank"); - case 1: break; // Warning : Pagerank did not reached the desired tolerance - default: CUGRAPH_FAIL("Pagerank exec failed"); - } - - copy(m, d_pr, (WT *)pagerank); -} -} // namespace detail - -template -void pagerank(raft::handle_t const &handle, - GraphCSCView const &graph, - WT *pagerank, - VT personalization_subset_size, - VT *personalization_subset, - WT *personalization_values, - double alpha, - double tolerance, - int64_t max_iter, - bool has_guess) -{ - CUGRAPH_EXPECTS(pagerank != nullptr, "Invalid API parameter: Pagerank array should be of size V"); - // Multi-GPU - if (handle.comms_initialized()) { - CUGRAPH_EXPECTS(has_guess == false, - "Invalid API parameter: Multi-GPU Pagerank does not guess, please use the " - "single GPU version for this feature"); - CUGRAPH_EXPECTS(max_iter > 0, "The number of iteration must be positive"); - cugraph::mg::pagerank(handle, - graph, - pagerank, - personalization_subset_size, - personalization_subset, - personalization_values, - alpha, - 
max_iter, - tolerance); - } else // Single GPU - return detail::pagerank_impl(handle, - graph, - pagerank, - personalization_subset_size, - personalization_subset, - personalization_values, - alpha, - tolerance, - max_iter, - has_guess); -} - -// explicit instantiation -template void pagerank(raft::handle_t const &handle, - GraphCSCView const &graph, - float *pagerank, - int personalization_subset_size, - int *personalization_subset, - float *personalization_values, - double alpha, - double tolerance, - int64_t max_iter, - bool has_guess); -template void pagerank(raft::handle_t const &handle, - GraphCSCView const &graph, - double *pagerank, - int personalization_subset_size, - int *personalization_subset, - double *personalization_values, - double alpha, - double tolerance, - int64_t max_iter, - bool has_guess); - -} // namespace cugraph diff --git a/cpp/src/link_analysis/pagerank_1D.cu b/cpp/src/link_analysis/pagerank_1D.cu deleted file mode 100644 index 3774a364cf1..00000000000 --- a/cpp/src/link_analysis/pagerank_1D.cu +++ /dev/null @@ -1,188 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -// Author: Alex Fender afender@nvidia.com - -#include -#include -#include "pagerank_1D.cuh" -#include "utilities/graph_utils.cuh" - -namespace cugraph { -namespace mg { - -template -__global__ void transition_kernel(const size_t e, const VT *ind, const VT *degree, WT *val) -{ - for (auto i = threadIdx.x + blockIdx.x * blockDim.x; i < e; i += gridDim.x * blockDim.x) - val[i] = 1.0 / degree[ind[i]]; // Degree contains IN degree. So all degree[ind[i]] were - // incremented by definition (no div by 0). -} - -template -Pagerank::Pagerank(const raft::handle_t &handle_, GraphCSCView const &G) - : comm(handle_.get_comms()), - bookmark(G.number_of_vertices), - prev_pr(G.number_of_vertices), - val(G.local_edges[comm.get_rank()]), - handle(handle_), - has_personalization(false) -{ - v_glob = G.number_of_vertices; - v_loc = G.local_vertices[comm.get_rank()]; - e_loc = G.local_edges[comm.get_rank()]; - part_off = G.local_offsets; - local_vertices = G.local_vertices; - off = G.offsets; - ind = G.indices; - blocks = handle_.get_device_properties().maxGridSize[0]; - threads = handle_.get_device_properties().maxThreadsPerBlock; - sm_count = handle_.get_device_properties().multiProcessorCount; - - is_setup = false; -} - -template -Pagerank::~Pagerank() -{ -} - -template -void Pagerank::transition_vals(const VT *degree) -{ - if (e_loc > 0) { - int threads = std::min(e_loc, this->threads); - int blocks = std::min(32 * sm_count, this->blocks); - transition_kernel<<>>(e_loc, ind, degree, val.data().get()); - CHECK_CUDA(nullptr); - } -} - -template -void Pagerank::flag_leafs(const VT *degree) -{ - if (v_glob > 0) { - int threads = std::min(v_glob, this->threads); - int blocks = std::min(32 * sm_count, this->blocks); - cugraph::detail::flag_leafs_kernel - <<>>(v_glob, degree, bookmark.data().get()); - CHECK_CUDA(nullptr); - } -} - -// Artificially create the google matrix by setting val and bookmark -template -void Pagerank::setup(WT _alpha, - VT *degree, - VT 
personalization_subset_size, - VT *personalization_subset, - WT *personalization_values) -{ - if (!is_setup) { - alpha = _alpha; - WT zero = 0.0; - WT one = 1.0; - // Update dangling node vector - cugraph::detail::fill(v_glob, bookmark.data().get(), zero); - flag_leafs(degree); - cugraph::detail::update_dangling_nodes(v_glob, bookmark.data().get(), alpha); - - // Transition matrix - transition_vals(degree); - - // personalize - if (personalization_subset_size != 0) { - CUGRAPH_EXPECTS(personalization_subset != nullptr, - "Invalid API parameter: personalization_subset array should be of size " - "personalization_subset_size"); - CUGRAPH_EXPECTS(personalization_values != nullptr, - "Invalid API parameter: personalization_values array should be of size " - "personalization_subset_size"); - CUGRAPH_EXPECTS(personalization_subset_size <= v_glob, - "Personalization size should be smaller than V"); - - WT sum = cugraph::detail::nrm1(personalization_subset_size, personalization_values); - if (sum != zero) { - has_personalization = true; - personalization_vector.resize(v_glob); - cugraph::detail::fill(v_glob, personalization_vector.data().get(), zero); - cugraph::detail::scal(v_glob, one / sum, personalization_values); - cugraph::detail::scatter(personalization_subset_size, - personalization_values, - personalization_vector.data().get(), - personalization_subset); - } - } - is_setup = true; - } else - CUGRAPH_FAIL("MG PageRank : Setup can be called only once"); -} - -// run the power iteration on the google matrix -template -int Pagerank::solve(int max_iter, float tolerance, WT *pagerank) -{ - if (is_setup) { - WT dot_res; - WT one = 1.0; - WT *pr = pagerank; - cugraph::detail::fill(v_glob, pagerank, one / v_glob); - cugraph::detail::fill(v_glob, prev_pr.data().get(), one / v_glob); - // This cuda sync was added to fix #426 - // This should not be requiered in theory - // This is not needed on one GPU at this time - cudaDeviceSynchronize(); - dot_res = 
cugraph::detail::dot(v_glob, bookmark.data().get(), pr); - MGcsrmv spmv_solver( - handle, local_vertices, part_off, off, ind, val.data().get(), pagerank); - - WT residual; - int i; - for (i = 0; i < max_iter; ++i) { - spmv_solver.run(pagerank); - cugraph::detail::scal(v_glob, alpha, pr); - - // personalization - if (has_personalization) - cugraph::detail::axpy(v_glob, dot_res, personalization_vector.data().get(), pr); - else - cugraph::detail::addv(v_glob, dot_res * (one / v_glob), pr); - - dot_res = cugraph::detail::dot(v_glob, bookmark.data().get(), pr); - cugraph::detail::scal(v_glob, one / cugraph::detail::nrm2(v_glob, pr), pr); - - // convergence check - cugraph::detail::axpy(v_glob, (WT)-1.0, pr, prev_pr.data().get()); - residual = cugraph::detail::nrm2(v_glob, prev_pr.data().get()); - if (residual < tolerance) - break; - else - cugraph::detail::copy(v_glob, pr, prev_pr.data().get()); - } - cugraph::detail::scal(v_glob, one / cugraph::detail::nrm1(v_glob, pr), pr); - return i; - } else { - CUGRAPH_FAIL("MG PageRank : Solve was called before setup"); - } -} - -template class Pagerank; -template class Pagerank; - -} // namespace mg -} // namespace cugraph - -#include "utilities/eidir_graph_utils.hpp" diff --git a/cpp/src/link_analysis/pagerank_1D.cuh b/cpp/src/link_analysis/pagerank_1D.cuh deleted file mode 100644 index feb410daa9a..00000000000 --- a/cpp/src/link_analysis/pagerank_1D.cuh +++ /dev/null @@ -1,125 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
- * See the License for the specific language governing permissions and - * limitations under the License. - */ - -// Author: Alex Fender afender@nvidia.com - -#pragma once - -#include -#include -#include - -#include "utilities/error.hpp" -#include "utilities/spmv_1D.cuh" - -namespace cugraph { -namespace mg { - -template -class Pagerank { - private: - VT v_glob{}; // global number of vertices - VT v_loc{}; // local number of vertices - ET e_loc{}; // local number of edges - WT alpha{}; // damping factor - bool has_personalization; - // CUDA - const raft::comms::comms_t &comm; // info about the mg comm setup - cudaStream_t stream; - int blocks; - int threads; - int sm_count; - - // Vertex offsets for each partition. - VT *part_off; - VT *local_vertices; - - // Google matrix - ET *off; - VT *ind; - - rmm::device_vector val; // values of the substochastic matrix - rmm::device_vector bookmark; // constant vector with dangling node info - rmm::device_vector prev_pr; // record the last pagerank for convergence check - rmm::device_vector personalization_vector; // personalization vector after reconstruction - - bool is_setup; - raft::handle_t const &handle; // raft handle propagation for SpMV, etc. 
- - public: - Pagerank(const raft::handle_t &handle, const GraphCSCView &G); - ~Pagerank(); - - void transition_vals(const VT *degree); - - void flag_leafs(const VT *degree); - - // Artificially create the google matrix by setting val and bookmark - void setup(WT _alpha, - VT *degree, - VT personalization_subset_size, - VT *personalization_subset, - WT *personalization_values); - - // run the power iteration on the google matrix, return the number of iterations - int solve(int max_iter, float tolerance, WT *pagerank); -}; - -template -int pagerank(raft::handle_t const &handle, - const GraphCSCView &G, - WT *pagerank_result, - VT personalization_subset_size, - VT *personalization_subset, - WT *personalization_values, - const double damping_factor = 0.85, - const int64_t n_iter = 100, - const double tolerance = 1e-5) -{ - // null pointers check - CUGRAPH_EXPECTS(G.offsets != nullptr, "Invalid API parameter - offsets is null"); - CUGRAPH_EXPECTS(G.indices != nullptr, "Invalid API parameter - indidices is null"); - CUGRAPH_EXPECTS(pagerank_result != nullptr, - "Invalid API parameter - pagerank output memory must be allocated"); - - // parameter values - CUGRAPH_EXPECTS(damping_factor > 0.0, - "Invalid API parameter - invalid damping factor value (alpha<0)"); - CUGRAPH_EXPECTS(damping_factor < 1.0, - "Invalid API parameter - invalid damping factor value (alpha>1)"); - CUGRAPH_EXPECTS(n_iter > 0, "Invalid API parameter - n_iter must be > 0"); - - rmm::device_vector degree(G.number_of_vertices); - - // in-degree of CSC (equivalent to out-degree of original edge list) - G.degree(degree.data().get(), DegreeDirection::IN); - - // Allocate and intialize Pagerank class - Pagerank pr_solver(handle, G); - - // Set all constants info - pr_solver.setup(damping_factor, - degree.data().get(), - personalization_subset_size, - personalization_subset, - personalization_values); - - // Run pagerank - return pr_solver.solve(n_iter, tolerance, pagerank_result); -} - -} // namespace mg -} 
// namespace cugraph diff --git a/cpp/src/link_prediction/jaccard.cu b/cpp/src/link_prediction/jaccard.cu index 70952974b39..83a4ec6e713 100644 --- a/cpp/src/link_prediction/jaccard.cu +++ b/cpp/src/link_prediction/jaccard.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -315,7 +315,7 @@ int jaccard_pairs(vertex_t n, template void jaccard(GraphCSRView const &graph, WT const *weights, WT *result) { - CUGRAPH_EXPECTS(result != nullptr, "Invalid API parameter: result pointer is NULL"); + CUGRAPH_EXPECTS(result != nullptr, "Invalid input argument: result pointer is NULL"); rmm::device_vector weight_i(graph.number_of_edges); rmm::device_vector weight_s(graph.number_of_edges); @@ -352,9 +352,9 @@ void jaccard_list(GraphCSRView const &graph, VT const *second, WT *result) { - CUGRAPH_EXPECTS(result != nullptr, "Invalid API parameter: result pointer is NULL"); - CUGRAPH_EXPECTS(first != nullptr, "Invalid API parameter: first is NULL"); - CUGRAPH_EXPECTS(second != nullptr, "Invalid API parameter: second in NULL"); + CUGRAPH_EXPECTS(result != nullptr, "Invalid input argument: result pointer is NULL"); + CUGRAPH_EXPECTS(first != nullptr, "Invalid input argument: first is NULL"); + CUGRAPH_EXPECTS(second != nullptr, "Invalid input argument: second in NULL"); rmm::device_vector weight_i(num_pairs, WT{0.0}); rmm::device_vector weight_s(num_pairs); diff --git a/cpp/src/link_prediction/overlap.cu b/cpp/src/link_prediction/overlap.cu index e3f80b50d9a..83fdc799649 100644 --- a/cpp/src/link_prediction/overlap.cu +++ b/cpp/src/link_prediction/overlap.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -316,7 +316,7 @@ int overlap_pairs(vertex_t n, template void overlap(GraphCSRView const &graph, WT const *weights, WT *result) { - CUGRAPH_EXPECTS(result != nullptr, "Invalid API parameter: result pointer is NULL"); + CUGRAPH_EXPECTS(result != nullptr, "Invalid input argument: result pointer is NULL"); rmm::device_vector weight_i(graph.number_of_edges); rmm::device_vector weight_s(graph.number_of_edges); @@ -353,9 +353,9 @@ void overlap_list(GraphCSRView const &graph, VT const *second, WT *result) { - CUGRAPH_EXPECTS(result != nullptr, "Invalid API parameter: result pointer is NULL"); - CUGRAPH_EXPECTS(first != nullptr, "Invalid API parameter: first column is NULL"); - CUGRAPH_EXPECTS(second != nullptr, "Invalid API parameter: second column is NULL"); + CUGRAPH_EXPECTS(result != nullptr, "Invalid input argument: result pointer is NULL"); + CUGRAPH_EXPECTS(first != nullptr, "Invalid input argument: first column is NULL"); + CUGRAPH_EXPECTS(second != nullptr, "Invalid input argument: second column is NULL"); rmm::device_vector weight_i(num_pairs); rmm::device_vector weight_s(num_pairs); diff --git a/cpp/src/structure/graph.cu b/cpp/src/structure/graph.cu index 63ef725c3b7..056ad39fefc 100644 --- a/cpp/src/structure/graph.cu +++ b/cpp/src/structure/graph.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -148,3 +148,5 @@ template class GraphCOOView; template class GraphCompressedSparseBaseView; template class GraphCompressedSparseBaseView; } // namespace cugraph + +#include "utilities/eidir_graph_utils.hpp" diff --git a/cpp/src/traversal/sssp.cu b/cpp/src/traversal/sssp.cu index 4018c9d9878..6ffbbbf462b 100644 --- a/cpp/src/traversal/sssp.cu +++ b/cpp/src/traversal/sssp.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -247,7 +247,7 @@ void sssp(GraphCSRView const &graph, VT *predecessors, const VT source_vertex) { - CUGRAPH_EXPECTS(distances || predecessors, "Invalid API parameter, both outputs are nullptr"); + CUGRAPH_EXPECTS(distances || predecessors, "Invalid input argument, both outputs are nullptr"); if (typeid(VT) != typeid(int)) CUGRAPH_FAIL("Unsupported vertex id data type, please use int"); if (typeid(ET) != typeid(int)) CUGRAPH_FAIL("Unsupported edge id data type, please use int"); diff --git a/cpp/src/traversal/tsp.cu b/cpp/src/traversal/tsp.cu new file mode 100644 index 00000000000..c669246bc49 --- /dev/null +++ b/cpp/src/traversal/tsp.cu @@ -0,0 +1,252 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include "tsp.hpp" +#include "tsp_solver.hpp" + +namespace cugraph { +namespace detail { + +TSP::TSP(raft::handle_t &handle, + int const *vtx_ptr, + float const *x_pos, + float const *y_pos, + int nodes, + int restarts, + bool beam_search, + int k, + int nstart, + bool verbose, + int *route) + : handle_(handle), + vtx_ptr_(vtx_ptr), + x_pos_(x_pos), + y_pos_(y_pos), + nodes_(nodes), + restarts_(restarts), + beam_search_(beam_search), + k_(k), + nstart_(nstart), + verbose_(verbose), + route_(route), + stream_(handle_.get_stream()), + max_blocks_(handle_.get_device_properties().maxGridSize[0]), + max_threads_(handle_.get_device_properties().maxThreadsPerBlock), + warp_size_(handle_.get_device_properties().warpSize), + sm_count_(handle_.get_device_properties().multiProcessorCount), + restart_batch_(4096) +{ + allocate(); +} + +void TSP::allocate() +{ + // Scalars + mylock_ = mylock_scalar_.data(); + best_tour_ = best_tour_scalar_.data(); + climbs_ = climbs_scalar_.data(); + + // Vectors + neighbors_vec_.resize((k_ + 1) * nodes_); + // pre-allocate workspace for climbs, each block needs a separate permutation space and search + // buffer. We allocate a work buffer that will store the computed distances, px, py and the route. + // We align it on the warp size. 
+ work_vec_.resize(sizeof(float) * restart_batch_ * + ((4 * nodes_ + 3 + warp_size_ - 1) / warp_size_ * warp_size_)); + + // Pointers + neighbors_ = neighbors_vec_.data().get(); + work_ = work_vec_.data().get(); +} + +float TSP::compute() +{ + float valid_coo_dist = 0.f; + int num_restart_batches = (restarts_ + restart_batch_ - 1) / restart_batch_; + int restart_resid = restarts_ - (num_restart_batches - 1) * restart_batch_; + int global_best = INT_MAX; + float *soln = nullptr; + int *route_sol = nullptr; + int best = 0; + std::vector h_x_pos; + std::vector h_y_pos; + h_x_pos.reserve(nodes_ + 1); + h_y_pos.reserve(nodes_ + 1); + + // Stats + int n_timers = 3; + long total_climbs = 0; + std::vector h_times; + struct timeval starttime, endtime; + + // KNN call + knn(); + + if (verbose_) { + std::cout << "Doing " << num_restart_batches - 1 << " batches of size " << restart_batch_ + << ", with " << restart_resid << " tail\n"; + std::cout << "configuration: " << nodes_ << " nodes, " << restarts_ << " restart\n"; + std::cout << "optimizing graph with kswap = " << kswaps << "\n"; + } + + // Tell the cache how we want it to behave + cudaFuncSetCacheConfig(search_solution, cudaFuncCachePreferEqual); + + int threads = best_thread_count(nodes_, max_threads_, sm_count_, warp_size_); + if (verbose_) std::cout << "Calculated best thread number = " << threads << "\n"; + + rmm::device_vector times(n_timers * threads + n_timers); + h_times.reserve(n_timers * threads + n_timers); + + gettimeofday(&starttime, NULL); + for (int b = 0; b < num_restart_batches; ++b) { + reset<<<1, 1, 0, stream_>>>(mylock_, best_tour_, climbs_); + CHECK_CUDA(stream_); + + if (b == num_restart_batches - 1) restart_batch_ = restart_resid; + + search_solution<<>>(mylock_, + best_tour_, + vtx_ptr_, + beam_search_, + k_, + nodes_, + neighbors_, + x_pos_, + y_pos_, + work_, + nstart_, + times.data().get(), + climbs_, + threads); + + CHECK_CUDA(stream_); + cudaDeviceSynchronize(); + + CUDA_TRY(cudaMemcpy(&best, 
best_tour_, sizeof(int), cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + if (verbose_) std::cout << "Best reported by kernel = " << best << "\n"; + + if (best < global_best) { + global_best = best; + CUDA_TRY(cudaMemcpyFromSymbol(&soln, best_soln, sizeof(void *))); + cudaDeviceSynchronize(); + CUDA_TRY(cudaMemcpyFromSymbol(&route_sol, best_route, sizeof(void *))); + cudaDeviceSynchronize(); + } + total_climbs += climbs_scalar_.value(stream_); + } + gettimeofday(&endtime, NULL); + double runtime = + endtime.tv_sec + endtime.tv_usec / 1e6 - starttime.tv_sec - starttime.tv_usec / 1e6; + long long moves = 1LL * total_climbs * (nodes_ - 2) * (nodes_ - 1) / 2; + + raft::copy(route_, route_sol, nodes_, stream_); + + CUDA_TRY(cudaMemcpy(h_x_pos.data(), soln, sizeof(float) * (nodes_ + 1), cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + CUDA_TRY(cudaMemcpy( + h_y_pos.data(), soln + nodes_ + 1, sizeof(float) * (nodes_ + 1), cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + + for (int i = 0; i < nodes_; ++i) { + if (verbose_) { std::cout << h_x_pos[i] << " " << h_y_pos[i] << "\n"; } + valid_coo_dist += euclidean_dist(h_x_pos.data(), h_y_pos.data(), i, i + 1); + } + + CUDA_TRY(cudaMemcpy(h_times.data(), + times.data().get(), + sizeof(float) * n_timers * threads + n_timers, + cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + + if (verbose_) { + std::cout << "Search runtime = " << runtime << ", " << moves * 1e-9 / runtime << " Gmoves/s\n"; + std::cout << "Optimized tour length = " << global_best << "\n"; + print_times(h_times, n_timers, handle_.get_device(), threads); + } + + return valid_coo_dist; +} + +void TSP::knn() +{ + if (verbose_) std::cout << "Looking at " << k_ << " nearest neighbors\n"; + + int dim = 2; + bool row_major_order = false; + + rmm::device_vector input(nodes_ * dim); + float *input_ptr = input.data().get(); + raft::copy(input_ptr, x_pos_, nodes_, stream_); + raft::copy(input_ptr + nodes_, y_pos_, nodes_, stream_); + + 
rmm::device_vector search_data(nodes_ * dim); + float *search_data_ptr = search_data.data().get(); + raft::copy(search_data_ptr, input_ptr, nodes_ * dim, stream_); + + rmm::device_vector distances(nodes_ * (k_ + 1)); + float *distances_ptr = distances.data().get(); + + std::vector input_vec; + std::vector sizes_vec; + input_vec.push_back(input_ptr); + sizes_vec.push_back(nodes_); + + // k neighbors + 1 is needed because the nearest neighbor of each point is + // the point itself that we don't want to take into account. + + raft::spatial::knn::brute_force_knn(handle_, + input_vec, + sizes_vec, + dim, + search_data_ptr, + nodes_, + neighbors_, + distances_ptr, + k_ + 1, + row_major_order, + row_major_order); +} +} // namespace detail + +float traveling_salesperson(raft::handle_t &handle, + int const *vtx_ptr, + float const *x_pos, + float const *y_pos, + int nodes, + int restarts, + bool beam_search, + int k, + int nstart, + bool verbose, + int *route) +{ + RAFT_EXPECTS(route != nullptr, "route should equal the number of nodes"); + RAFT_EXPECTS(nodes > 0, "nodes should be strictly positive"); + RAFT_EXPECTS(restarts > 0, "restarts should be strictly positive"); + RAFT_EXPECTS(nstart >= 0 && nstart < nodes, "nstart should be between 0 and nodes - 1"); + RAFT_EXPECTS(k > 0, "k should be strictly positive"); + + cugraph::detail::TSP tsp( + handle, vtx_ptr, x_pos, y_pos, nodes, restarts, beam_search, k, nstart, verbose, route); + return tsp.compute(); +} + +} // namespace cugraph diff --git a/cpp/src/traversal/tsp.hpp b/cpp/src/traversal/tsp.hpp new file mode 100644 index 00000000000..b065b779b96 --- /dev/null +++ b/cpp/src/traversal/tsp.hpp @@ -0,0 +1,88 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace cugraph { +namespace detail { +class TSP { + public: + TSP(raft::handle_t &handle, + int const *vtx_ptr, + float const *x_pos, + float const *y_pos, + int nodes, + int restarts, + bool beam_search, + int k, + int nstart, + bool verbose, + int *route); + + void allocate(); + float compute(); + void knn(); + ~TSP(){}; + + private: + // Config + raft::handle_t &handle_; + cudaStream_t stream_; + int max_blocks_; + int max_threads_; + int warp_size_; + int sm_count_; + // how large a grid we want to run, this is fixed + int restart_batch_; + + // TSP + int const *vtx_ptr_; + int *route_; + float const *x_pos_; + float const *y_pos_; + int nodes_; + int restarts_; + bool beam_search_; + int k_; + int nstart_; + bool verbose_; + + // Scalars + rmm::device_scalar mylock_scalar_; + rmm::device_scalar best_tour_scalar_; + rmm::device_scalar climbs_scalar_; + + int *mylock_; + int *best_tour_; + int *climbs_; + + // Vectors + rmm::device_vector neighbors_vec_; + rmm::device_vector work_vec_; + + int64_t *neighbors_; + int *work_; + int *work_route_; +}; +} // namespace detail +} // namespace cugraph diff --git a/cpp/src/traversal/tsp_solver.hpp b/cpp/src/traversal/tsp_solver.hpp new file mode 100644 index 00000000000..20d826cac5c --- /dev/null +++ b/cpp/src/traversal/tsp_solver.hpp @@ -0,0 +1,414 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include "tsp_utils.hpp" + +namespace cugraph { +namespace detail { + +__device__ float *best_soln; +__device__ int *best_route; +extern __shared__ int shbuf[]; + +__global__ void reset(int *mylock, int *best_tour, int *climbs) +{ + *mylock = 0; + *best_tour = INT_MAX; + *climbs = 0; + best_soln = nullptr; + best_route = nullptr; +} + +// random permutation kernel +__device__ void random_init(float const *posx, + float const *posy, + int const *vtx_ptr, + int *path, + float *px, + float *py, + int const nstart, + int const nodes) +{ + // Fill values + for (int i = threadIdx.x; i <= nodes; i += blockDim.x) { + px[i] = posx[i]; + py[i] = posy[i]; + path[i] = vtx_ptr[i]; + } + + __syncthreads(); + + if (threadIdx.x == 0) { /* serial permutation as starting point */ + // swap to start at nstart node + raft::swapVals(px[0], px[nstart]); + raft::swapVals(py[0], py[nstart]); + raft::swapVals(path[0], path[nstart]); + + curandState rndstate; + curand_init(blockIdx.x, 0, 0, &rndstate); + for (int i = 1; i < nodes; i++) { + int j = curand(&rndstate) % (nodes - 1 - i) + i; + if (i == j) continue; + raft::swapVals(px[i], px[j]); + raft::swapVals(py[i], py[j]); + raft::swapVals(path[i], path[j]); + } + px[nodes] = px[0]; /* close the loop now, avoid special cases later */ + py[nodes] = py[0]; + path[nodes] = path[0]; + } +} + 
+// Use KNN as a starting solution +__device__ void knn_init(float const *posx, + float const *posy, + int const *vtx_ptr, + int64_t const *neighbors, + int *buf, + int *path, + float *px, + float *py, + int const nstart, + int const nodes, + int const K) +{ + for (int i = threadIdx.x; i < nodes; i += blockDim.x) buf[i] = 0; + + __syncthreads(); + + if (threadIdx.x == 0) { + curandState rndstate; + curand_init(blockIdx.x, 0, 0, &rndstate); + int progress = 0; + int initlen = 0; + + px[0] = posx[nstart]; + py[0] = posy[nstart]; + path[0] = vtx_ptr[nstart]; + int head = nstart; + int v = 0; + buf[head] = 1; + while (progress < nodes - 1) { // beam search as starting point + for (int i = 1; i <= progress; i++) buf[i] = 0; + progress = 0; // reset current location in path and visited array + initlen = 0; + int randjumps = 0; + while (progress < nodes - 1) { + int nj = curand(&rndstate) % K; + int linked = 0; + for (int nh = 0; nh < K; ++nh) { + // offset (idx / K) + 1 filters the points as their own nearest neighbors. 
+ int offset = (K * head + nj) / K + 1; + v = neighbors[K * head + nj + offset]; + if (v < nodes && buf[v] == 0) { + head = v; + progress += 1; + buf[head] = 1; + linked = 1; + break; + } + nj = (nj + 1) % K; + } + if (linked == 0) { + if (randjumps > nodes - 1) + break; // give up on this traversal, we failed to find a next link + randjumps += 1; + int nr = (head + 1) % nodes; // jump to next node + while (buf[nr] == 1) { nr = (nr + 1) % nodes; } + head = nr; + progress += 1; + buf[head] = 1; + } + // copy from input into beam-search order, update len + px[progress] = posx[head]; + py[progress] = posy[head]; + path[progress] = vtx_ptr[head]; + initlen += __float2int_rn(euclidean_dist(px, py, progress, progress - 1)); + } + } + px[nodes] = px[nstart]; + py[nodes] = py[nstart]; + path[nodes] = path[nstart]; + initlen += __float2int_rn(euclidean_dist(px, py, nodes, nstart)); + } +} + +__device__ void two_opt_search( + int *buf, float *px, float *py, int *shbuf, int *minchange, int *mini, int *minj, int const nodes) +{ + __shared__ float shmem_x[tilesize]; + __shared__ float shmem_y[tilesize]; + + for (int ii = 0; ii < nodes - 2; ii += blockDim.x) { + int i = ii + threadIdx.x; + float pxi0, pyi0, pxi1, pyi1, pxj1, pyj1; + if (i < nodes - 2) { + minchange[0] -= buf[i]; + pxi0 = px[i]; + pyi0 = py[i]; + pxi1 = px[i + 1]; + pyi1 = py[i + 1]; + pxj1 = px[nodes]; + pyj1 = py[nodes]; + } + for (int jj = nodes - 1; jj >= ii + 2; jj -= tilesize) { + int bound = jj - tilesize + 1; + for (int k = threadIdx.x; k < tilesize; k += blockDim.x) { + if (k + bound >= ii + 2) { + shmem_x[k] = px[k + bound]; + shmem_y[k] = py[k + bound]; + shbuf[k] = buf[k + bound]; + } + } + __syncthreads(); + + int lower = bound; + if (lower < (i + 2)) lower = i + 2; + for (int j = jj; j >= lower; j--) { + int jm = j - bound; + float pxj0 = shmem_x[jm]; + float pyj0 = shmem_y[jm]; + int delta = + shbuf[jm] + + __float2int_rn(sqrtf((pxi0 - pxj0) * (pxi0 - pxj0) + (pyi0 - pyj0) * (pyi0 - pyj0))) + + 
__float2int_rn(sqrtf((pxi1 - pxj1) * (pxi1 - pxj1) + (pyi1 - pyj1) * (pyi1 - pyj1))); + pxj1 = pxj0; + pyj1 = pyj0; + + if (delta < minchange[0]) { + minchange[0] = delta; + mini[0] = i; + minj[0] = j; + } + } + __syncthreads(); + } + + if (i < nodes - 2) { minchange[0] += buf[i]; } + } +} + +// This function being runned for each block +__device__ void hill_climbing( + float *px, float *py, int *buf, int *path, int *shbuf, int const nodes, int *climbs) +{ + __shared__ int best_change[kswaps]; + __shared__ int best_i[kswaps]; + __shared__ int best_j[kswaps]; + + int minchange; + int mini; + int minj; + int kswaps_active = kswaps; + int myswaps = 0; + + // Hill climbing, iteratively improve from the starting guess + do { + if (threadIdx.x == 0) { + for (int k = 0; k < kswaps; k++) { + best_change[k] = 0; + best_i[k] = 0; + best_j[k] = 0; + } + } + __syncthreads(); + for (int i = threadIdx.x; i < nodes; i += blockDim.x) { + buf[i] = -__float2int_rn(euclidean_dist(px, py, i, i + 1)); + } + __syncthreads(); + + // Reset + minchange = 0; + mini = 0; + minj = 0; + + // Find best indices + two_opt_search(buf, px, py, shbuf, &minchange, &mini, &minj, nodes); + __syncthreads(); + + // Stats only + if (threadIdx.x == 0) atomicAdd(climbs, 1); + + shbuf[threadIdx.x] = minchange; + + int j = blockDim.x; // warp reduction to find best thread results + do { + int k = (j + 1) / 2; + if ((threadIdx.x + k) < j) { + shbuf[threadIdx.x] = min(shbuf[threadIdx.x + k], shbuf[threadIdx.x]); + } + j = k; + __syncthreads(); + } while (j > 1); // thread winner for this k is in shbuf[0] + + if (threadIdx.x == 0) { + best_change[0] = shbuf[0]; // sort best result in shared + } + __syncthreads(); + + if (minchange == shbuf[0]) { // My thread is as good as the winner + shbuf[1] = threadIdx.x; // store thread ID in shbuf[1] + } + __syncthreads(); + + if (threadIdx.x == shbuf[1]) { // move from thread local to shared + best_i[0] = mini; // shared best indices for compatibility checks + best_j[0] = 
minj; + } + __syncthreads(); + + // look for more compatible swaps + for (int kmin = 1; kmin < kswaps_active; kmin++) { + // disallow swaps that conflict with ones already picked + for (int kchk = kmin - 1; kchk >= 0; --kchk) { + if ((mini < (best_j[kchk] + 1)) && (minj > (best_i[kchk] - 1))) { + minchange = shbuf[threadIdx.x] = 0; + } + __syncthreads(); + } + shbuf[threadIdx.x] = minchange; + + j = blockDim.x; + do { + int k = (j + 1) / 2; + if ((threadIdx.x + k) < j) { + shbuf[threadIdx.x] = min(shbuf[threadIdx.x + k], shbuf[threadIdx.x]); + } + j = k; + __syncthreads(); + } while (j > 1); // thread winner for this k is in shbuf[0] + + if (threadIdx.x == 0) { + best_change[kmin] = shbuf[0]; // store best result in shared + } + __syncthreads(); + + if (minchange == shbuf[0]) { // My thread is as good as the winner + shbuf[1] = threadIdx.x; // store thread ID in shbuf[1] + __threadfence_block(); + } + __syncthreads(); + + if (threadIdx.x == shbuf[1]) { // move from thread local to shared + best_i[kmin] = mini; // store swap targets + best_j[kmin] = minj; + __threadfence_block(); + } + __syncthreads(); + // look for the best compatible move + } // end loop over kmin + minchange = best_change[0]; + myswaps += 1; + for (int kmin = 0; kmin < kswaps_active; kmin++) { + int sum = best_i[kmin] + best_j[kmin] + 1; // = mini + minj +1 + // this is a reversal of all nodes included in the range [ i+1, j ] + for (int i = threadIdx.x; (i + i) < sum; i += blockDim.x) { + if (best_i[kmin] < i) { + int j = sum - i; + raft::swapVals(px[i], px[j]); + raft::swapVals(py[i], py[j]); + raft::swapVals(path[i], path[j]); + } + } + __syncthreads(); + } + } while (minchange < 0 && myswaps < 2 * nodes); +} + +__device__ void get_optimal_tour( + int *mylock, int *best_tour, float *px, float *py, int *path, int *shbuf, int const nodes) +{ + // Now find actual length of the last tour, result of the climb + int term = 0; + for (int i = threadIdx.x; i < nodes; i += blockDim.x) { + term += 
__float2int_rn(euclidean_dist(px, py, i, i + 1)); + } + shbuf[threadIdx.x] = term; + __syncthreads(); + + int j = blockDim.x; // block level reduction + do { + int k = (j + 1) / 2; + if ((threadIdx.x + k) < j) { shbuf[threadIdx.x] += shbuf[threadIdx.x + k]; } + j = k; // divide active warp size in half + __syncthreads(); + } while (j > 1); + term = shbuf[0]; + + if (threadIdx.x == 0) { + atomicMin(best_tour, term); + while (atomicExch(mylock, 1) != 0) + ; // acquire + if (best_tour[0] == term) { + best_soln = px; + best_route = path; + } + *mylock = 0; // release + __threadfence(); + } +} + +__global__ __launch_bounds__(2048, 2) void search_solution(int *mylock, + int *best_tour, + int const *vtx_ptr, + bool beam_search, + int const K, + int nodes, + int64_t const *neighbors, + float const *posx, + float const *posy, + int *work, + int const nstart, + float *times, + int *climbs, + int threads) +{ + int *buf = &work[blockIdx.x * ((4 * nodes + 3 + 31) / 32 * 32)]; + float *px = (float *)(&buf[nodes]); + float *py = &px[nodes + 1]; + int *path = (int *)(&py[nodes + 1]); + __shared__ int shbuf[tilesize]; + clock_t start; + + start = clock64(); + if (!beam_search) + random_init(posx, posy, vtx_ptr, path, px, py, nstart, nodes); + else + knn_init(posx, posy, vtx_ptr, neighbors, buf, path, px, py, nstart, nodes, K); + __syncthreads(); + times[threadIdx.x] = clock64() - start; + + start = clock64(); + hill_climbing(px, py, buf, path, shbuf, nodes, climbs); + __syncthreads(); + times[threads + threadIdx.x + 1] = clock64() - start; + + start = clock64(); + get_optimal_tour(mylock, best_tour, px, py, path, shbuf, nodes); + times[2 * threads + threadIdx.x + 1] = clock64() - start; +} +} // namespace detail +} // namespace cugraph diff --git a/cpp/src/traversal/tsp_utils.hpp b/cpp/src/traversal/tsp_utils.hpp new file mode 100644 index 00000000000..3faa2efea3b --- /dev/null +++ b/cpp/src/traversal/tsp_utils.hpp @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#define tilesize 128 +#define kswaps 4 + +#include +#include +#include + +namespace cugraph { +namespace detail { + +__host__ __device__ inline float euclidean_dist(float *px, float *py, int a, int b) +{ + return sqrtf((px[a] - px[b]) * (px[a] - px[b]) + (py[a] - py[b]) * (py[a] - py[b])); +} + +static std::vector device_func = {"Find First", "Hill Climbing", "Retrieve Path"}; + +void print_times(std::vector &h_times, int const n_timers, int device, int threads) +{ + int clock_rate; + cudaDeviceGetAttribute(&clock_rate, cudaDevAttrClockRate, device); + + double total = 0; + h_times[0] /= (float)clock_rate; + total += h_times[0]; + for (int i = 1; i < n_timers; ++i) { + h_times[i * threads + 1] /= (float)clock_rate; + total += h_times[i * threads + 1]; + } + std::cout << "Stats: \n"; + std::cout << device_func[0] << " time: " << h_times[0] * 1e-3 << " " + << (h_times[0] / total) * 100.0 << "%\n"; + for (int i = 1; i < n_timers; ++i) { + std::cout << device_func[i] << " time: " << h_times[i * threads + 1] * 1e-3 << " " + << (h_times[i * threads + 1] / total) * 100.0 << "%\n"; + } +} + +// Get maximum number of threads we can run on based on number of nodes, +// shared memory usage, max threads per block and SM, max blocks for SM and registers per SM. 
+int best_thread_count(int nodes, int max_threads, int sm_count, int warp_size) +{ + int smem, blocks, thr, perf; + int const max_threads_sm = 2048; + int max = nodes - 2; + int best = 0; + int bthr = 4; + + if (max > max_threads) max = max_threads; + + for (int threads = 1; threads <= max; ++threads) { + smem = sizeof(int) * threads + 2 * sizeof(float) * tilesize + sizeof(int) * tilesize; + blocks = (16384 * 2) / smem; + if (blocks > sm_count) blocks = sm_count; + thr = (threads + warp_size - 1) / warp_size * warp_size; + while (blocks * thr > max_threads_sm) blocks--; + perf = threads * blocks; + if (perf > best) { + best = perf; + bthr = threads; + } + } + + return bthr; +} +} // namespace detail +} // namespace cugraph diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 6c8ef98e2e2..e95a001cb91 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -15,16 +15,20 @@ */ #include +#include +#include #include #include #include #include #include #include +#include #include #include #include +#include namespace cugraph { namespace cython { @@ -86,7 +90,6 @@ create_graph(raft::handle_t const& handle, graph_container_t const& graph_contai reinterpret_cast(graph_container.dst_vertices), reinterpret_cast(graph_container.weights), static_cast(graph_container.num_partition_edges)}; - return std::make_unique>( handle, edgelist, @@ -123,12 +126,18 @@ void populate_graph_container(graph_container_t& graph_container, bool do_expensive_check{true}; bool hypergraph_partitioned{false}; - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); - auto const row_comm_size = row_comm.get_size(); // pcols - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - auto const col_comm_size = col_comm.get_size(); // prows + if (multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); // pcols + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); // prows + graph_container.row_comm_size = row_comm_size; + graph_container.col_comm_size = col_comm_size; + graph_container.row_comm_rank = row_comm_rank; + graph_container.col_comm_rank = col_comm_rank; + } graph_container.vertex_partition_offsets = vertex_partition_offsets; graph_container.src_vertices = src_vertices; @@ -143,10 +152,6 @@ void populate_graph_container(graph_container_t& graph_container, graph_container.transposed = transposed; graph_container.is_multi_gpu = multi_gpu; graph_container.hypergraph_partitioned = hypergraph_partitioned; - 
graph_container.row_comm_size = row_comm_size; - graph_container.col_comm_size = col_comm_size; - graph_container.row_comm_rank = row_comm_rank; - graph_container.col_comm_rank = col_comm_rank; graph_container.sorted_by_degree = sorted_by_degree; graph_container.do_expensive_check = do_expensive_check; @@ -463,33 +468,7 @@ void call_pagerank(raft::handle_t const& handle, int64_t max_iter, bool has_guess) { - if (graph_container.graph_type == graphTypeEnum::GraphCSCViewFloat) { - pagerank(handle, - *(graph_container.graph_ptr_union.GraphCSCViewFloatPtr), - reinterpret_cast(p_pagerank), - static_cast(personalization_subset_size), - reinterpret_cast(personalization_subset), - reinterpret_cast(personalization_values), - alpha, - tolerance, - max_iter, - has_guess); - graph_container.graph_ptr_union.GraphCSCViewFloatPtr->get_vertex_identifiers( - reinterpret_cast(identifiers)); - } else if (graph_container.graph_type == graphTypeEnum::GraphCSCViewDouble) { - pagerank(handle, - *(graph_container.graph_ptr_union.GraphCSCViewDoublePtr), - reinterpret_cast(p_pagerank), - static_cast(personalization_subset_size), - reinterpret_cast(personalization_subset), - reinterpret_cast(personalization_values), - alpha, - tolerance, - max_iter, - has_guess); - graph_container.graph_ptr_union.GraphCSCViewDoublePtr->get_vertex_identifiers( - reinterpret_cast(identifiers)); - } else if (graph_container.graph_type == graphTypeEnum::graph_t) { + if (graph_container.is_multi_gpu) { if (graph_container.edgeType == numberTypeEnum::int32Type) { auto graph = detail::create_graph(handle, graph_container); @@ -504,7 +483,7 @@ void call_pagerank(raft::handle_t const& handle, static_cast(tolerance), max_iter, has_guess, - false); + true); } else if (graph_container.edgeType == numberTypeEnum::int64Type) { auto graph = detail::create_graph(handle, graph_container); @@ -519,9 +498,39 @@ void call_pagerank(raft::handle_t const& handle, static_cast(tolerance), max_iter, has_guess, - false); - } else { - 
CUGRAPH_FAIL("vertexType/edgeType combination unsupported"); + true); + } + } else { + if (graph_container.edgeType == numberTypeEnum::int32Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::pagerank(handle, + graph->view(), + static_cast(nullptr), + reinterpret_cast(personalization_subset), + reinterpret_cast(personalization_values), + static_cast(personalization_subset_size), + reinterpret_cast(p_pagerank), + static_cast(alpha), + static_cast(tolerance), + max_iter, + has_guess, + true); + } else if (graph_container.edgeType == numberTypeEnum::int64Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::pagerank(handle, + graph->view(), + static_cast(nullptr), + reinterpret_cast(personalization_subset), + reinterpret_cast(personalization_values), + static_cast(personalization_subset_size), + reinterpret_cast(p_pagerank), + static_cast(alpha), + static_cast(tolerance), + max_iter, + has_guess, + true); } } } @@ -638,6 +647,55 @@ void call_bfs(raft::handle_t const& handle, } } +// Wrapper for calling extract_egonet through a graph container +// FIXME : this should not be a legacy COO and it is not clear how to handle C++ api return type as +// is.graph_container Need to figure out how to return edge lists +template +std::unique_ptr call_egonet(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t* source_vertex, + vertex_t n_subgraphs, + vertex_t radius) +{ + if (graph_container.edgeType == numberTypeEnum::int32Type) { + auto graph = + detail::create_graph(handle, graph_container); + auto g = cugraph::experimental::extract_ego(handle, + graph->view(), + reinterpret_cast(source_vertex), + static_cast(n_subgraphs), + static_cast(radius)); + cy_multi_edgelists_t coo_contents{ + 0, // not used + std::get<0>(g).size(), + static_cast(n_subgraphs), + std::make_unique(std::get<0>(g).release()), + std::make_unique(std::get<1>(g).release()), + 
std::make_unique(std::get<2>(g).release()), + std::make_unique(std::get<3>(g).release())}; + return std::make_unique(std::move(coo_contents)); + } else if (graph_container.edgeType == numberTypeEnum::int64Type) { + auto graph = + detail::create_graph(handle, graph_container); + auto g = cugraph::experimental::extract_ego(handle, + graph->view(), + reinterpret_cast(source_vertex), + static_cast(n_subgraphs), + static_cast(radius)); + cy_multi_edgelists_t coo_contents{ + 0, // not used + std::get<0>(g).size(), + static_cast(n_subgraphs), + std::make_unique(std::get<0>(g).release()), + std::make_unique(std::get<1>(g).release()), + std::make_unique(std::get<2>(g).release()), + std::make_unique(std::get<3>(g).release())}; + return std::make_unique(std::move(coo_contents)); + } else { + CUGRAPH_FAIL("vertexType/edgeType combination unsupported"); + } +} + // Wrapper for calling SSSP through a graph container template void call_sssp(raft::handle_t const& handle, @@ -686,6 +744,101 @@ void call_sssp(raft::handle_t const& handle, } } +// wrapper for shuffling: +// +template +std::unique_ptr> call_shuffle( + raft::handle_t const& handle, + vertex_t* + edgelist_major_vertices, // [IN / OUT]: groupby_gpuid_and_shuffle_values() sorts in-place + vertex_t* edgelist_minor_vertices, // [IN / OUT] + weight_t* edgelist_weights, // [IN / OUT] + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned) // = false +{ + auto& comm = handle.get_comms(); + + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + + auto zip_edge = thrust::make_zip_iterator( + thrust::make_tuple(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights)); + + std::unique_ptr> ptr_ret = + std::make_unique>(handle); + + std::forward_as_tuple( + std::tie(ptr_ret->get_major(), ptr_ret->get_minor(), ptr_ret->get_weights()), + std::ignore) = + 
cugraph::experimental::groupby_gpuid_and_shuffle_values( + comm, // handle.get_comms(), + zip_edge, + zip_edge + num_edgelist_edges, + [key_func = + cugraph::experimental::detail::compute_gpu_id_from_edge_t{ + is_hypergraph_partitioned, + comm.get_size(), + row_comm.get_size(), + col_comm.get_size()}] __device__(auto val) { + return key_func(thrust::get<0>(val), thrust::get<1>(val)); + }, + handle.get_stream()); + + return ptr_ret; // RVO-ed +} + +// Wrapper for calling renumber_edgelist() in place: +// TODO: check if return type needs further handling... +// +template +std::unique_ptr> call_renumber( + raft::handle_t const& handle, + vertex_t* shuffled_edgelist_major_vertices /* [INOUT] */, + vertex_t* shuffled_edgelist_minor_vertices /* [INOUT] */, + edge_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check, + bool multi_gpu) // bc. cython cannot take non-type template params +{ + // caveat: return values have different types on the 2 branches below: + // + std::unique_ptr> p_ret = + std::make_unique>(handle); + + if (multi_gpu) { + std::tie( + p_ret->get_dv(), p_ret->get_partition(), p_ret->get_num_vertices(), p_ret->get_num_edges()) = + cugraph::experimental::renumber_edgelist( + handle, + shuffled_edgelist_major_vertices, + shuffled_edgelist_minor_vertices, + num_edgelist_edges, + is_hypergraph_partitioned, + do_expensive_check); + } else { + auto ret_f = cugraph::experimental::renumber_edgelist( + handle, + shuffled_edgelist_major_vertices, + shuffled_edgelist_minor_vertices, + num_edgelist_edges, + do_expensive_check); + + auto tot_vertices = static_cast(ret_f.size()); + + p_ret->get_dv() = std::move(ret_f); + cugraph::experimental::partition_t part_sg( + std::vector{0, tot_vertices}, false, 1, 1, 0, 0); + + p_ret->get_partition() = std::move(part_sg); + + p_ret->get_num_vertices() = tot_vertices; + p_ret->get_num_edges() = num_edgelist_edges; + } + + return p_ret; // RVO-ed (copy elision) +} + // Helper for setting up
subcommunicators void init_subcomms(raft::handle_t& handle, size_t row_comm_size) { @@ -836,6 +989,33 @@ template void call_bfs(raft::handle_t const& handle, double* sp_counters, const int64_t start_vertex, bool directed); +template std::unique_ptr call_egonet( + raft::handle_t const& handle, + graph_container_t const& graph_container, + int32_t* source_vertex, + int32_t n_subgraphs, + int32_t radius); + +template std::unique_ptr call_egonet( + raft::handle_t const& handle, + graph_container_t const& graph_container, + int32_t* source_vertex, + int32_t n_subgraphs, + int32_t radius); + +template std::unique_ptr call_egonet( + raft::handle_t const& handle, + graph_container_t const& graph_container, + int64_t* source_vertex, + int64_t n_subgraphs, + int64_t radius); + +template std::unique_ptr call_egonet( + raft::handle_t const& handle, + graph_container_t const& graph_container, + int64_t* source_vertex, + int64_t n_subgraphs, + int64_t radius); template void call_sssp(raft::handle_t const& handle, graph_container_t const& graph_container, @@ -865,5 +1045,82 @@ template void call_sssp(raft::handle_t const& handle, int64_t* predecessors, const int64_t source_vertex); +template std::unique_ptr> call_shuffle( + raft::handle_t const& handle, + int32_t* edgelist_major_vertices, + int32_t* edgelist_minor_vertices, + float* edgelist_weights, + int32_t num_edgelist_edges, + bool is_hypergraph_partitioned); + +template std::unique_ptr> call_shuffle( + raft::handle_t const& handle, + int32_t* edgelist_major_vertices, + int32_t* edgelist_minor_vertices, + float* edgelist_weights, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned); + +template std::unique_ptr> call_shuffle( + raft::handle_t const& handle, + int32_t* edgelist_major_vertices, + int32_t* edgelist_minor_vertices, + double* edgelist_weights, + int32_t num_edgelist_edges, + bool is_hypergraph_partitioned); + +template std::unique_ptr> call_shuffle( + raft::handle_t const& handle, + int32_t* 
edgelist_major_vertices, + int32_t* edgelist_minor_vertices, + double* edgelist_weights, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned); + +template std::unique_ptr> call_shuffle( + raft::handle_t const& handle, + int64_t* edgelist_major_vertices, + int64_t* edgelist_minor_vertices, + float* edgelist_weights, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned); + +template std::unique_ptr> call_shuffle( + raft::handle_t const& handle, + int64_t* edgelist_major_vertices, + int64_t* edgelist_minor_vertices, + double* edgelist_weights, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned); + +// TODO: add the remaining relevant EIDIr's: +// +template std::unique_ptr> call_renumber( + raft::handle_t const& handle, + int32_t* shuffled_edgelist_major_vertices /* [INOUT] */, + int32_t* shuffled_edgelist_minor_vertices /* [INOUT] */, + int32_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check, + bool multi_gpu); + +template std::unique_ptr> call_renumber( + raft::handle_t const& handle, + int32_t* shuffled_edgelist_major_vertices /* [INOUT] */, + int32_t* shuffled_edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check, + bool multi_gpu); + +template std::unique_ptr> call_renumber( + raft::handle_t const& handle, + int64_t* shuffled_edgelist_major_vertices /* [INOUT] */, + int64_t* shuffled_edgelist_minor_vertices /* [INOUT] */, + int64_t num_edgelist_edges, + bool is_hypergraph_partitioned, + bool do_expensive_check, + bool multi_gpu); + } // namespace cython } // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 9b57ad4557c..5425c68e896 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -1,6 +1,6 @@ #============================================================================= # -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -19,7 +19,7 @@ ################################################################################################### # - compiler function ----------------------------------------------------------------------------- -function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC CMAKE_EXTRA_LIBS) +function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC) add_executable(${CMAKE_TEST_NAME} ${CMAKE_TEST_SRC}) @@ -30,21 +30,36 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC CMAKE_EXTRA_LIBS) "${CUCO_INCLUDE_DIR}" "${LIBCUDACXX_INCLUDE_DIR}" "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" - "${GTEST_INCLUDE_DIR}" "${RMM_INCLUDE}" "${CUDF_INCLUDE}" "${CUDF_INCLUDE}/libcudf/libcudacxx" "${NCCL_INCLUDE_DIRS}" - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio" - "${CMAKE_SOURCE_DIR}/include" - "${CMAKE_SOURCE_DIR}/src" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio" + "${CMAKE_CURRENT_SOURCE_DIR}/../include" + "${CMAKE_CURRENT_SOURCE_DIR}/../src" "${CMAKE_CURRENT_SOURCE_DIR}" "${RAFT_DIR}/cpp/include" ) + target_link_directories(${CMAKE_TEST_NAME} + PRIVATE + # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported + # variable containing the link directories for nvcc. 
+ "${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}") + target_link_libraries(${CMAKE_TEST_NAME} PRIVATE - gtest gmock_main gmock cugraph ${CUDF_LIBRARY} ${CMAKE_EXTRA_LIBS} ${NCCL_LIBRARIES} cudart cuda cublas cusparse cusolver curand) + cugraph + GTest::GTest + GTest::Main + ${CUDF_LIBRARY} + ${NCCL_LIBRARIES} + cudart + cuda + cublas + cusparse + cusolver + curand) if(OpenMP_CXX_FOUND) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE @@ -96,8 +111,17 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC CMAKE_EXTRA_LIBS) ### BUILD_RPATH "${TARGET_BUILD_RPATH}") ${OpenMP_CXX_LIB_NAMES}) endif(OpenMP_CXX_FOUND) + + # CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the + # compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent + # cmake warnings about policy CMP0104. With this setting, arch flags must be + # manually set! ("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism + # used in cpp/CMakeLists.txt for setting arch options). + # Run "cmake --help-policy CMP0104" for policy details. + # NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to + # the findcudatoolkit features in cmake 3.17+ set_target_properties(${CMAKE_TEST_NAME} PROPERTIES - RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/gtests/") + CUDA_ARCHITECTURES OFF) add_test(NAME ${CMAKE_TEST_NAME} COMMAND ${CMAKE_TEST_NAME}) endfunction() @@ -116,206 +140,288 @@ endif(RAPIDS_DATASET_ROOT_DIR) ### test sources ################################################################################## ################################################################################################### +# FIXME: consider adding a "add_library(cugraph_testing SHARED ...) instead of +# adding the same test utility sources to each test target. There may need to be +# an additional cugraph_mg_testing lib due to the optional inclusion of MPI. 
+ ################################################################################################### # - katz centrality tests ------------------------------------------------------------------------- set(KATZ_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/centrality/katz_centrality_test.cu") - ConfigureTest(KATZ_TEST "${KATZ_TEST_SRC}" "") + ConfigureTest(KATZ_TEST "${KATZ_TEST_SRC}") ################################################################################################### # - betweenness centrality tests ------------------------------------------------------------------ set(BETWEENNESS_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/centrality/betweenness_centrality_test.cu") - ConfigureTest(BETWEENNESS_TEST "${BETWEENNESS_TEST_SRC}" "") + ConfigureTest(BETWEENNESS_TEST "${BETWEENNESS_TEST_SRC}") set(EDGE_BETWEENNESS_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/centrality/edge_betweenness_centrality_test.cu") - ConfigureTest(EDGE_BETWEENNESS_TEST "${EDGE_BETWEENNESS_TEST_SRC}" "") - -################################################################################################### -# - pagerank tests -------------------------------------------------------------------------------- - -set(PAGERANK_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/pagerank/pagerank_test.cpp") - -ConfigureTest(PAGERANK_TEST "${PAGERANK_TEST_SRC}" "") + ConfigureTest(EDGE_BETWEENNESS_TEST "${EDGE_BETWEENNESS_TEST_SRC}") 
################################################################################################### # - SSSP tests ------------------------------------------------------------------------------------ set(SSSP_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/traversal/sssp_test.cu") -ConfigureTest(SSSP_TEST "${SSSP_TEST_SRCS}" "") +ConfigureTest(SSSP_TEST "${SSSP_TEST_SRCS}") ################################################################################################### # - BFS tests ------------------------------------------------------------------------------------- set(BFS_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/traversal/bfs_test.cu") -ConfigureTest(BFS_TEST "${BFS_TEST_SRCS}" "") +ConfigureTest(BFS_TEST "${BFS_TEST_SRCS}") ################################################################################################### # - LOUVAIN tests --------------------------------------------------------------------------------- set(LOUVAIN_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/community/louvain_test.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/community/louvain_test.cpp") -ConfigureTest(LOUVAIN_TEST "${LOUVAIN_TEST_SRC}" "") +ConfigureTest(LOUVAIN_TEST "${LOUVAIN_TEST_SRC}") ################################################################################################### # - LEIDEN tests --------------------------------------------------------------------------------- set(LEIDEN_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + 
"${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/leiden_test.cpp") -ConfigureTest(LEIDEN_TEST "${LEIDEN_TEST_SRC}" "") +ConfigureTest(LEIDEN_TEST "${LEIDEN_TEST_SRC}") ################################################################################################### # - ECG tests --------------------------------------------------------------------------------- set(ECG_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/community/ecg_test.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/community/ecg_test.cpp") -ConfigureTest(ECG_TEST "${ECG_TEST_SRC}" "") +ConfigureTest(ECG_TEST "${ECG_TEST_SRC}") ################################################################################################### # - Balanced cut clustering tests ----------------------------------------------------------------- set(BALANCED_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/balanced_edge_test.cpp") -ConfigureTest(BALANCED_TEST "${BALANCED_TEST_SRC}" "") +ConfigureTest(BALANCED_TEST "${BALANCED_TEST_SRC}") ################################################################################################### # - TRIANGLE tests -------------------------------------------------------------------------------- set(TRIANGLE_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/community/triangle_test.cu") -ConfigureTest(TRIANGLE_TEST "${TRIANGLE_TEST_SRC}" "") +ConfigureTest(TRIANGLE_TEST "${TRIANGLE_TEST_SRC}") +################################################################################################### +# - EGO tests 
-------------------------------------------------------------------------------- + +set(EGO_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/community/egonet_test.cu") + +ConfigureTest(EGO_TEST "${EGO_TEST_SRC}") ################################################################################################### # - RENUMBERING tests ----------------------------------------------------------------------------- set(RENUMBERING_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/renumber/renumber_test.cu") -ConfigureTest(RENUMBERING_TEST "${RENUMBERING_TEST_SRC}" "") +ConfigureTest(RENUMBERING_TEST "${RENUMBERING_TEST_SRC}") ################################################################################################### # - FORCE ATLAS 2 tests -------------------------------------------------------------------------- set(FA2_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/layout/force_atlas2_test.cu") -ConfigureTest(FA2_TEST "${FA2_TEST_SRC}" "") +ConfigureTest(FA2_TEST "${FA2_TEST_SRC}") + +################################################################################################### +# - TSP tests -------------------------------------------------------------------------- + +set(TSP_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/traversal/tsp_test.cu") + + ConfigureTest(TSP_TEST "${TSP_TEST_SRC}") ################################################################################################### # - CONNECTED COMPONENTS tests
------------------------------------------------------------------- set(CONNECT_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/components/con_comp_test.cu") -ConfigureTest(CONNECT_TEST "${CONNECT_TEST_SRC}" "") +ConfigureTest(CONNECT_TEST "${CONNECT_TEST_SRC}") ################################################################################################### # - STRONGLY CONNECTED COMPONENTS tests ---------------------------------------------------------- set(SCC_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/components/scc_test.cu") -ConfigureTest(SCC_TEST "${SCC_TEST_SRC}" "") +ConfigureTest(SCC_TEST "${SCC_TEST_SRC}") ################################################################################################### #-Hungarian (Linear Assignment Problem) tests --------------------------------------------------------------------- set(HUNGARIAN_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/linear_assignment/hungarian_test.cu") -ConfigureTest(HUNGARIAN_TEST "${HUNGARIAN_TEST_SRC}" "") +ConfigureTest(HUNGARIAN_TEST "${HUNGARIAN_TEST_SRC}") ################################################################################################### # - MST tests ---------------------------------------------------------------------------- set(MST_TEST_SRC - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/tree/mst_test.cu") -ConfigureTest(MST_TEST "${MST_TEST_SRC}" "") +ConfigureTest(MST_TEST 
"${MST_TEST_SRC}") ################################################################################################### # - Experimental Graph tests ---------------------------------------------------------------------- set(EXPERIMENTAL_GRAPH_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/graph_test.cpp") -ConfigureTest(EXPERIMENTAL_GRAPH_TEST "${EXPERIMENTAL_GRAPH_TEST_SRCS}" "") +ConfigureTest(EXPERIMENTAL_GRAPH_TEST "${EXPERIMENTAL_GRAPH_TEST_SRCS}") + +################################################################################################### +# - Experimental coarsening tests ----------------------------------------------------------------- + +set(EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/coarsen_graph_test.cpp") + +ConfigureTest(EXPERIMENTAL_COARSEN_GRAPH_TEST "${EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS}") + +################################################################################################### +# - Experimental induced subgraph tests ----------------------------------------------------------- + +set(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/induced_subgraph_test.cpp") + +ConfigureTest(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST "${EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS}") ################################################################################################### # - Experimental BFS tests ------------------------------------------------------------------------ set(EXPERIMENTAL_BFS_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + 
"${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/bfs_test.cpp") -ConfigureTest(EXPERIMENTAL_BFS_TEST "${EXPERIMENTAL_BFS_TEST_SRCS}" "") +ConfigureTest(EXPERIMENTAL_BFS_TEST "${EXPERIMENTAL_BFS_TEST_SRCS}") ################################################################################################### # - Experimental SSSP tests ----------------------------------------------------------------------- set(EXPERIMENTAL_SSSP_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/sssp_test.cpp") -ConfigureTest(EXPERIMENTAL_SSSP_TEST "${EXPERIMENTAL_SSSP_TEST_SRCS}" "") +ConfigureTest(EXPERIMENTAL_SSSP_TEST "${EXPERIMENTAL_SSSP_TEST_SRCS}") ################################################################################################### # - Experimental PAGERANK tests ------------------------------------------------------------------- set(EXPERIMENTAL_PAGERANK_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/pagerank_test.cpp") -ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}" "") +ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}") ################################################################################################### # - Experimental LOUVAIN tests ------------------------------------------------------------------- set(EXPERIMENTAL_LOUVAIN_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" 
"${CMAKE_CURRENT_SOURCE_DIR}/experimental/louvain_test.cu") -ConfigureTest(EXPERIMENTAL_LOUVAIN_TEST "${EXPERIMENTAL_LOUVAIN_TEST_SRCS}" "") +ConfigureTest(EXPERIMENTAL_LOUVAIN_TEST "${EXPERIMENTAL_LOUVAIN_TEST_SRCS}") ################################################################################################### # - Experimental KATZ_CENTRALITY tests ------------------------------------------------------------ set(EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" "${CMAKE_CURRENT_SOURCE_DIR}/experimental/katz_centrality_test.cpp") -ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS}" "") +ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS}") + + +################################################################################################### +# - MG tests -------------------------------------------------------------------------------------- +if(BUILD_CUGRAPH_MG_TESTS) + if(MPI_CXX_FOUND) + ########################################################################################### + # - MG PAGERANK tests --------------------------------------------------------------------- + + set(MG_PAGERANK_TEST_SRCS + "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/utilities/mg_test_utilities.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/pagerank/pagerank_mg_test.cpp") + + ConfigureTest(MG_PAGERANK_TEST "${MG_PAGERANK_TEST_SRCS}") + target_link_libraries(MG_PAGERANK_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX) + + else(MPI_CXX_FOUND) + message(FATAL_ERROR "OpenMPI NOT found, cannot build MG tests.") + endif(MPI_CXX_FOUND) +endif(BUILD_CUGRAPH_MG_TESTS) ################################################################################################### ### 
enable testing ################################################################################ diff --git a/cpp/tests/README.md b/cpp/tests/README.md new file mode 100644 index 00000000000..b5808822467 --- /dev/null +++ b/cpp/tests/README.md @@ -0,0 +1,31 @@ +# libcugraph C++ tests + +## Prerequisites +### Datasets +``` +/path/to/cuGraph> ./datasets/get_test_data.sh +/path/to/cuGraph> export RAPIDS_DATASET_ROOT_DIR=/path/to/cuGraph/datasets +``` +### System Requirements +* MPI (multi-GPU tests only) + ``` + conda install -c conda-forge openmpi + ``` + +## Building +``` +/path/to/cuGraph> ./build.sh libcugraph +``` +To build the multi-GPU tests: +``` +/path/to/cuGraph> ./build.sh libcugraph cpp-mgtests +``` + +## Running +``` + +``` +To run the multi-GPU tests (example using 2 GPUs): +``` +/path/to/cuGraph> mpirun -n 2 ./cpp/build/gtests/MG_PAGERANK_TEST +``` diff --git a/cpp/tests/community/ecg_test.cu b/cpp/tests/community/ecg_test.cpp similarity index 73% rename from cpp/tests/community/ecg_test.cu rename to cpp/tests/community/ecg_test.cpp index 85b80b1610b..a13ee2fe360 100644 --- a/cpp/tests/community/ecg_test.cu +++ b/cpp/tests/community/ecg_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
* * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -77,6 +77,10 @@ TEST(ecg, success) TEST(ecg, dolphin) { + raft::handle_t handle; + + auto stream = handle.get_stream(); + std::vector off_h = {0, 6, 14, 18, 21, 22, 26, 32, 37, 43, 50, 55, 56, 57, 65, 77, 84, 90, 99, 106, 110, 119, 125, 126, 129, 135, 138, 141, 146, 151, 160, 165, 166, 169, 179, 184, 185, 192, 203, @@ -103,38 +107,55 @@ TEST(ecg, dolphin) int num_verts = off_h.size() - 1; int num_edges = ind_h.size(); - thrust::host_vector cluster_id(num_verts, -1); + std::vector cluster_id(num_verts, -1); - rmm::device_vector offsets_v(off_h); - rmm::device_vector indices_v(ind_h); - rmm::device_vector weights_v(w_h); - rmm::device_vector result_v(cluster_id); + rmm::device_uvector offsets_v(num_verts + 1, stream); + rmm::device_uvector indices_v(num_edges, stream); + rmm::device_uvector weights_v(num_edges, stream); + rmm::device_uvector result_v(num_verts, stream); + + raft::update_device(offsets_v.data(), off_h.data(), off_h.size(), stream); + raft::update_device(indices_v.data(), ind_h.data(), ind_h.size(), stream); + raft::update_device(weights_v.data(), w_h.data(), w_h.size(), stream); cugraph::GraphCSRView graph_csr( - offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); + offsets_v.data(), indices_v.data(), weights_v.data(), num_verts, num_edges); - raft::handle_t handle; - cugraph::ecg(handle, graph_csr, .05, 16, result_v.data().get()); + // "FIXME": remove this check once we drop support for Pascal + // + // Calling louvain on Pascal will throw an exception, we'll check that + // this is the behavior while we still support Pascal (device_prop.major < 7) + // + if (handle.get_device_properties().major < 7) { + EXPECT_THROW( + (cugraph::ecg(handle, graph_csr, .05, 16, result_v.data())), + cugraph::logic_error); + } else { + cugraph::ecg(handle, graph_csr, .05, 16, 
result_v.data()); - cluster_id = result_v; - int max = *max_element(cluster_id.begin(), cluster_id.end()); - int min = *min_element(cluster_id.begin(), cluster_id.end()); + raft::update_host(cluster_id.data(), result_v.data(), num_verts, stream); - ASSERT_EQ((min >= 0), 1); + CUDA_TRY(cudaDeviceSynchronize()); - std::set cluster_ids; - for (auto c : cluster_id) { cluster_ids.insert(c); } + int max = *max_element(cluster_id.begin(), cluster_id.end()); + int min = *min_element(cluster_id.begin(), cluster_id.end()); - ASSERT_EQ(cluster_ids.size(), size_t(max + 1)); + ASSERT_EQ((min >= 0), 1); - float modularity{0.0}; + std::set cluster_ids; + for (auto c : cluster_id) { cluster_ids.insert(c); } - cugraph::ext_raft::analyzeClustering_modularity( - graph_csr, max + 1, result_v.data().get(), &modularity); + ASSERT_EQ(cluster_ids.size(), size_t(max + 1)); + + float modularity{0.0}; + + cugraph::ext_raft::analyzeClustering_modularity( + graph_csr, max + 1, result_v.data(), &modularity); - float random_modularity{0.95 * 0.4962422251701355}; + float random_modularity{0.95 * 0.4962422251701355}; - ASSERT_GT(modularity, random_modularity); + ASSERT_GT(modularity, random_modularity); + } } CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/community/egonet_test.cu b/cpp/tests/community/egonet_test.cu new file mode 100644 index 00000000000..ec031228998 --- /dev/null +++ b/cpp/tests/community/egonet_test.cu @@ -0,0 +1,173 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +typedef struct InducedEgo_Usecase_t { + std::string graph_file_full_path{}; + std::vector ego_sources{}; + int32_t radius; + bool test_weighted{false}; + + InducedEgo_Usecase_t(std::string const& graph_file_path, + std::vector const& ego_sources, + int32_t radius, + bool test_weighted) + : ego_sources(ego_sources), radius(radius), test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} InducedEgo_Usecase; + +class Tests_InducedEgo : public ::testing::TestWithParam { + public: + Tests_InducedEgo() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(InducedEgo_Usecase const& configuration) + { + raft::handle_t handle{}; + + auto graph = cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted); + auto graph_view = graph.view(); + + rmm::device_uvector d_ego_sources(configuration.ego_sources.size(), + handle.get_stream()); + + raft::update_device(d_ego_sources.data(), + configuration.ego_sources.data(), + configuration.ego_sources.size(), + handle.get_stream()); + + rmm::device_uvector d_ego_edgelist_src(0, handle.get_stream()); + rmm::device_uvector d_ego_edgelist_dst(0, handle.get_stream()); + rmm::device_uvector d_ego_edgelist_weights(0, handle.get_stream()); + rmm::device_uvector d_ego_edge_offsets(0, handle.get_stream()); + + std::tie(d_ego_edgelist_src, d_ego_edgelist_dst, d_ego_edgelist_weights, 
d_ego_edge_offsets) = + cugraph::experimental::extract_ego(handle, + graph_view, + d_ego_sources.data(), + static_cast(configuration.ego_sources.size()), + configuration.radius); + + std::vector h_cugraph_ego_edge_offsets(d_ego_edge_offsets.size()); + std::vector h_cugraph_ego_edgelist_src(d_ego_edgelist_src.size()); + std::vector h_cugraph_ego_edgelist_dst(d_ego_edgelist_dst.size()); + raft::update_host(h_cugraph_ego_edgelist_src.data(), + d_ego_edgelist_src.data(), + d_ego_edgelist_src.size(), + handle.get_stream()); + raft::update_host(h_cugraph_ego_edgelist_dst.data(), + d_ego_edgelist_dst.data(), + d_ego_edgelist_dst.size(), + handle.get_stream()); + raft::update_host(h_cugraph_ego_edge_offsets.data(), + d_ego_edge_offsets.data(), + d_ego_edge_offsets.size(), + handle.get_stream()); + ASSERT_TRUE(d_ego_edge_offsets.size() == (configuration.ego_sources.size() + 1)); + ASSERT_TRUE(d_ego_edgelist_src.size() == d_ego_edgelist_dst.size()); + if (configuration.test_weighted) + ASSERT_TRUE(d_ego_edgelist_src.size() == d_ego_edgelist_weights.size()); + ASSERT_TRUE(h_cugraph_ego_edge_offsets[configuration.ego_sources.size()] == + d_ego_edgelist_src.size()); + for (size_t i = 0; i < configuration.ego_sources.size(); i++) + ASSERT_TRUE(h_cugraph_ego_edge_offsets[i] < h_cugraph_ego_edge_offsets[i + 1]); + auto n_vertices = graph_view.get_number_of_vertices(); + for (size_t i = 0; i < d_ego_edgelist_src.size(); i++) { + ASSERT_TRUE(h_cugraph_ego_edgelist_src[i] >= 0); + ASSERT_TRUE(h_cugraph_ego_edgelist_src[i] < n_vertices); + ASSERT_TRUE(h_cugraph_ego_edgelist_dst[i] >= 0); + ASSERT_TRUE(h_cugraph_ego_edgelist_dst[i] < n_vertices); + } + + /* + // For inspecting data + std::vector h_cugraph_ego_edgelist_weights(d_ego_edgelist_weights.size()); + if (configuration.test_weighted) { + raft::update_host(h_cugraph_ego_edgelist_weights.data(), + d_ego_edgelist_weights.data(), + d_ego_edgelist_weights.size(), + handle.get_stream()); + } + raft::print_host_vector("offsets", + 
&h_cugraph_ego_edge_offsets[0], + h_cugraph_ego_edge_offsets.size(), + std::cout); + raft::print_host_vector("src", + &h_cugraph_ego_edgelist_src[0], + h_cugraph_ego_edgelist_src.size(), + std::cout); + raft::print_host_vector("dst", + &h_cugraph_ego_edgelist_dst[0], + h_cugraph_ego_edgelist_dst.size(), + std::cout); + raft::print_host_vector("weights", + &h_cugraph_ego_edgelist_weights[0], + h_cugraph_ego_edgelist_weights.size(), + std::cout); + */ + } +}; + +TEST_P(Tests_InducedEgo, CheckInt32Int32FloatUntransposed) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_InducedEgo, + ::testing::Values( + InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{0}, 1, false), + InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{0}, 2, false), + InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{1}, 3, false), + InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{10, 0, 5}, 2, false), + InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{9, 3, 10}, 2, false), + InducedEgo_Usecase("test/datasets/karate.mtx", std::vector{5, 12, 13}, 2, true))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/community/leiden_test.cpp b/cpp/tests/community/leiden_test.cpp index 764ab8bf6cb..9083400f85c 100644 --- a/cpp/tests/community/leiden_test.cpp +++ b/cpp/tests/community/leiden_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. 
* * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -19,6 +19,10 @@ TEST(leiden_karate, success) { + raft::handle_t handle; + + auto stream = handle.get_stream(); + std::vector off_h = {0, 16, 25, 35, 41, 44, 48, 52, 56, 61, 63, 66, 67, 69, 74, 76, 78, 80, 82, 84, 87, 89, 91, 93, 98, 101, 104, 106, 110, 113, 117, 121, 127, 139, 156}; @@ -46,27 +50,38 @@ TEST(leiden_karate, success) std::vector cluster_id(num_verts, -1); - rmm::device_vector offsets_v(off_h); - rmm::device_vector indices_v(ind_h); - rmm::device_vector weights_v(w_h); - rmm::device_vector result_v(cluster_id); + rmm::device_uvector offsets_v(num_verts + 1, stream); + rmm::device_uvector indices_v(num_edges, stream); + rmm::device_uvector weights_v(num_edges, stream); + rmm::device_uvector result_v(num_verts, stream); + + raft::update_device(offsets_v.data(), off_h.data(), off_h.size(), stream); + raft::update_device(indices_v.data(), ind_h.data(), ind_h.size(), stream); + raft::update_device(weights_v.data(), w_h.data(), w_h.size(), stream); cugraph::GraphCSRView G( - offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); + offsets_v.data(), indices_v.data(), weights_v.data(), num_verts, num_edges); float modularity{0.0}; size_t num_level = 40; - raft::handle_t handle; - std::tie(num_level, modularity) = cugraph::leiden(handle, G, result_v.data().get()); + // "FIXME": remove this check once we drop support for Pascal + // + // Calling louvain on Pascal will throw an exception, we'll check that + // this is the behavior while we still support Pascal (device_prop.major < 7) + // + if (handle.get_device_properties().major < 7) { + EXPECT_THROW(cugraph::leiden(handle, G, result_v.data()), cugraph::logic_error); + } else { + std::tie(num_level, modularity) = cugraph::leiden(handle, G, result_v.data()); + + raft::update_host(cluster_id.data(), result_v.data(), num_verts, 
stream); - cudaMemcpy((void*)&(cluster_id[0]), - result_v.data().get(), - sizeof(int) * num_verts, - cudaMemcpyDeviceToHost); + CUDA_TRY(cudaDeviceSynchronize()); - int min = *min_element(cluster_id.begin(), cluster_id.end()); + int min = *min_element(cluster_id.begin(), cluster_id.end()); - ASSERT_GE(min, 0); - ASSERT_GE(modularity, 0.41116042 * 0.99); + ASSERT_GE(min, 0); + ASSERT_GE(modularity, 0.41116042 * 0.99); + } } diff --git a/cpp/tests/community/louvain_test.cu b/cpp/tests/community/louvain_test.cpp similarity index 62% rename from cpp/tests/community/louvain_test.cu rename to cpp/tests/community/louvain_test.cpp index 2bac0097212..d3024282be3 100644 --- a/cpp/tests/community/louvain_test.cu +++ b/cpp/tests/community/louvain_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. All rights reserved. * * NVIDIA CORPORATION and its licensors retain all intellectual property * and proprietary rights in and to this software, related documentation @@ -15,10 +15,14 @@ #include -#include +#include TEST(louvain, success) { + raft::handle_t handle; + + auto stream = handle.get_stream(); + std::vector off_h = {0, 16, 25, 35, 41, 44, 48, 52, 56, 61, 63, 66, 67, 69, 74, 76, 78, 80, 82, 84, 87, 89, 91, 93, 98, 101, 104, 106, 110, 113, 117, 121, 127, 139, 156}; @@ -49,42 +53,54 @@ TEST(louvain, success) std::vector cluster_id(num_verts, -1); - rmm::device_vector offsets_v(off_h); - rmm::device_vector indices_v(ind_h); - rmm::device_vector weights_v(w_h); - rmm::device_vector result_v(cluster_id); + rmm::device_uvector offsets_v(num_verts + 1, stream); + rmm::device_uvector indices_v(num_edges, stream); + rmm::device_uvector weights_v(num_edges, stream); + rmm::device_uvector result_v(num_verts, stream); + + raft::update_device(offsets_v.data(), off_h.data(), off_h.size(), stream); + raft::update_device(indices_v.data(), ind_h.data(), ind_h.size(), stream); + 
raft::update_device(weights_v.data(), w_h.data(), w_h.size(), stream); cugraph::GraphCSRView G( - offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); + offsets_v.data(), indices_v.data(), weights_v.data(), num_verts, num_edges); float modularity{0.0}; size_t num_level = 40; - raft::handle_t handle; + // "FIXME": remove this check once we drop support for Pascal + // + // Calling louvain on Pascal will throw an exception, we'll check that + // this is the behavior while we still support Pascal (device_prop.major < 7) + // + if (handle.get_device_properties().major < 7) { + EXPECT_THROW(cugraph::louvain(handle, G, result_v.data()), cugraph::logic_error); + } else { + std::tie(num_level, modularity) = cugraph::louvain(handle, G, result_v.data()); - std::tie(num_level, modularity) = cugraph::louvain(handle, G, result_v.data().get()); + raft::update_host(cluster_id.data(), result_v.data(), num_verts, stream); - cudaMemcpy((void*)&(cluster_id[0]), - result_v.data().get(), - sizeof(int) * num_verts, - cudaMemcpyDeviceToHost); + CUDA_TRY(cudaDeviceSynchronize()); - int min = *min_element(cluster_id.begin(), cluster_id.end()); + int min = *min_element(cluster_id.begin(), cluster_id.end()); - std::cout << "modularity = " << modularity << std::endl; + std::cout << "modularity = " << modularity << std::endl; - ASSERT_GE(min, 0); - ASSERT_GE(modularity, 0.402777 * 0.95); - ASSERT_EQ(result_v, result_h); + ASSERT_GE(min, 0); + ASSERT_GE(modularity, 0.402777 * 0.95); + ASSERT_EQ(cluster_id, result_h); + } } TEST(louvain_renumbered, success) { + raft::handle_t handle; + + auto stream = handle.get_stream(); + std::vector off_h = {0, 16, 25, 30, 34, 38, 42, 44, 46, 48, 50, 52, 54, 56, 73, 85, 95, 101, 107, 112, 117, 121, 125, 129, - 132, 135, 138, 141, 144, 147, 149, 151, 153, 155, 156 - - }; + 132, 135, 138, 141, 144, 147, 149, 151, 153, 155, 156}; std::vector ind_h = { 1, 3, 7, 11, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 30, 33, 0, 5, 11, 
15, 16, 19, 21, 25, 30, 4, 13, 14, 22, 27, 0, 9, 20, 24, 2, 13, 15, 26, 1, 13, 14, 18, 13, 15, 0, 16, @@ -110,32 +126,42 @@ TEST(louvain_renumbered, success) std::vector cluster_id(num_verts, -1); - rmm::device_vector offsets_v(off_h); - rmm::device_vector indices_v(ind_h); - rmm::device_vector weights_v(w_h); - rmm::device_vector result_v(cluster_id); + rmm::device_uvector offsets_v(num_verts + 1, stream); + rmm::device_uvector indices_v(num_edges, stream); + rmm::device_uvector weights_v(num_edges, stream); + rmm::device_uvector result_v(num_verts, stream); + + raft::update_device(offsets_v.data(), off_h.data(), off_h.size(), stream); + raft::update_device(indices_v.data(), ind_h.data(), ind_h.size(), stream); + raft::update_device(weights_v.data(), w_h.data(), w_h.size(), stream); cugraph::GraphCSRView G( - offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); + offsets_v.data(), indices_v.data(), weights_v.data(), num_verts, num_edges); float modularity{0.0}; size_t num_level = 40; - raft::handle_t handle; + // "FIXME": remove this check once we drop support for Pascal + // + // Calling louvain on Pascal will throw an exception, we'll check that + // this is the behavior while we still support Pascal (device_prop.major < 7) + // + if (handle.get_device_properties().major < 7) { + EXPECT_THROW(cugraph::louvain(handle, G, result_v.data()), cugraph::logic_error); + } else { + std::tie(num_level, modularity) = cugraph::louvain(handle, G, result_v.data()); - std::tie(num_level, modularity) = cugraph::louvain(handle, G, result_v.data().get()); + raft::update_host(cluster_id.data(), result_v.data(), num_verts, stream); - cudaMemcpy((void*)&(cluster_id[0]), - result_v.data().get(), - sizeof(int) * num_verts, - cudaMemcpyDeviceToHost); + CUDA_TRY(cudaDeviceSynchronize()); - int min = *min_element(cluster_id.begin(), cluster_id.end()); + int min = *min_element(cluster_id.begin(), cluster_id.end()); - std::cout << "modularity = " 
<< modularity << std::endl; + std::cout << "modularity = " << modularity << std::endl; - ASSERT_GE(min, 0); - ASSERT_GE(modularity, 0.402777 * 0.95); + ASSERT_GE(min, 0); + ASSERT_GE(modularity, 0.402777 * 0.95); + } } CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/coarsen_graph_test.cpp b/cpp/tests/experimental/coarsen_graph_test.cpp new file mode 100644 index 00000000000..b790dfffa69 --- /dev/null +++ b/cpp/tests/experimental/coarsen_graph_test.cpp @@ -0,0 +1,396 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +template +std::enable_if_t::value, bool> is_valid_vertex(vertex_t num_vertices, + vertex_t v) +{ + return (v >= 0) && (v < num_vertices); +} + +template +std::enable_if_t::value, bool> is_valid_vertex(vertex_t num_vertices, + vertex_t v) +{ + return v < num_vertices; +} + +template +void check_coarsened_graph_results(edge_t* org_offsets, + vertex_t* org_indices, + weight_t* org_weights, + vertex_t* org_labels, + edge_t* coarse_offsets, + vertex_t* coarse_indices, + weight_t* coarse_weights, + vertex_t* coarse_vertex_labels, + vertex_t num_org_vertices, + vertex_t num_coarse_vertices) +{ + ASSERT_TRUE(((org_weights == nullptr) && (coarse_weights == nullptr)) || + ((org_weights != nullptr) && (coarse_weights != nullptr))); + ASSERT_TRUE(std::is_sorted(org_offsets, org_offsets + num_org_vertices)); + ASSERT_TRUE(std::count_if(org_indices, + org_indices + org_offsets[num_org_vertices], + [num_org_vertices](auto nbr) { + return !is_valid_vertex(num_org_vertices, nbr); + }) == 0); + ASSERT_TRUE(std::is_sorted(coarse_offsets, coarse_offsets + num_coarse_vertices)); + ASSERT_TRUE(std::count_if(coarse_indices, + coarse_indices + coarse_offsets[num_coarse_vertices], + [num_coarse_vertices](auto nbr) { + return !is_valid_vertex(num_coarse_vertices, nbr); + }) == 0); + ASSERT_TRUE(num_coarse_vertices <= num_org_vertices); + + std::vector org_unique_labels(num_org_vertices); + std::iota(org_unique_labels.begin(), org_unique_labels.end(), vertex_t{0}); + std::transform(org_unique_labels.begin(), + org_unique_labels.end(), + org_unique_labels.begin(), + [org_labels](auto v) { return org_labels[v]; }); + std::sort(org_unique_labels.begin(), org_unique_labels.end()); + org_unique_labels.resize(std::distance( + org_unique_labels.begin(), std::unique(org_unique_labels.begin(), 
org_unique_labels.end()))); + + ASSERT_TRUE(org_unique_labels.size() == static_cast(num_coarse_vertices)); + + { + std::vector tmp_coarse_vertex_labels(coarse_vertex_labels, + coarse_vertex_labels + num_coarse_vertices); + std::sort(tmp_coarse_vertex_labels.begin(), tmp_coarse_vertex_labels.end()); + ASSERT_TRUE(std::unique(tmp_coarse_vertex_labels.begin(), tmp_coarse_vertex_labels.end()) == + tmp_coarse_vertex_labels.end()); + ASSERT_TRUE(std::equal( + org_unique_labels.begin(), org_unique_labels.end(), tmp_coarse_vertex_labels.begin())); + } + + std::vector> label_org_vertex_pairs(num_org_vertices); + for (vertex_t i = 0; i < num_org_vertices; ++i) { + label_org_vertex_pairs[i] = std::make_tuple(org_labels[i], i); + } + std::sort(label_org_vertex_pairs.begin(), label_org_vertex_pairs.end()); + + std::map label_to_coarse_vertex_map{}; + for (vertex_t i = 0; i < num_coarse_vertices; ++i) { + label_to_coarse_vertex_map[coarse_vertex_labels[i]] = i; + } + + auto threshold_ratio = (org_weights == nullptr) ? weight_t{1.0} /* irrelevant */ : weight_t{1e-4}; + auto threshold_magnitude = + (org_weights == nullptr) + ? 
weight_t{1.0} /* irrelevant */ + : (std::accumulate( + coarse_weights, coarse_weights + coarse_offsets[num_coarse_vertices], weight_t{0.0}) / + static_cast(coarse_offsets[num_coarse_vertices])) * + threshold_ratio; + + for (size_t i = 0; i < org_unique_labels.size(); ++i) { // for each vertex in the coarse graph + auto lb = std::lower_bound( + label_org_vertex_pairs.begin(), + label_org_vertex_pairs.end(), + std::make_tuple(org_unique_labels[i], + cugraph::experimental::invalid_vertex_id::value /* dummy */), + [](auto lhs, auto rhs) { return std::get<0>(lhs) < std::get<0>(rhs); }); + auto ub = std::upper_bound( + label_org_vertex_pairs.begin(), + label_org_vertex_pairs.end(), + std::make_tuple(org_unique_labels[i], + cugraph::experimental::invalid_vertex_id::value /* dummy */), + [](auto lhs, auto rhs) { return std::get<0>(lhs) < std::get<0>(rhs); }); + auto count = std::distance(lb, ub); + auto offset = std::distance(label_org_vertex_pairs.begin(), lb); + if (org_weights == nullptr) { + std::vector coarse_nbrs0{}; + std::for_each( + lb, + ub, + [org_offsets, org_indices, org_labels, &label_to_coarse_vertex_map, &coarse_nbrs0](auto t) { + auto org_vertex = std::get<1>(t); + std::vector tmp_nbrs(org_offsets[org_vertex + 1] - org_offsets[org_vertex]); + std::transform(org_indices + org_offsets[org_vertex], + org_indices + org_offsets[org_vertex + 1], + tmp_nbrs.begin(), + [org_labels, &label_to_coarse_vertex_map](auto nbr) { + return label_to_coarse_vertex_map[org_labels[nbr]]; + }); + coarse_nbrs0.insert(coarse_nbrs0.end(), tmp_nbrs.begin(), tmp_nbrs.end()); + }); + std::sort(coarse_nbrs0.begin(), coarse_nbrs0.end()); + coarse_nbrs0.resize( + std::distance(coarse_nbrs0.begin(), std::unique(coarse_nbrs0.begin(), coarse_nbrs0.end()))); + + auto coarse_vertex = label_to_coarse_vertex_map[org_unique_labels[i]]; + auto coarse_offset = coarse_offsets[coarse_vertex]; + auto coarse_count = coarse_offsets[coarse_vertex + 1] - coarse_offset; + std::vector 
coarse_nbrs1(coarse_indices + coarse_offset, + coarse_indices + coarse_offset + coarse_count); + std::sort(coarse_nbrs1.begin(), coarse_nbrs1.end()); + + ASSERT_TRUE(coarse_nbrs0.size() == coarse_nbrs1.size()); + ASSERT_TRUE(std::equal(coarse_nbrs0.begin(), coarse_nbrs0.end(), coarse_nbrs1.begin())); + } else { + std::vector> coarse_nbr_weight_pairs0{}; + std::for_each(lb, + ub, + [org_offsets, + org_indices, + org_weights, + org_labels, + &label_to_coarse_vertex_map, + &coarse_nbr_weight_pairs0](auto t) { + auto org_vertex = std::get<1>(t); + std::vector> tmp_pairs( + org_offsets[org_vertex + 1] - org_offsets[org_vertex]); + for (auto j = org_offsets[org_vertex]; j < org_offsets[org_vertex + 1]; ++j) { + tmp_pairs[j - org_offsets[org_vertex]] = std::make_tuple( + label_to_coarse_vertex_map[org_labels[org_indices[j]]], org_weights[j]); + } + coarse_nbr_weight_pairs0.insert( + coarse_nbr_weight_pairs0.end(), tmp_pairs.begin(), tmp_pairs.end()); + }); + std::sort(coarse_nbr_weight_pairs0.begin(), coarse_nbr_weight_pairs0.end()); + // reduce by key + { + size_t run_start_idx = 0; + for (size_t j = 1; j < coarse_nbr_weight_pairs0.size(); ++j) { + auto& start = coarse_nbr_weight_pairs0[run_start_idx]; + auto& cur = coarse_nbr_weight_pairs0[j]; + if (std::get<0>(start) == std::get<0>(cur)) { + std::get<1>(start) += std::get<1>(cur); + std::get<0>(cur) = cugraph::experimental::invalid_vertex_id::value; + } else { + run_start_idx = j; + } + } + coarse_nbr_weight_pairs0.erase( + std::remove_if(coarse_nbr_weight_pairs0.begin(), + coarse_nbr_weight_pairs0.end(), + [](auto t) { + return std::get<0>(t) == + cugraph::experimental::invalid_vertex_id::value; + }), + coarse_nbr_weight_pairs0.end()); + } + + auto coarse_vertex = label_to_coarse_vertex_map[org_unique_labels[i]]; + std::vector> coarse_nbr_weight_pairs1( + coarse_offsets[coarse_vertex + 1] - coarse_offsets[coarse_vertex]); + for (auto j = coarse_offsets[coarse_vertex]; j < coarse_offsets[coarse_vertex + 1]; ++j) { + 
coarse_nbr_weight_pairs1[j - coarse_offsets[coarse_vertex]] = + std::make_tuple(coarse_indices[j], coarse_weights[j]); + } + std::sort(coarse_nbr_weight_pairs1.begin(), coarse_nbr_weight_pairs1.end()); + + ASSERT_TRUE(coarse_nbr_weight_pairs0.size() == coarse_nbr_weight_pairs1.size()); + ASSERT_TRUE(std::equal( + coarse_nbr_weight_pairs0.begin(), + coarse_nbr_weight_pairs0.end(), + coarse_nbr_weight_pairs1.begin(), + [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + return std::get<0>(lhs) == std::get<0>(rhs) + ? (std::abs(std::get<1>(lhs) - std::get<1>(rhs)) <= + std::max(std::max(std::abs(std::get<1>(lhs)), std::abs(std::get<1>(rhs))) * + threshold_ratio, + threshold_magnitude)) + : false; + })); + } + } + + return; +} + +typedef struct CoarsenGraph_Usecase_t { + std::string graph_file_full_path{}; + double coarsen_ratio{0.0}; + bool test_weighted{false}; + + CoarsenGraph_Usecase_t(std::string const& graph_file_path, + double coarsen_ratio, + bool test_weighted) + : coarsen_ratio(coarsen_ratio), test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} CoarsenGraph_Usecase; + +class Tests_CoarsenGraph : public ::testing::TestWithParam { + public: + Tests_CoarsenGraph() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(CoarsenGraph_Usecase const& configuration) + { + raft::handle_t handle{}; + + // FIXME: remove this once we drop Pascal support + if (handle.get_device_properties().major < 7) { // Pascal is not supported, skip testing + return; + } + + auto graph = cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted); + auto graph_view = graph.view(); + + if 
(graph_view.get_number_of_vertices() == 0) { return; } + + std::vector h_labels(graph_view.get_number_of_vertices()); + auto num_labels = + std::max(static_cast(h_labels.size() * configuration.coarsen_ratio), vertex_t{1}); + + std::default_random_engine generator{}; + std::uniform_int_distribution distribution{0, num_labels - 1}; + + std::for_each(h_labels.begin(), h_labels.end(), [&distribution, &generator](auto& label) { + label = distribution(generator); + }); + + rmm::device_uvector d_labels(h_labels.size(), handle.get_stream()); + raft::update_device(d_labels.data(), h_labels.data(), h_labels.size(), handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + std::unique_ptr< + cugraph::experimental::graph_t> + coarse_graph{}; + rmm::device_uvector coarse_vertices_to_labels(0, handle.get_stream()); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::tie(coarse_graph, coarse_vertices_to_labels) = + cugraph::experimental::coarsen_graph(handle, graph_view, d_labels.begin()); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::vector h_org_offsets(graph_view.get_number_of_vertices() + 1); + std::vector h_org_indices(graph_view.get_number_of_edges()); + std::vector h_org_weights{}; + raft::update_host(h_org_offsets.data(), + graph_view.offsets(), + graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_org_indices.data(), + graph_view.indices(), + graph_view.get_number_of_edges(), + handle.get_stream()); + if (graph_view.is_weighted()) { + h_org_weights.assign(graph_view.get_number_of_edges(), weight_t{0.0}); + raft::update_host(h_org_weights.data(), + graph_view.weights(), + graph_view.get_number_of_edges(), + handle.get_stream()); + } + + auto coarse_graph_view = coarse_graph->view(); + + std::vector h_coarse_offsets(coarse_graph_view.get_number_of_vertices() + 1); + std::vector 
h_coarse_indices(coarse_graph_view.get_number_of_edges()); + std::vector h_coarse_weights{}; + raft::update_host(h_coarse_offsets.data(), + coarse_graph_view.offsets(), + coarse_graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_coarse_indices.data(), + coarse_graph_view.indices(), + coarse_graph_view.get_number_of_edges(), + handle.get_stream()); + if (graph_view.is_weighted()) { + h_coarse_weights.resize(coarse_graph_view.get_number_of_edges()); + raft::update_host(h_coarse_weights.data(), + coarse_graph_view.weights(), + coarse_graph_view.get_number_of_edges(), + handle.get_stream()); + } + + std::vector h_coarse_vertices_to_labels(coarse_vertices_to_labels.size()); + raft::update_host(h_coarse_vertices_to_labels.data(), + coarse_vertices_to_labels.data(), + coarse_vertices_to_labels.size(), + handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + check_coarsened_graph_results(h_org_offsets.data(), + h_org_indices.data(), + h_org_weights.data(), + h_labels.data(), + h_coarse_offsets.data(), + h_coarse_indices.data(), + h_coarse_weights.data(), + h_coarse_vertices_to_labels.data(), + graph_view.get_number_of_vertices(), + coarse_graph_view.get_number_of_vertices()); + } +}; + +// FIXME: add tests for type combinations + +TEST_P(Tests_CoarsenGraph, CheckInt32Int32FloatTransposed) +{ + run_current_test(GetParam()); +} + +TEST_P(Tests_CoarsenGraph, CheckInt32Int32FloatUntransposed) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_CoarsenGraph, + ::testing::Values(CoarsenGraph_Usecase("test/datasets/karate.mtx", 0.2, false), + CoarsenGraph_Usecase("test/datasets/karate.mtx", 0.2, true), + CoarsenGraph_Usecase("test/datasets/web-Google.mtx", 0.1, false), + CoarsenGraph_Usecase("test/datasets/web-Google.mtx", 0.1, true), + CoarsenGraph_Usecase("test/datasets/ljournal-2008.mtx", 0.1, false), + CoarsenGraph_Usecase("test/datasets/ljournal-2008.mtx", 0.1, true), + 
CoarsenGraph_Usecase("test/datasets/webbase-1M.mtx", 0.1, false), + CoarsenGraph_Usecase("test/datasets/webbase-1M.mtx", 0.1, true))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/induced_subgraph_test.cpp b/cpp/tests/experimental/induced_subgraph_test.cpp new file mode 100644 index 00000000000..72894a9349f --- /dev/null +++ b/cpp/tests/experimental/induced_subgraph_test.cpp @@ -0,0 +1,322 @@ +/* + * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +template +std::tuple, std::vector, std::vector, std::vector> +extract_induced_subgraph_reference(edge_t const* offsets, + vertex_t const* indices, + weight_t const* weights, + size_t const* subgraph_offsets, + vertex_t const* subgraph_vertices, + vertex_t num_vertices, + size_t num_subgraphs) +{ + std::vector edgelist_majors{}; + std::vector edgelist_minors{}; + std::vector edgelist_weights{}; + std::vector subgraph_edge_offsets{0}; + + for (size_t i = 0; i < num_subgraphs; ++i) { + std::for_each(subgraph_vertices + subgraph_offsets[i], + subgraph_vertices + subgraph_offsets[i + 1], + [offsets, + indices, + weights, + subgraph_vertices, + subgraph_offsets, + &edgelist_majors, + &edgelist_minors, + &edgelist_weights, + i](auto v) { + auto first = offsets[v]; + auto last = offsets[v + 1]; + for (auto j = first; j < last; ++j) { + if (std::binary_search(subgraph_vertices + subgraph_offsets[i], + subgraph_vertices + subgraph_offsets[i + 1], + indices[j])) { + edgelist_majors.push_back(v); + edgelist_minors.push_back(indices[j]); + if (weights != nullptr) { edgelist_weights.push_back(weights[j]); } + } + } + }); + subgraph_edge_offsets.push_back(edgelist_majors.size()); + } + + return std::make_tuple(edgelist_majors, edgelist_minors, edgelist_weights, subgraph_edge_offsets); +} + +typedef struct InducedSubgraph_Usecase_t { + std::string graph_file_full_path{}; + std::vector subgraph_sizes{}; + bool test_weighted{false}; + + InducedSubgraph_Usecase_t(std::string const& graph_file_path, + std::vector const& subgraph_sizes, + bool test_weighted) + : subgraph_sizes(subgraph_sizes), test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + 
graph_file_full_path = graph_file_path; + } + }; +} InducedSubgraph_Usecase; + +class Tests_InducedSubgraph : public ::testing::TestWithParam { + public: + Tests_InducedSubgraph() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(InducedSubgraph_Usecase const& configuration) + { + raft::handle_t handle{}; + + auto graph = cugraph::test:: + read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted); + auto graph_view = graph.view(); + + std::vector h_offsets(graph_view.get_number_of_vertices() + 1); + std::vector h_indices(graph_view.get_number_of_edges()); + std::vector h_weights{}; + raft::update_host(h_offsets.data(), + graph_view.offsets(), + graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + graph_view.indices(), + graph_view.get_number_of_edges(), + handle.get_stream()); + if (graph_view.is_weighted()) { + h_weights.assign(graph_view.get_number_of_edges(), weight_t{0.0}); + raft::update_host(h_weights.data(), + graph_view.weights(), + graph_view.get_number_of_edges(), + handle.get_stream()); + } + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + std::vector h_subgraph_offsets(configuration.subgraph_sizes.size() + 1, 0); + std::partial_sum(configuration.subgraph_sizes.begin(), + configuration.subgraph_sizes.end(), + h_subgraph_offsets.begin() + 1); + std::vector h_subgraph_vertices( + h_subgraph_offsets.back(), cugraph::experimental::invalid_vertex_id::value); + std::default_random_engine generator{}; + std::uniform_int_distribution distribution{0, + graph_view.get_number_of_vertices() - 1}; + + for (size_t i = 0; i < configuration.subgraph_sizes.size(); ++i) { + auto start = h_subgraph_offsets[i]; + auto last = h_subgraph_offsets[i + 1]; + ASSERT_TRUE(last - start <= graph_view.get_number_of_vertices()) << "Invalid subgraph size."; + 
// this is inefficient if last - start << graph_view.get_number_of_vertices() but this is for + // the test purpose only and the time & memory cost is only linear to + // graph_view.get_number_of_vertices(), so this may not matter. + std::vector vertices(graph_view.get_number_of_vertices()); + std::iota(vertices.begin(), vertices.end(), vertex_t{0}); + std::shuffle(vertices.begin(), vertices.end(), generator); + std::copy( + vertices.begin(), vertices.begin() + (last - start), h_subgraph_vertices.begin() + start); + std::sort(h_subgraph_vertices.begin() + start, h_subgraph_vertices.begin() + last); + } + + rmm::device_uvector d_subgraph_offsets(h_subgraph_offsets.size(), handle.get_stream()); + rmm::device_uvector d_subgraph_vertices(h_subgraph_vertices.size(), + handle.get_stream()); + raft::update_device(d_subgraph_offsets.data(), + h_subgraph_offsets.data(), + h_subgraph_offsets.size(), + handle.get_stream()); + raft::update_device(d_subgraph_vertices.data(), + h_subgraph_vertices.data(), + h_subgraph_vertices.size(), + handle.get_stream()); + + std::vector h_reference_subgraph_edgelist_majors{}; + std::vector h_reference_subgraph_edgelist_minors{}; + std::vector h_reference_subgraph_edgelist_weights{}; + std::vector h_reference_subgraph_edge_offsets{}; + std::tie(h_reference_subgraph_edgelist_majors, + h_reference_subgraph_edgelist_minors, + h_reference_subgraph_edgelist_weights, + h_reference_subgraph_edge_offsets) = + extract_induced_subgraph_reference( + h_offsets.data(), + h_indices.data(), + h_weights.size() > 0 ? 
h_weights.data() : static_cast(nullptr), + h_subgraph_offsets.data(), + h_subgraph_vertices.data(), + graph_view.get_number_of_vertices(), + configuration.subgraph_sizes.size()); + + rmm::device_uvector d_subgraph_edgelist_majors(0, handle.get_stream()); + rmm::device_uvector d_subgraph_edgelist_minors(0, handle.get_stream()); + rmm::device_uvector d_subgraph_edgelist_weights(0, handle.get_stream()); + rmm::device_uvector d_subgraph_edge_offsets(0, handle.get_stream()); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + // FIXME: turn-off do_expensive_check once verified. + std::tie(d_subgraph_edgelist_majors, + d_subgraph_edgelist_minors, + d_subgraph_edgelist_weights, + d_subgraph_edge_offsets) = + cugraph::experimental::extract_induced_subgraphs(handle, + graph_view, + d_subgraph_offsets.data(), + d_subgraph_vertices.data(), + configuration.subgraph_sizes.size(), + true); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::vector h_cugraph_subgraph_edgelist_majors(d_subgraph_edgelist_majors.size()); + std::vector h_cugraph_subgraph_edgelist_minors(d_subgraph_edgelist_minors.size()); + std::vector h_cugraph_subgraph_edgelist_weights(d_subgraph_edgelist_weights.size()); + std::vector h_cugraph_subgraph_edge_offsets(d_subgraph_edge_offsets.size()); + + raft::update_host(h_cugraph_subgraph_edgelist_majors.data(), + d_subgraph_edgelist_majors.data(), + d_subgraph_edgelist_majors.size(), + handle.get_stream()); + raft::update_host(h_cugraph_subgraph_edgelist_minors.data(), + d_subgraph_edgelist_minors.data(), + d_subgraph_edgelist_minors.size(), + handle.get_stream()); + if (configuration.test_weighted) { + raft::update_host(h_cugraph_subgraph_edgelist_weights.data(), + d_subgraph_edgelist_weights.data(), + d_subgraph_edgelist_weights.size(), + handle.get_stream()); + } + raft::update_host(h_cugraph_subgraph_edge_offsets.data(), + d_subgraph_edge_offsets.data(), + d_subgraph_edge_offsets.size(), + 
handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + ASSERT_TRUE(h_reference_subgraph_edge_offsets.size() == h_cugraph_subgraph_edge_offsets.size()) + << "Returned subgraph edge offset vector has an invalid size."; + ASSERT_TRUE(std::equal(h_reference_subgraph_edge_offsets.begin(), + h_reference_subgraph_edge_offsets.end(), + h_cugraph_subgraph_edge_offsets.begin())) + << "Returned subgraph edge offset values do not match with the reference values."; + + for (size_t i = 0; i < configuration.subgraph_sizes.size(); ++i) { + auto start = h_reference_subgraph_edge_offsets[i]; + auto last = h_reference_subgraph_edge_offsets[i + 1]; + if (configuration.test_weighted) { + std::vector> reference_tuples(last - start); + std::vector> cugraph_tuples(last - start); + for (auto j = start; j < last; ++j) { + reference_tuples[j - start] = std::make_tuple(h_reference_subgraph_edgelist_majors[j], + h_reference_subgraph_edgelist_minors[j], + h_reference_subgraph_edgelist_weights[j]); + cugraph_tuples[j - start] = std::make_tuple(h_cugraph_subgraph_edgelist_majors[j], + h_cugraph_subgraph_edgelist_minors[j], + h_cugraph_subgraph_edgelist_weights[j]); + } + ASSERT_TRUE( + std::equal(reference_tuples.begin(), reference_tuples.end(), cugraph_tuples.begin())) + << "Extracted subgraph edges do not match with the edges extracted by the reference " + "implementation."; + } else { + std::vector> reference_tuples(last - start); + std::vector> cugraph_tuples(last - start); + for (auto j = start; j < last; ++j) { + reference_tuples[j - start] = std::make_tuple(h_reference_subgraph_edgelist_majors[j], + h_reference_subgraph_edgelist_minors[j]); + cugraph_tuples[j - start] = std::make_tuple(h_cugraph_subgraph_edgelist_majors[j], + h_cugraph_subgraph_edgelist_minors[j]); + } + ASSERT_TRUE( + std::equal(reference_tuples.begin(), reference_tuples.end(), cugraph_tuples.begin())) + << "Extracted subgraph edges do not match with the edges extracted by the reference " + 
"implementation."; + } + } + } +}; + +// FIXME: add tests for type combinations + +TEST_P(Tests_InducedSubgraph, CheckInt32Int32FloatTransposed) +{ + run_current_test(GetParam()); +} + +TEST_P(Tests_InducedSubgraph, CheckInt32Int32FloatUntransposed) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_InducedSubgraph, + ::testing::Values( + InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{0}, false), + InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{1}, false), + InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{10}, false), + InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{34}, false), + InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{10, 0, 5}, false), + InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{9, 3, 10}, false), + InducedSubgraph_Usecase("test/datasets/karate.mtx", std::vector{5, 12, 13}, true), + InducedSubgraph_Usecase("test/datasets/web-Google.mtx", + std::vector{250, 130, 15}, + false), + InducedSubgraph_Usecase("test/datasets/web-Google.mtx", + std::vector{125, 300, 70}, + true), + InducedSubgraph_Usecase("test/datasets/ljournal-2008.mtx", + std::vector{300, 20, 400}, + false), + InducedSubgraph_Usecase("test/datasets/ljournal-2008.mtx", + std::vector{9130, 1200, 300}, + true), + InducedSubgraph_Usecase("test/datasets/webbase-1M.mtx", std::vector{700}, false), + InducedSubgraph_Usecase("test/datasets/webbase-1M.mtx", std::vector{500}, true))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp index cdbe3688248..3e9f0b478a0 100644 --- a/cpp/tests/experimental/katz_centrality_test.cpp +++ b/cpp/tests/experimental/katz_centrality_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -198,8 +198,8 @@ class Tests_KatzCentrality : public ::testing::TestWithParam(graph_view.get_number_of_vertices())) * threshold_ratio; // skip comparison for low Katz Centrality verties (lowly ranked vertices) auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { - auto diff = std::abs(lhs - rhs); - return (diff < std::max(lhs, rhs) * threshold_ratio) || (diff < threshold_magnitude); + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); }; ASSERT_TRUE(std::equal(h_reference_katz_centralities.begin(), diff --git a/cpp/tests/experimental/louvain_test.cu b/cpp/tests/experimental/louvain_test.cu index 4a47b1a1aca..35a26923df6 100644 --- a/cpp/tests/experimental/louvain_test.cu +++ b/cpp/tests/experimental/louvain_test.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp index 70c83ef8192..53143bf0bf3 100644 --- a/cpp/tests/experimental/pagerank_test.cpp +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -274,8 +274,8 @@ class Tests_PageRank : public ::testing::TestWithParam { (1.0 / static_cast(graph_view.get_number_of_vertices())) * threshold_ratio; // skip comparison for low PageRank verties (lowly ranked vertices) auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { - auto diff = std::abs(lhs - rhs); - return (diff < std::max(lhs, rhs) * threshold_ratio) || (diff < threshold_magnitude); + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); }; ASSERT_TRUE(std::equal(h_reference_pageranks.begin(), diff --git a/cpp/tests/pagerank/pagerank_mg_test.cpp b/cpp/tests/pagerank/pagerank_mg_test.cpp new file mode 100644 index 00000000000..7f789226bf1 --- /dev/null +++ b/cpp/tests/pagerank/pagerank_mg_test.cpp @@ -0,0 +1,229 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include + +#include +#include + +#include + +#include + +//////////////////////////////////////////////////////////////////////////////// +// Test param object. 
This defines the input and expected output for a test, and +// will be instantiated as the parameter to the tests defined below using +// INSTANTIATE_TEST_CASE_P() +// +typedef struct Pagerank_Testparams_t { + std::string graph_file_full_path{}; + double personalization_ratio{0.0}; + bool test_weighted{false}; + + Pagerank_Testparams_t(std::string const& graph_file_path, + double personalization_ratio, + bool test_weighted) + : personalization_ratio(personalization_ratio), test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} Pagerank_Testparams_t; + +//////////////////////////////////////////////////////////////////////////////// +// Parameterized test fixture, to be used with TEST_P(). This defines common +// setup and teardown steps as well as common utilities used by each E2E MG +// test. In this case, each test is identical except for the inputs and +// expected outputs, so the entire test is defined in the run_test() method. +// +class Pagerank_E2E_MG_Testfixture_t : public cugraph::test::MG_TestFixture_t, + public ::testing::WithParamInterface { + public: + Pagerank_E2E_MG_Testfixture_t() {} + + // Run once for each test instance + virtual void SetUp() {} + virtual void TearDown() {} + + // Return the results of running pagerank on a single GPU for the dataset in + // graph_file_path. 
+ template + std::vector get_sg_results(raft::handle_t& handle, + const std::string& graph_file_path, + const result_t alpha, + const result_t epsilon) + { + auto graph = + cugraph::test::read_graph_from_matrix_market_file( + handle, graph_file_path, true); // FIXME: should use param.test_weighted instead of true + + auto graph_view = graph.view(); + cudaStream_t stream = handle.get_stream(); + rmm::device_uvector d_pageranks(graph_view.get_number_of_vertices(), stream); + + cugraph::experimental::pagerank( + handle, + graph_view, + static_cast(nullptr), // adj_matrix_row_out_weight_sums + static_cast(nullptr), // personalization_vertices + static_cast(nullptr), // personalization_values + static_cast(0), // personalization_vector_size + d_pageranks.begin(), // pageranks + alpha, // alpha (damping factor) + epsilon, // error tolerance for convergence + std::numeric_limits::max(), // max_iterations + false, // has_initial_guess + true); // do_expensive_check + + std::vector h_pageranks(graph_view.get_number_of_vertices()); + raft::update_host(h_pageranks.data(), d_pageranks.data(), d_pageranks.size(), stream); + + return h_pageranks; + } + + // Compare the results of running pagerank on multiple GPUs to that of a + // single-GPU run for the configuration in param. + template + void run_test(const Pagerank_Testparams_t& param) + { + result_t constexpr alpha{0.85}; + result_t constexpr epsilon{1e-6}; + + raft::handle_t handle; + raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD); + const auto& comm = handle.get_comms(); + + cudaStream_t stream = handle.get_stream(); + + // Assuming 2 GPUs which means 1 row, 2 cols. 2 cols = row_comm_size of 2. + // FIXME: DO NOT ASSUME 2 GPUs, add code to compute prows, pcols + size_t row_comm_size{2}; + cugraph::partition_2d::subcomm_factory_t + subcomm_factory(handle, row_comm_size); + + int my_rank = comm.get_rank(); + + // FIXME: graph must be weighted! 
+ std::unique_ptr> // store_transposed=true, + // multi_gpu=true + mg_graph_ptr{}; + rmm::device_uvector d_renumber_map_labels(0, handle.get_stream()); + + std::tie(mg_graph_ptr, d_renumber_map_labels) = cugraph::test:: + create_graph_for_gpu // store_transposed=true + (handle, param.graph_file_full_path); + + auto mg_graph_view = mg_graph_ptr->view(); + + rmm::device_uvector d_mg_pageranks(mg_graph_view.get_number_of_vertices(), stream); + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + cugraph::experimental::pagerank( + handle, + mg_graph_view, + static_cast(nullptr), // adj_matrix_row_out_weight_sums + static_cast(nullptr), // personalization_vertices + static_cast(nullptr), // personalization_values + static_cast(0), // personalization_vector_size + d_mg_pageranks.begin(), // pageranks + alpha, // alpha (damping factor) + epsilon, // error tolerance for convergence + std::numeric_limits::max(), // max_iterations + false, // has_initial_guess + true); // do_expensive_check + + std::vector h_mg_pageranks(mg_graph_view.get_number_of_vertices()); + + raft::update_host(h_mg_pageranks.data(), d_mg_pageranks.data(), d_mg_pageranks.size(), stream); + + std::vector h_renumber_map_labels(mg_graph_view.get_number_of_vertices()); + raft::update_host(h_renumber_map_labels.data(), + d_renumber_map_labels.data(), + d_renumber_map_labels.size(), + stream); + + // Compare MG to SG + // Each GPU will have pagerank values for their range, so ech GPU must + // compare to specific SG results for their respective range. + + auto h_sg_pageranks = get_sg_results( + handle, param.graph_file_full_path, alpha, epsilon); + + // For this test, each GPU will have the full set of vertices and + // therefore the pageranks vectors should be equal in size. 
+ ASSERT_EQ(h_sg_pageranks.size(), h_mg_pageranks.size()); + + auto threshold_ratio = 1e-3; + auto threshold_magnitude = + (1.0 / static_cast(mg_graph_view.get_number_of_vertices())) * + threshold_ratio; // skip comparison for low PageRank verties (lowly ranked vertices) + auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { + return std::abs(lhs - rhs) < + std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); + }; + + vertex_t mapped_vertex{0}; + for (vertex_t i = 0; + i + mg_graph_view.get_local_vertex_first() < mg_graph_view.get_local_vertex_last(); + ++i) { + mapped_vertex = h_renumber_map_labels[i]; + ASSERT_TRUE(nearly_equal(h_mg_pageranks[i], h_sg_pageranks[mapped_vertex])) + << "MG PageRank value for vertex: " << i << " in rank: " << my_rank + << " has value: " << h_mg_pageranks[i] + << " which exceeds the error margin for comparing to SG value: " << h_sg_pageranks[i]; + } + } +}; + +//////////////////////////////////////////////////////////////////////////////// +TEST_P(Pagerank_E2E_MG_Testfixture_t, CheckInt32Int32FloatFloat) +{ + run_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P( + e2e, + Pagerank_E2E_MG_Testfixture_t, + + // FIXME: the personalization_ratio and use_weighted boo are not used + // (personilization vectors are not used, and all datasets are assumed + // weighted). update this to use personilization vectors and non-weighted + // graphs. + ::testing::Values(Pagerank_Testparams_t("test/datasets/karate.mtx", 0.0, true), + // FIXME: The commented datasets contain isolate vertices + // which result in a different number of vertices in the + // renumbered MG graph (because the renumbering function + // does not include them) vs. the SG graph object used for + // the pagerank comparison because the SG graph reads the + // COO as-is without renumbering. Update the utility that + // reads a .mtx and constructs a SG graph object to also + // renumber and return the renumber vertices vector. 
This + // will result in a comparison of an equal number of + // pagerank values. + // + // Pagerank_Testparams_t("test/datasets/web-Google.mtx", 0.0, true), + // Pagerank_Testparams_t("test/datasets/ljournal-2008.mtx", 0.0, true), + Pagerank_Testparams_t("test/datasets/webbase-1M.mtx", 0.0, true))); + +// FIXME: Enable proper RMM configuration by using CUGRAPH_TEST_PROGRAM_MAIN(). +// Currently seeing a RMM failure during init, need to investigate. +// CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/pagerank/pagerank_test.cpp b/cpp/tests/pagerank/pagerank_test.cpp deleted file mode 100644 index 48705f7f324..00000000000 --- a/cpp/tests/pagerank/pagerank_test.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/* - * Copyright (c) 2018-2020, NVIDIA CORPORATION. All rights reserved. - * - * NVIDIA CORPORATION and its licensors retain all intellectual property - * and proprietary rights in and to this software, related documentation - * and any modifications thereto. Any use, reproduction, disclosure or - * distribution of this software and related documentation without an express - * license agreement from NVIDIA CORPORATION is strictly prohibited. 
- * - */ - -// Pagerank solver tests -// Author: Alex Fender afender@nvidia.com - -#include -#include -#include - -#include -#include - -#include -#include - -#include - -#include - -#include - -// do the perf measurements -// enabled by command line parameter s'--perf' -static int PERF = 0; - -// iterations for perf tests -// enabled by command line parameter '--perf-iters" -static int PERF_MULTIPLIER = 5; - -typedef struct Pagerank_Usecase_t { - std::string matrix_file; - std::string result_file; - Pagerank_Usecase_t(const std::string& a, const std::string& b) - { - // assume relative paths are relative to RAPIDS_DATASET_ROOT_DIR - const std::string& rapidsDatasetRootDir = cugraph::test::get_rapids_dataset_root_dir(); - if ((a != "") && (a[0] != '/')) { - matrix_file = rapidsDatasetRootDir + "/" + a; - } else { - matrix_file = a; - } - if ((b != "") && (b[0] != '/')) { - result_file = rapidsDatasetRootDir + "/" + b; - } else { - result_file = b; - } - } - Pagerank_Usecase_t& operator=(const Pagerank_Usecase_t& rhs) - { - matrix_file = rhs.matrix_file; - result_file = rhs.result_file; - return *this; - } -} Pagerank_Usecase; - -class Tests_Pagerank : public ::testing::TestWithParam { - public: - Tests_Pagerank() {} - static void SetupTestCase() {} - static void TearDownTestCase() - { - if (PERF) { - for (unsigned int i = 0; i < pagerank_time.size(); ++i) { - std::cout << pagerank_time[i] / PERF_MULTIPLIER << std::endl; - } - } - } - virtual void SetUp() {} - virtual void TearDown() {} - - static std::vector pagerank_time; - - template - void run_current_test(const Pagerank_Usecase& param) - { - const ::testing::TestInfo* const test_info = - ::testing::UnitTest::GetInstance()->current_test_info(); - std::stringstream ss; - std::string test_id = std::string(test_info->test_case_name()) + std::string(".") + - std::string(test_info->name()) + std::string("_") + - cugraph::test::getFileName(param.matrix_file) + std::string("_") + - ss.str().c_str(); - - int m, k, nnz; 
- MM_typecode mc; - - float tol = 1E-5f; - - // Default parameters - /* - float alpha = 0.85; - int max_iter = 500; - bool has_guess = false; - */ - - HighResClock hr_clock; - double time_tmp; - - FILE* fpin = fopen(param.matrix_file.c_str(), "r"); - ASSERT_NE(fpin, nullptr) << "fopen (" << param.matrix_file << ") failure."; - - ASSERT_EQ(cugraph::test::mm_properties(fpin, 1, &mc, &m, &k, &nnz), 0) - << "could not read Matrix Market file properties" - << "\n"; - ASSERT_TRUE(mm_is_matrix(mc)); - ASSERT_TRUE(mm_is_coordinate(mc)); - ASSERT_FALSE(mm_is_complex(mc)); - ASSERT_FALSE(mm_is_skew(mc)); - - // Allocate memory on host - std::vector cooRowInd(nnz), cooColInd(nnz); - std::vector cooVal(nnz), pagerank(m); - - // device alloc - rmm::device_uvector pagerank_vector(static_cast(m), nullptr); - T* d_pagerank = pagerank_vector.data(); - - // Read - ASSERT_EQ((cugraph::test::mm_to_coo( - fpin, 1, nnz, &cooRowInd[0], &cooColInd[0], &cooVal[0], NULL)), - 0) - << "could not read matrix data" - << "\n"; - ASSERT_EQ(fclose(fpin), 0); - - // Pagerank runs on CSC, so feed COOtoCSR the row/col backwards. 
- raft::handle_t handle; - cugraph::GraphCOOView G_coo(&cooColInd[0], &cooRowInd[0], &cooVal[0], m, nnz); - auto G_unique = cugraph::coo_to_csr(G_coo); - cugraph::GraphCSCView G(G_unique->view().offsets, - G_unique->view().indices, - G_unique->view().edge_data, - G_unique->view().number_of_vertices, - G_unique->view().number_of_edges); - - cudaDeviceSynchronize(); - if (PERF) { - hr_clock.start(); - for (int i = 0; i < PERF_MULTIPLIER; ++i) { - cugraph::pagerank(handle, G, d_pagerank); - cudaDeviceSynchronize(); - } - hr_clock.stop(&time_tmp); - pagerank_time.push_back(time_tmp); - } else { - cudaProfilerStart(); - cugraph::pagerank(handle, G, d_pagerank); - cudaProfilerStop(); - cudaDeviceSynchronize(); - } - - // Check vs golden data - if (param.result_file.length() > 0) { - std::vector calculated_res(m); - - CUDA_TRY(cudaMemcpy(&calculated_res[0], d_pagerank, sizeof(T) * m, cudaMemcpyDeviceToHost)); - std::sort(calculated_res.begin(), calculated_res.end()); - fpin = fopen(param.result_file.c_str(), "rb"); - ASSERT_TRUE(fpin != NULL) << " Cannot read file with reference data: " << param.result_file - << std::endl; - std::vector expected_res(m); - ASSERT_EQ(cugraph::test::read_binary_vector(fpin, m, expected_res), 0); - fclose(fpin); - T err; - int n_err = 0; - for (int i = 0; i < m; i++) { - err = fabs(expected_res[i] - calculated_res[i]); - if (err > tol * 1.1) { - n_err++; // count the number of mismatches - } - } - if (n_err) { - EXPECT_LE(n_err, 0.001 * m); // we tolerate 0.1% of values with a litte difference - } - } - } -}; - -std::vector Tests_Pagerank::pagerank_time; - -TEST_P(Tests_Pagerank, CheckFP32_T) { run_current_test(GetParam()); } - -TEST_P(Tests_Pagerank, CheckFP64_T) { run_current_test(GetParam()); } - -// --gtest_filter=*simple_test* -INSTANTIATE_TEST_CASE_P( - simple_test, - Tests_Pagerank, - ::testing::Values(Pagerank_Usecase("test/datasets/karate.mtx", ""), - Pagerank_Usecase("test/datasets/web-Google.mtx", - 
"test/ref/pagerank/web-Google.pagerank_val_0.85.bin"), - Pagerank_Usecase("test/datasets/ljournal-2008.mtx", - "test/ref/pagerank/ljournal-2008.pagerank_val_0.85.bin"), - Pagerank_Usecase("test/datasets/webbase-1M.mtx", - "test/ref/pagerank/webbase-1M.pagerank_val_0.85.bin"))); - -CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/traversal/tsp_test.cu b/cpp/tests/traversal/tsp_test.cu new file mode 100644 index 00000000000..383427a56cf --- /dev/null +++ b/cpp/tests/traversal/tsp_test.cu @@ -0,0 +1,245 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. + * + */ + +// TSP solver tests +// Author: Hugo Linsenmaier hlinsenmaier@nvidia.com + +/* + * Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved. + * + * NVIDIA CORPORATION and its licensors retain all intellectual property + * and proprietary rights in and to this software, related documentation + * and any modifications thereto. Any use, reproduction, disclosure or + * distribution of this software and related documentation without an express + * license agreement from NVIDIA CORPORATION is strictly prohibited. 
+ * + */ + +// TSP solver tests +// Author: Hugo Linsenmaier hlinsenmaier@nvidia.com + +#include +#include +#include + +#include +#include + +#include + +#include +#include + +#include + +#include +#include +#include + +typedef struct Tsp_Usecase_t { + std::string tsp_file; + float ref_cost; + Tsp_Usecase_t(const std::string& a, const float c) + { + // assume relative paths are relative to RAPIDS_DATASET_ROOT_DIR + const std::string& rapidsDatasetRootDir = cugraph::test::get_rapids_dataset_root_dir(); + if ((a != "") && (a[0] != '/')) { + tsp_file = rapidsDatasetRootDir + "/" + a; + } else { + tsp_file = a; + } + ref_cost = c; + } + Tsp_Usecase_t& operator=(const Tsp_Usecase_t& rhs) + { + tsp_file = rhs.tsp_file; + ref_cost = rhs.ref_cost; + return *this; + } +} Tsp_Usecase; + +static std::vector euc_2d{ + {"tsplib/datasets/a280.tsp", 2579}, {"tsplib/datasets/berlin52.tsp", 7542}, + {"tsplib/datasets/bier127.tsp", 118282}, {"tsplib/datasets/ch130.tsp", 6110}, + {"tsplib/datasets/ch150.tsp", 6528}, {"tsplib/datasets/d1291.tsp", 50801}, + {"tsplib/datasets/d1655.tsp", 62128}, {"tsplib/datasets/d198.tsp", 15780}, + {"tsplib/datasets/d2103.tsp", 80450}, {"tsplib/datasets/d493.tsp", 35002}, + {"tsplib/datasets/d657.tsp", 48912}, {"tsplib/datasets/eil101.tsp", 629}, + {"tsplib/datasets/eil51.tsp", 426}, {"tsplib/datasets/eil76.tsp", 538}, + {"tsplib/datasets/fl1400.tsp", 20127}, {"tsplib/datasets/fl1577.tsp", 22249}, + {"tsplib/datasets/fl417.tsp", 11861}, {"tsplib/datasets/gil262.tsp", 2378}, + {"tsplib/datasets/kroA100.tsp", 21282}, {"tsplib/datasets/kroA150.tsp", 26524}, + {"tsplib/datasets/kroA200.tsp", 29368}, {"tsplib/datasets/kroB100.tsp", 22141}, + {"tsplib/datasets/kroB150.tsp", 26130}, {"tsplib/datasets/kroB200.tsp", 29437}, + {"tsplib/datasets/kroC100.tsp", 20749}, {"tsplib/datasets/kroD100.tsp", 21294}, + {"tsplib/datasets/kroE100.tsp", 22068}, {"tsplib/datasets/lin105.tsp", 14379}, + {"tsplib/datasets/lin318.tsp", 42029}, {"tsplib/datasets/nrw1379.tsp", 
56638}, + {"tsplib/datasets/p654.tsp", 34643}, {"tsplib/datasets/pcb1173.tsp", 56892}, + {"tsplib/datasets/pcb442.tsp", 50778}, {"tsplib/datasets/pr1002.tsp", 259045}, + {"tsplib/datasets/pr107.tsp", 44303}, {"tsplib/datasets/pr136.tsp", 96772}, + {"tsplib/datasets/pr144.tsp", 58537}, {"tsplib/datasets/pr152.tsp", 73682}, + {"tsplib/datasets/pr226.tsp", 80369}, {"tsplib/datasets/pr264.tsp", 49135}, + {"tsplib/datasets/pr299.tsp", 48191}, {"tsplib/datasets/pr439.tsp", 107217}, + {"tsplib/datasets/pr76.tsp", 108159}, {"tsplib/datasets/rat195.tsp", 2323}, + {"tsplib/datasets/rat575.tsp", 6773}, {"tsplib/datasets/rat783.tsp", 8806}, + {"tsplib/datasets/rat99.tsp", 1211}, {"tsplib/datasets/rd100.tsp", 7910}, + {"tsplib/datasets/rd400.tsp", 15281}, {"tsplib/datasets/rl1323.tsp", 270199}, + {"tsplib/datasets/st70.tsp", 675}, {"tsplib/datasets/ts225.tsp", 126643}, + {"tsplib/datasets/tsp225.tsp", 3916}, {"tsplib/datasets/u1060.tsp", 224094}, + {"tsplib/datasets/u1432.tsp", 152970}, {"tsplib/datasets/u159.tsp", 42080}, + {"tsplib/datasets/u574.tsp", 36905}, {"tsplib/datasets/u724.tsp", 41910}, + {"tsplib/datasets/vm1084.tsp", 239297}, +}; + +struct Route { + std::vector cities; + std::vector x_pos; + std::vector y_pos; +}; + +class Tests_Tsp : public ::testing::TestWithParam { + public: + Tests_Tsp() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + virtual void SetUp() {} + virtual void TearDown() {} + + void run_current_test(const Tsp_Usecase& param) + { + const ::testing::TestInfo* const test_info = + ::testing::UnitTest::GetInstance()->current_test_info(); + std::stringstream ss; + std::string test_id = std::string(test_info->test_case_name()) + std::string(".") + + std::string(test_info->name()) + std::string("_") + + cugraph::test::getFileName(param.tsp_file) + std::string("_") + + ss.str().c_str(); + + float tol = 1E-1f; + HighResClock hr_clock; + double time_tmp; + Route input; + + std::cout << "File: " << param.tsp_file.c_str() << "\n"; + 
int nodes = load_tsp(param.tsp_file.c_str(), &input); + + // Device alloc + raft::handle_t handle; + rmm::device_uvector vertices(static_cast(nodes), nullptr); + rmm::device_uvector route(static_cast(nodes), nullptr); + rmm::device_uvector x_pos(static_cast(nodes), nullptr); + rmm::device_uvector y_pos(static_cast(nodes), nullptr); + + int* vtx_ptr = vertices.data(); + int* d_route = route.data(); + float* d_x_pos = x_pos.data(); + float* d_y_pos = y_pos.data(); + + CUDA_TRY(cudaMemcpy(vtx_ptr, input.cities.data(), sizeof(int) * nodes, cudaMemcpyHostToDevice)); + CUDA_TRY( + cudaMemcpy(d_x_pos, input.x_pos.data(), sizeof(float) * nodes, cudaMemcpyHostToDevice)); + CUDA_TRY( + cudaMemcpy(d_y_pos, input.y_pos.data(), sizeof(float) * nodes, cudaMemcpyHostToDevice)); + + // Default parameters + int restarts = 4096; + bool beam_search = true; + int k = 4; + int nstart = 0; + bool verbose = false; + + hr_clock.start(); + cudaDeviceSynchronize(); + cudaProfilerStart(); + + float final_cost = cugraph::traveling_salesperson( + handle, vtx_ptr, d_x_pos, d_y_pos, nodes, restarts, beam_search, k, nstart, verbose, d_route); + cudaProfilerStop(); + cudaDeviceSynchronize(); + hr_clock.stop(&time_tmp); + + std::vector h_route; + h_route.resize(nodes); + std::vector h_vertices; + h_vertices.resize(nodes); + CUDA_TRY(cudaMemcpy(h_route.data(), d_route, sizeof(int) * nodes, cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + CUDA_TRY(cudaMemcpy(h_vertices.data(), vtx_ptr, sizeof(int) * nodes, cudaMemcpyDeviceToHost)); + cudaDeviceSynchronize(); + + std::cout << "tsp_time: " << time_tmp << " us" << std::endl; + std::cout << "Ref cost is: " << param.ref_cost << "\n"; + std::cout << "Final cost is: " << final_cost << "\n"; + float err = fabs(final_cost - param.ref_cost); + err /= param.ref_cost; + std::cout << "Approximation error is: " << err * 100 << "%\n"; + EXPECT_LE(err, tol); + + // Check route goes through each vertex once + size_t u_nodes = nodes; + std::set 
node_set(h_route.begin(), h_route.end()); + ASSERT_EQ(node_set.size(), u_nodes); + + // Bound check + int max = *std::max_element(h_vertices.begin(), h_vertices.end()); + int min = *std::min_element(h_vertices.begin(), h_vertices.end()); + EXPECT_GE(*node_set.begin(), min); + EXPECT_LE(*node_set.rbegin(), max); + } + + private: + std::vector split(const std::string& s, char delimiter) + { + std::vector tokens; + std::string token; + std::istringstream tokenStream(s); + while (std::getline(tokenStream, token, delimiter)) { + if (token.size() == 0) continue; + tokens.push_back(token); + } + return tokens; + } + + // FIXME: At the moment TSP does not accept a graph_t as input and therefore + // deviates from the standard testing I/O pattern. Once other input types + // are supported we want to reconcile TSP testing with the rest of cugraph. + int load_tsp(const char* fname, Route* input) + { + std::fstream fs; + fs.open(fname); + std::string line; + std::vector tokens; + int nodes = 0; + while (std::getline(fs, line) && line.find(':') != std::string::npos) { + tokens = split(line, ':'); + auto strip_token = split(tokens[0], ' ')[0]; + if (strip_token == "DIMENSION") nodes = std::stof(tokens[1]); + } + + while (std::getline(fs, line) && line.find(' ') != std::string::npos) { + tokens = split(line, ' '); + auto city_id = std::stof(tokens[0]); + auto x = std::stof(tokens[1]); + auto y = std::stof(tokens[2]); + input->cities.push_back(city_id); + input->x_pos.push_back(x); + input->y_pos.push_back(y); + } + fs.close(); + assert(nodes == input->cities.size()); + return nodes; + } +}; + +TEST_P(Tests_Tsp, CheckFP32_T) { run_current_test(GetParam()); } + +INSTANTIATE_TEST_CASE_P(simple_test, Tests_Tsp, ::testing::ValuesIn(euc_2d)); +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp index 535b4b9c79e..3525db73425 100644 --- a/cpp/tests/utilities/base_fixture.hpp +++ b/cpp/tests/utilities/base_fixture.hpp @@ 
-1,5 +1,5 @@ /* - * Copyright (c) 2020, NVIDIA CORPORATION. + * Copyright (c) 2020-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -32,6 +32,12 @@ namespace cugraph { namespace test { +// FIXME: The BaseFixture class is not used in any tests. This file is only +// needed for the CUGRAPH_TEST_PROGRAM_MAIN macro and the code that it calls, so +// consider removing the BaseFixture class and renaming this file, or moving +// CUGRAPH_TEST_PROGRAM_MAIN to the test_utilities.hpp file and removing this +// file completely. + /** * @brief Base test fixture class from which all libcudf tests should inherit. * diff --git a/cpp/tests/utilities/mg_test_utilities.cu b/cpp/tests/utilities/mg_test_utilities.cu new file mode 100644 index 00000000000..26f2450b589 --- /dev/null +++ b/cpp/tests/utilities/mg_test_utilities.cu @@ -0,0 +1,180 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include +#include + +namespace cugraph { +namespace test { + +// Given a raft handle and a path to a dataset (must be a .mtx file), returns a +// tuple containing: +// * graph_t instance for the partition accessible from the raft handle +// * vector of indices representing the original unrenumbered vertices +// +// This function creates a graph_t instance appropriate for MG graph +// applications from the edgelist graph data file passed in by filtering out the +// edges not to be assigned to the GPU in this rank, then renumbering the +// vertices appropriately. The returned vector of vertices contains the original +// vertex IDs, ordered by the new sequential renumbered IDs (this is needed for +// unrenumbering). +template +std::tuple< + std::unique_ptr>, // multi_gpu=true + rmm::device_uvector> +create_graph_for_gpu(raft::handle_t& handle, const std::string& graph_file_path) +{ + const auto& comm = handle.get_comms(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + + int my_rank = comm.get_rank(); + + auto edgelist_from_mm = + ::cugraph::test::read_edgelist_from_matrix_market_file( + graph_file_path); + + edge_t total_number_edges = static_cast(edgelist_from_mm.h_rows.size()); + + ////////// + // Copy COO to device + rmm::device_uvector d_edgelist_rows(total_number_edges, handle.get_stream()); + rmm::device_uvector d_edgelist_cols(total_number_edges, handle.get_stream()); + rmm::device_uvector d_edgelist_weights(total_number_edges, handle.get_stream()); + + raft::update_device(d_edgelist_rows.data(), + edgelist_from_mm.h_rows.data(), + total_number_edges, + handle.get_stream()); + raft::update_device(d_edgelist_cols.data(), + edgelist_from_mm.h_cols.data(), + total_number_edges, + handle.get_stream()); + raft::update_device(d_edgelist_weights.data(), + 
edgelist_from_mm.h_weights.data(), + total_number_edges, + handle.get_stream()); + + ////////// + // Filter out edges that are not to be associated with this rank + // + // Create an edge_gpu_identifier, which will be used by the individual jobs to + // identify if an edge belongs to a particular rank + cugraph::experimental::detail::compute_gpu_id_from_edge_t edge_gpu_identifier{ + false, comm.get_size(), row_comm.get_size(), col_comm.get_size()}; + + auto edgelist_zip_it_begin = thrust::make_zip_iterator(thrust::make_tuple( + d_edgelist_rows.begin(), d_edgelist_cols.begin(), d_edgelist_weights.begin())); + bool is_transposed{store_transposed}; + + // Do the removal - note: remove_if does not delete items, it compacts the + // kept items to the front and returns an iterator (new_end) marking the end + // of the items kept. Actual removal of items can be done by + // resizing (see below). + auto new_end = thrust::remove_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + edgelist_zip_it_begin, + edgelist_zip_it_begin + total_number_edges, + [my_rank, is_transposed, edge_gpu_identifier] __device__(auto tup) { + if (is_transposed) { + return (edge_gpu_identifier(thrust::get<1>(tup), thrust::get<0>(tup)) != my_rank); + } else { + return (edge_gpu_identifier(thrust::get<0>(tup), thrust::get<1>(tup)) != my_rank); + } + }); + + edge_t local_number_edges = thrust::distance(edgelist_zip_it_begin, new_end); + // Free the memory used for the items remove_if "removed". This not only + // frees memory, but keeps the actual vector sizes consistent with the data + // being used from this point forward. 
+ d_edgelist_rows.resize(local_number_edges, handle.get_stream()); + d_edgelist_rows.shrink_to_fit(handle.get_stream()); + d_edgelist_cols.resize(local_number_edges, handle.get_stream()); + d_edgelist_cols.shrink_to_fit(handle.get_stream()); + d_edgelist_weights.resize(local_number_edges, handle.get_stream()); + d_edgelist_weights.shrink_to_fit(handle.get_stream()); + + ////////// + // renumber filtered edgelist_from_mm + vertex_t* major_vertices{nullptr}; + vertex_t* minor_vertices{nullptr}; + if (is_transposed) { + major_vertices = d_edgelist_cols.data(); + minor_vertices = d_edgelist_rows.data(); + } else { + major_vertices = d_edgelist_rows.data(); + minor_vertices = d_edgelist_cols.data(); + } + + rmm::device_uvector renumber_map_labels(0, handle.get_stream()); + cugraph::experimental::partition_t partition( + std::vector(comm.get_size() + 1, 0), + false, // is_hypergraph_partitioned() + row_comm.get_size(), + col_comm.get_size(), + row_comm.get_rank(), + col_comm.get_rank()); + vertex_t number_of_vertices{}; + edge_t number_of_edges{}; + std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) = + ::cugraph::experimental::renumber_edgelist // multi_gpu=true + (handle, + major_vertices, // edgelist_major_vertices, INOUT of vertex_t* + minor_vertices, // edgelist_minor_vertices, INOUT of vertex_t* + local_number_edges, + false, // is_hypergraph_partitioned + true); // do_expensive_check + + cugraph::experimental::edgelist_t edgelist{ + d_edgelist_rows.data(), d_edgelist_cols.data(), d_edgelist_weights.data(), local_number_edges}; + + std::vector> edgelist_vect; + edgelist_vect.push_back(edgelist); + cugraph::experimental::graph_properties_t properties; + properties.is_symmetric = edgelist_from_mm.is_symmetric; + properties.is_multigraph = false; + + // Finally, create instance of graph_t using filtered & renumbered edgelist + return std::make_tuple( + std::make_unique< + cugraph::experimental::graph_t>( + handle, + edgelist_vect, + 
partition, + number_of_vertices, + total_number_edges, + properties, + false, // sorted_by_global_degree_within_vertex_partition + true), // do_expensive_check + std::move(renumber_map_labels)); +} + +// explicit instantiation +template std::tuple< + std::unique_ptr< + cugraph::experimental::graph_t>, // store_transposed=true + // multi_gpu=true + rmm::device_uvector> +create_graph_for_gpu(raft::handle_t& handle, const std::string& graph_file_path); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/mg_test_utilities.hpp b/cpp/tests/utilities/mg_test_utilities.hpp new file mode 100644 index 00000000000..c23f6c43a6d --- /dev/null +++ b/cpp/tests/utilities/mg_test_utilities.hpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include + +#include + +namespace cugraph { +namespace test { + +// Given a raft handle and a path to a dataset (must be a .mtx file), returns a +// tuple containing: +// * graph_t instance for the partition accessible from the raft handle +// * vector of the original (pre-renumbering) vertex IDs, ordered by the new +// renumbered IDs (needed for unrenumbering) +template +std::tuple< + std::unique_ptr>, // multi_gpu=true + rmm::device_uvector> +create_graph_for_gpu(raft::handle_t& handle, const std::string& graph_file_path); + +/** + * @brief Base test fixture class, responsible for handling common operations + * needed by all MG tests. 
+ * + * It's expected this class will be built out and refactored often as new MG C++ + * tests are added and new patterns evolve. + * + * Example: + * ``` + * class MyTestFixture : public cugraph::test::MG_TestFixture_t {}; + * ``` + **/ + +// FIXME: consider moving this to a separate file? (eg. mg_test_fixture.cpp)? + +class MG_TestFixture_t : public ::testing::Test { + public: + static void SetUpTestCase() + { + MPI_TRY(MPI_Init(NULL, NULL)); + + int rank, size; + MPI_TRY(MPI_Comm_rank(MPI_COMM_WORLD, &rank)); + MPI_TRY(MPI_Comm_size(MPI_COMM_WORLD, &size)); + + int nGpus; + CUDA_CHECK(cudaGetDeviceCount(&nGpus)); + + ASSERT( + nGpus >= size, "Number of GPUs are lesser than MPI ranks! ngpus=%d, nranks=%d", nGpus, size); + + CUDA_CHECK(cudaSetDevice(rank)); + } + + static void TearDownTestCase() { MPI_TRY(MPI_Finalize()); } +}; + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/test_utilities.cpp b/cpp/tests/utilities/test_utilities.cpp new file mode 100644 index 00000000000..abb416a632d --- /dev/null +++ b/cpp/tests/utilities/test_utilities.cpp @@ -0,0 +1,442 @@ +/* + * Copyright (c) 2021, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include + +#include +#include +#include + +#include + +extern "C" { +#include "mmio.h" +} + +#include + +#include +#include +#include +#include + +namespace cugraph { +namespace test { + +std::string getFileName(const std::string& s) +{ + char sep = '/'; +#ifdef _WIN32 + sep = '\\'; +#endif + size_t i = s.rfind(sep, s.length()); + if (i != std::string::npos) { return (s.substr(i + 1, s.length() - i)); } + return (""); +} + +/// Read matrix properties from Matrix Market file +/** Matrix Market file is assumed to be a sparse matrix in coordinate + * format. + * + * @param f File stream for Matrix Market file. + * @param tg Boolean indicating whether to convert matrix to general + * format (from symmetric, Hermitian, or skew symmetric format). + * @param t (Output) MM_typecode with matrix properties. + * @param m (Output) Number of matrix rows. + * @param n (Output) Number of matrix columns. + * @param nnz (Output) Number of non-zero matrix entries. + * @return Zero if properties were read successfully. Otherwise + * non-zero. 
+ */ +template +int mm_properties(FILE* f, int tg, MM_typecode* t, IndexType_* m, IndexType_* n, IndexType_* nnz) +{ + // Read matrix properties from file + int mint, nint, nnzint; + if (fseek(f, 0, SEEK_SET)) { + fprintf(stderr, "Error: could not set position in file\n"); + return -1; + } + if (mm_read_banner(f, t)) { + fprintf(stderr, "Error: could not read Matrix Market file banner\n"); + return -1; + } + if (!mm_is_matrix(*t) || !mm_is_coordinate(*t)) { + fprintf(stderr, "Error: file does not contain matrix in coordinate format\n"); + return -1; + } + if (mm_read_mtx_crd_size(f, &mint, &nint, &nnzint)) { + fprintf(stderr, "Error: could not read matrix dimensions\n"); + return -1; + } + if (!mm_is_pattern(*t) && !mm_is_real(*t) && !mm_is_integer(*t) && !mm_is_complex(*t)) { + fprintf(stderr, "Error: matrix entries are not valid type\n"); + return -1; + } + *m = mint; + *n = nint; + *nnz = nnzint; + + // Find total number of non-zero entries + if (tg && !mm_is_general(*t)) { + // Non-diagonal entries should be counted twice + *nnz *= 2; + + // Diagonal entries should not be double-counted + int st; + for (int i = 0; i < nnzint; ++i) { + // Read matrix entry + // MTX only supports int for row and col idx + int row, col; + double rval, ival; + if (mm_is_pattern(*t)) + st = fscanf(f, "%d %d\n", &row, &col); + else if (mm_is_real(*t) || mm_is_integer(*t)) + st = fscanf(f, "%d %d %lg\n", &row, &col, &rval); + else // Complex matrix + st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival); + if (ferror(f) || (st == EOF)) { + fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1); + return -1; + } + + // Check if entry is diagonal + if (row == col) --(*nnz); + } + } + + return 0; +} + +/// Read Matrix Market file and convert to COO format matrix +/** Matrix Market file is assumed to be a sparse matrix in coordinate + * format. + * + * @param f File stream for Matrix Market file. 
+ * @param tg Boolean indicating whether to convert matrix to general + * format (from symmetric, Hermitian, or skew symmetric format). + * @param nnz Number of non-zero matrix entries. + * @param cooRowInd (Output) Row indices for COO matrix. Should have + * at least nnz entries. + * @param cooColInd (Output) Column indices for COO matrix. Should + * have at least nnz entries. + * @param cooRVal (Output) Real component of COO matrix + * entries. Should have at least nnz entries. Ignored if null + * pointer. + * @param cooIVal (Output) Imaginary component of COO matrix + * entries. Should have at least nnz entries. Ignored if null + * pointer. + * @return Zero if matrix was read successfully. Otherwise non-zero. + */ +template +int mm_to_coo(FILE* f, + int tg, + IndexType_ nnz, + IndexType_* cooRowInd, + IndexType_* cooColInd, + ValueType_* cooRVal, + ValueType_* cooIVal) +{ + // Read matrix properties from file + MM_typecode t; + int m, n, nnzOld; + if (fseek(f, 0, SEEK_SET)) { + fprintf(stderr, "Error: could not set position in file\n"); + return -1; + } + if (mm_read_banner(f, &t)) { + fprintf(stderr, "Error: could not read Matrix Market file banner\n"); + return -1; + } + if (!mm_is_matrix(t) || !mm_is_coordinate(t)) { + fprintf(stderr, "Error: file does not contain matrix in coordinate format\n"); + return -1; + } + if (mm_read_mtx_crd_size(f, &m, &n, &nnzOld)) { + fprintf(stderr, "Error: could not read matrix dimensions\n"); + return -1; + } + if (!mm_is_pattern(t) && !mm_is_real(t) && !mm_is_integer(t) && !mm_is_complex(t)) { + fprintf(stderr, "Error: matrix entries are not valid type\n"); + return -1; + } + + // Add each matrix entry in file to COO format matrix + int i; // Entry index in Matrix Market file; can only be int in the MTX format + int j = 0; // Entry index in COO format matrix; can only be int in the MTX format + for (i = 0; i < nnzOld; ++i) { + // Read entry from file + int row, col; + double rval, ival; + int st; + if (mm_is_pattern(t)) { + 
st = fscanf(f, "%d %d\n", &row, &col); + rval = 1.0; + ival = 0.0; + } else if (mm_is_real(t) || mm_is_integer(t)) { + st = fscanf(f, "%d %d %lg\n", &row, &col, &rval); + ival = 0.0; + } else // Complex matrix + st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival); + if (ferror(f) || (st == EOF)) { + fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1); + return -1; + } + + // Switch to 0-based indexing + --row; + --col; + + // Record entry + cooRowInd[j] = row; + cooColInd[j] = col; + if (cooRVal != NULL) cooRVal[j] = rval; + if (cooIVal != NULL) cooIVal[j] = ival; + ++j; + + // Add symmetric complement of non-diagonal entries + if (tg && !mm_is_general(t) && (row != col)) { + // Modify entry value if matrix is skew symmetric or Hermitian + if (mm_is_skew(t)) { + rval = -rval; + ival = -ival; + } else if (mm_is_hermitian(t)) { + ival = -ival; + } + + // Record entry + cooRowInd[j] = col; + cooColInd[j] = row; + if (cooRVal != NULL) cooRVal[j] = rval; + if (cooIVal != NULL) cooIVal[j] = ival; + ++j; + } + } + return 0; +} + +int read_binary_vector(FILE* fpin, int n, std::vector& val) +{ + size_t is_read1; + + double* t_storage = new double[n]; + is_read1 = fread(t_storage, sizeof(double), n, fpin); + for (int i = 0; i < n; i++) { + if (t_storage[i] == DBL_MAX) + val[i] = FLT_MAX; + else if (t_storage[i] == -DBL_MAX) + val[i] = -FLT_MAX; + else + val[i] = static_cast(t_storage[i]); + } + delete[] t_storage; + + if (is_read1 != (size_t)n) { + printf("%s", "I/O fail\n"); + return 1; + } + return 0; +} + +int read_binary_vector(FILE* fpin, int n, std::vector& val) +{ + size_t is_read1; + + is_read1 = fread(&val[0], sizeof(double), n, fpin); + + if (is_read1 != (size_t)n) { + printf("%s", "I/O fail\n"); + return 1; + } + return 0; +} + +// FIXME: A similar function could be useful for CSC format +// There are functions above that operate coo -> csr and coo->csc +/** + * @tparam + */ +template +std::unique_ptr> 
generate_graph_csr_from_mm( + bool& directed, std::string mm_file) +{ + vertex_t number_of_vertices; + edge_t number_of_edges; + + FILE* fpin = fopen(mm_file.c_str(), "r"); + EXPECT_NE(fpin, nullptr); + + vertex_t number_of_columns = 0; + MM_typecode mm_typecode{0}; + EXPECT_EQ(mm_properties( + fpin, 1, &mm_typecode, &number_of_vertices, &number_of_columns, &number_of_edges), + 0); + EXPECT_TRUE(mm_is_matrix(mm_typecode)); + EXPECT_TRUE(mm_is_coordinate(mm_typecode)); + EXPECT_FALSE(mm_is_complex(mm_typecode)); + EXPECT_FALSE(mm_is_skew(mm_typecode)); + + directed = !mm_is_symmetric(mm_typecode); + + // Allocate memory on host + std::vector coo_row_ind(number_of_edges); + std::vector coo_col_ind(number_of_edges); + std::vector coo_val(number_of_edges); + + // Read + EXPECT_EQ((mm_to_coo( + fpin, 1, number_of_edges, &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], NULL)), + 0); + EXPECT_EQ(fclose(fpin), 0); + + cugraph::GraphCOOView cooview( + &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], number_of_vertices, number_of_edges); + + return cugraph::coo_to_csr(cooview); +} + +template +edgelist_from_market_matrix_file_t read_edgelist_from_matrix_market_file( + std::string const& graph_file_full_path) +{ + edgelist_from_market_matrix_file_t ret{}; + + MM_typecode mc{}; + vertex_t m{}; + edge_t nnz{}; + + FILE* file = fopen(graph_file_full_path.c_str(), "r"); + CUGRAPH_EXPECTS(file != nullptr, "fopen failure."); + + edge_t tmp_m{}; + edge_t tmp_k{}; + auto mm_ret = cugraph::test::mm_properties(file, 1, &mc, &tmp_m, &tmp_k, &nnz); + CUGRAPH_EXPECTS(mm_ret == 0, "could not read Matrix Market file properties."); + m = static_cast(tmp_m); + CUGRAPH_EXPECTS(mm_is_matrix(mc) && mm_is_coordinate(mc) && !mm_is_complex(mc) && !mm_is_skew(mc), + "invalid Matrix Market file properties."); + + ret.h_rows.assign(nnz, vertex_t{0}); + ret.h_cols.assign(nnz, vertex_t{0}); + ret.h_weights.assign(nnz, weight_t{0.0}); + ret.number_of_vertices = m; + ret.is_symmetric = mm_is_symmetric(mc); + 
+ mm_ret = cugraph::test::mm_to_coo( + file, 1, nnz, ret.h_rows.data(), ret.h_cols.data(), ret.h_weights.data(), nullptr); + CUGRAPH_EXPECTS(mm_ret == 0, "could not read matrix data"); + + auto file_ret = fclose(file); + CUGRAPH_EXPECTS(file_ret == 0, "fclose failure."); + + return std::move(ret); +} + +template +cugraph::experimental::graph_t +read_graph_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted) +{ + auto mm_graph = + read_edgelist_from_matrix_market_file(graph_file_full_path); + edge_t number_of_edges = static_cast(mm_graph.h_rows.size()); + + rmm::device_uvector d_edgelist_rows(number_of_edges, handle.get_stream()); + rmm::device_uvector d_edgelist_cols(number_of_edges, handle.get_stream()); + rmm::device_uvector d_edgelist_weights(test_weighted ? number_of_edges : 0, + handle.get_stream()); + + raft::update_device( + d_edgelist_rows.data(), mm_graph.h_rows.data(), number_of_edges, handle.get_stream()); + raft::update_device( + d_edgelist_cols.data(), mm_graph.h_cols.data(), number_of_edges, handle.get_stream()); + if (test_weighted) { + raft::update_device( + d_edgelist_weights.data(), mm_graph.h_weights.data(), number_of_edges, handle.get_stream()); + } + + cugraph::experimental::edgelist_t edgelist{ + d_edgelist_rows.data(), + d_edgelist_cols.data(), + test_weighted ? 
d_edgelist_weights.data() : nullptr, + number_of_edges}; + + return cugraph::experimental::graph_t( + handle, + edgelist, + mm_graph.number_of_vertices, + cugraph::experimental::graph_properties_t{mm_graph.is_symmetric, false}, + false, + true); +} + +// explicit instantiations + +template int mm_to_coo( + FILE* f, int tg, int nnz, int* cooRowInd, int* cooColInd, int* cooRVal, int* cooIVal); + +template int mm_to_coo( + FILE* f, int tg, int nnz, int* cooRowInd, int* cooColInd, double* cooRVal, double* cooIVal); + +template int mm_to_coo( + FILE* f, int tg, int nnz, int* cooRowInd, int* cooColInd, float* cooRVal, float* cooIVal); + +template std::unique_ptr> +generate_graph_csr_from_mm(bool& directed, std::string mm_file); + +template std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file); + +template std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file); + +template std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file); + +template cugraph::experimental::graph_t +read_graph_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted); + +template cugraph::experimental::graph_t +read_graph_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted); + +template cugraph::experimental::graph_t +read_graph_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted); + +template cugraph::experimental::graph_t +read_graph_from_matrix_market_file( + raft::handle_t const& handle, std::string const& graph_file_full_path, bool test_weighted); + +template cugraph::experimental::graph_t +read_graph_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted); + +template cugraph::experimental::graph_t +read_graph_from_matrix_market_file( + raft::handle_t 
const& handle, std::string const& graph_file_full_path, bool test_weighted); + +template cugraph::experimental::graph_t +read_graph_from_matrix_market_file( + raft::handle_t const& handle, std::string const& graph_file_full_path, bool test_weighted); + +} // namespace test +} // namespace cugraph diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 518e7c2860e..406f09048e0 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020, NVIDIA CORPORATION. + * Copyright (c) 2019-2021, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,37 +16,20 @@ #pragma once #include -#include -#include +#include -#include +#include +#include +#include extern "C" { #include "mmio.h" } -#include - -#include -#include -#include -#include - namespace cugraph { namespace test { -std::string getFileName(const std::string& s) -{ - char sep = '/'; - -#ifdef _WIN32 - sep = '\\'; -#endif - - size_t i = s.rfind(sep, s.length()); - if (i != std::string::npos) { return (s.substr(i + 1, s.length() - i)); } - return (""); -} +std::string getFileName(const std::string& s); /// Read matrix properties from Matrix Market file /** Matrix Market file is assumed to be a sparse matrix in coordinate @@ -63,64 +46,7 @@ std::string getFileName(const std::string& s) * non-zero. 
*/ template -int mm_properties(FILE* f, int tg, MM_typecode* t, IndexType_* m, IndexType_* n, IndexType_* nnz) -{ - // Read matrix properties from file - int mint, nint, nnzint; - if (fseek(f, 0, SEEK_SET)) { - fprintf(stderr, "Error: could not set position in file\n"); - return -1; - } - if (mm_read_banner(f, t)) { - fprintf(stderr, "Error: could not read Matrix Market file banner\n"); - return -1; - } - if (!mm_is_matrix(*t) || !mm_is_coordinate(*t)) { - fprintf(stderr, "Error: file does not contain matrix in coordinate format\n"); - return -1; - } - if (mm_read_mtx_crd_size(f, &mint, &nint, &nnzint)) { - fprintf(stderr, "Error: could not read matrix dimensions\n"); - return -1; - } - if (!mm_is_pattern(*t) && !mm_is_real(*t) && !mm_is_integer(*t) && !mm_is_complex(*t)) { - fprintf(stderr, "Error: matrix entries are not valid type\n"); - return -1; - } - *m = mint; - *n = nint; - *nnz = nnzint; - - // Find total number of non-zero entries - if (tg && !mm_is_general(*t)) { - // Non-diagonal entries should be counted twice - *nnz *= 2; - - // Diagonal entries should not be double-counted - int st; - for (int i = 0; i < nnzint; ++i) { - // Read matrix entry - // MTX only supports int for row and col idx - int row, col; - double rval, ival; - if (mm_is_pattern(*t)) - st = fscanf(f, "%d %d\n", &row, &col); - else if (mm_is_real(*t) || mm_is_integer(*t)) - st = fscanf(f, "%d %d %lg\n", &row, &col, &rval); - else // Complex matrix - st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival); - if (ferror(f) || (st == EOF)) { - fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1); - return -1; - } - - // Check if entry is diagonal - if (row == col) --(*nnz); - } - } - - return 0; -} +int mm_properties(FILE* f, int tg, MM_typecode* t, IndexType_* m, IndexType_* n, IndexType_* nnz); /// Read Matrix Market file and convert to COO format matrix /** Matrix Market file is assumed to be a sparse matrix in coordinate @@ -149,169 +75,20 @@ int 
mm_to_coo(FILE* f, IndexType_* cooRowInd, IndexType_* cooColInd, ValueType_* cooRVal, - ValueType_* cooIVal) -{ - // Read matrix properties from file - MM_typecode t; - int m, n, nnzOld; - if (fseek(f, 0, SEEK_SET)) { - fprintf(stderr, "Error: could not set position in file\n"); - return -1; - } - if (mm_read_banner(f, &t)) { - fprintf(stderr, "Error: could not read Matrix Market file banner\n"); - return -1; - } - if (!mm_is_matrix(t) || !mm_is_coordinate(t)) { - fprintf(stderr, "Error: file does not contain matrix in coordinate format\n"); - return -1; - } - if (mm_read_mtx_crd_size(f, &m, &n, &nnzOld)) { - fprintf(stderr, "Error: could not read matrix dimensions\n"); - return -1; - } - if (!mm_is_pattern(t) && !mm_is_real(t) && !mm_is_integer(t) && !mm_is_complex(t)) { - fprintf(stderr, "Error: matrix entries are not valid type\n"); - return -1; - } - - // Add each matrix entry in file to COO format matrix - int i; // Entry index in Matrix Market file; can only be int in the MTX format - int j = 0; // Entry index in COO format matrix; can only be int in the MTX format - for (i = 0; i < nnzOld; ++i) { - // Read entry from file - int row, col; - double rval, ival; - int st; - if (mm_is_pattern(t)) { - st = fscanf(f, "%d %d\n", &row, &col); - rval = 1.0; - ival = 0.0; - } else if (mm_is_real(t) || mm_is_integer(t)) { - st = fscanf(f, "%d %d %lg\n", &row, &col, &rval); - ival = 0.0; - } else // Complex matrix - st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival); - if (ferror(f) || (st == EOF)) { - fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1); - return -1; - } - - // Switch to 0-based indexing - --row; - --col; - - // Record entry - cooRowInd[j] = row; - cooColInd[j] = col; - if (cooRVal != NULL) cooRVal[j] = rval; - if (cooIVal != NULL) cooIVal[j] = ival; - ++j; - - // Add symmetric complement of non-diagonal entries - if (tg && !mm_is_general(t) && (row != col)) { - // Modify entry value if matrix is skew symmetric 
or Hermitian - if (mm_is_skew(t)) { - rval = -rval; - ival = -ival; - } else if (mm_is_hermitian(t)) { - ival = -ival; - } - - // Record entry - cooRowInd[j] = col; - cooColInd[j] = row; - if (cooRVal != NULL) cooRVal[j] = rval; - if (cooIVal != NULL) cooIVal[j] = ival; - ++j; - } - } - return 0; -} + ValueType_* cooIVal); -int read_binary_vector(FILE* fpin, int n, std::vector& val) -{ - size_t is_read1; - - double* t_storage = new double[n]; - is_read1 = fread(t_storage, sizeof(double), n, fpin); - for (int i = 0; i < n; i++) { - if (t_storage[i] == DBL_MAX) - val[i] = FLT_MAX; - else if (t_storage[i] == -DBL_MAX) - val[i] = -FLT_MAX; - else - val[i] = static_cast(t_storage[i]); - } - delete[] t_storage; - - if (is_read1 != (size_t)n) { - printf("%s", "I/O fail\n"); - return 1; - } - return 0; -} - -int read_binary_vector(FILE* fpin, int n, std::vector& val) -{ - size_t is_read1; +int read_binary_vector(FILE* fpin, int n, std::vector& val); - is_read1 = fread(&val[0], sizeof(double), n, fpin); - - if (is_read1 != (size_t)n) { - printf("%s", "I/O fail\n"); - return 1; - } - return 0; -} +int read_binary_vector(FILE* fpin, int n, std::vector& val); // FIXME: A similar function could be useful for CSC format // There are functions above that operate coo -> csr and coo->csc /** * @tparam */ -template -std::unique_ptr> generate_graph_csr_from_mm(bool& directed, - std::string mm_file) -{ - VT number_of_vertices; - ET number_of_edges; - - FILE* fpin = fopen(mm_file.c_str(), "r"); - EXPECT_NE(fpin, nullptr); - - VT number_of_columns = 0; - MM_typecode mm_typecode{0}; - EXPECT_EQ(mm_properties( - fpin, 1, &mm_typecode, &number_of_vertices, &number_of_columns, &number_of_edges), - 0); - EXPECT_TRUE(mm_is_matrix(mm_typecode)); - EXPECT_TRUE(mm_is_coordinate(mm_typecode)); - EXPECT_FALSE(mm_is_complex(mm_typecode)); - EXPECT_FALSE(mm_is_skew(mm_typecode)); - - directed = !mm_is_symmetric(mm_typecode); - - // Allocate memory on host - std::vector coo_row_ind(number_of_edges); 
- std::vector coo_col_ind(number_of_edges); - std::vector coo_val(number_of_edges); - - // Read - EXPECT_EQ((mm_to_coo( - fpin, 1, number_of_edges, &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], NULL)), - 0); - EXPECT_EQ(fclose(fpin), 0); - - cugraph::GraphCOOView cooview( - &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], number_of_vertices, number_of_edges); - - return cugraph::coo_to_csr(cooview); -} - -//////////////////////////////////////////////////////////////////////////////// -// FIXME: move this code to rapids-core -//////////////////////////////////////////////////////////////////////////////// +template +std::unique_ptr> generate_graph_csr_from_mm( + bool& directed, std::string mm_file); // Define RAPIDS_DATASET_ROOT_DIR using a preprocessor variable to // allow for a build to override the default. This is useful for @@ -342,79 +119,13 @@ struct edgelist_from_market_matrix_file_t { template edgelist_from_market_matrix_file_t read_edgelist_from_matrix_market_file( - std::string const& graph_file_full_path) -{ - edgelist_from_market_matrix_file_t ret{}; - - MM_typecode mc{}; - vertex_t m{}; - edge_t nnz{}; - - FILE* file = fopen(graph_file_full_path.c_str(), "r"); - CUGRAPH_EXPECTS(file != nullptr, "fopen failure."); - - edge_t tmp_m{}; - edge_t tmp_k{}; - auto mm_ret = cugraph::test::mm_properties(file, 1, &mc, &tmp_m, &tmp_k, &nnz); - CUGRAPH_EXPECTS(mm_ret == 0, "could not read Matrix Market file properties."); - m = static_cast(tmp_m); - CUGRAPH_EXPECTS(mm_is_matrix(mc) && mm_is_coordinate(mc) && !mm_is_complex(mc) && !mm_is_skew(mc), - "invalid Matrix Market file properties."); - - ret.h_rows.assign(nnz, vertex_t{0}); - ret.h_cols.assign(nnz, vertex_t{0}); - ret.h_weights.assign(nnz, weight_t{0.0}); - ret.number_of_vertices = m; - ret.is_symmetric = mm_is_symmetric(mc); - - mm_ret = cugraph::test::mm_to_coo( - file, 1, nnz, ret.h_rows.data(), ret.h_cols.data(), ret.h_weights.data(), nullptr); - CUGRAPH_EXPECTS(mm_ret == 0, "could not read matrix 
data"); - - auto file_ret = fclose(file); - CUGRAPH_EXPECTS(file_ret == 0, "fclose failure."); - - return std::move(ret); -} + std::string const& graph_file_full_path); template cugraph::experimental::graph_t read_graph_from_matrix_market_file(raft::handle_t const& handle, std::string const& graph_file_full_path, - bool test_weighted) -{ - auto mm_graph = - read_edgelist_from_matrix_market_file(graph_file_full_path); - edge_t number_of_edges = static_cast(mm_graph.h_rows.size()); - - rmm::device_uvector d_edgelist_rows(number_of_edges, handle.get_stream()); - rmm::device_uvector d_edgelist_cols(number_of_edges, handle.get_stream()); - rmm::device_uvector d_edgelist_weights(test_weighted ? number_of_edges : 0, - handle.get_stream()); - - raft::update_device( - d_edgelist_rows.data(), mm_graph.h_rows.data(), number_of_edges, handle.get_stream()); - raft::update_device( - d_edgelist_cols.data(), mm_graph.h_cols.data(), number_of_edges, handle.get_stream()); - if (test_weighted) { - raft::update_device( - d_edgelist_weights.data(), mm_graph.h_weights.data(), number_of_edges, handle.get_stream()); - } - - cugraph::experimental::edgelist_t edgelist{ - d_edgelist_rows.data(), - d_edgelist_cols.data(), - test_weighted ? 
d_edgelist_weights.data() : nullptr, - number_of_edges}; - - return cugraph::experimental::graph_t( - handle, - edgelist, - mm_graph.number_of_vertices, - cugraph::experimental::graph_properties_t{mm_graph.is_symmetric, false}, - false, - true); -} + bool test_weighted); } // namespace test } // namespace cugraph diff --git a/datasets/eil51.tsp b/datasets/eil51.tsp new file mode 100644 index 00000000000..543d1013c14 --- /dev/null +++ b/datasets/eil51.tsp @@ -0,0 +1,58 @@ +NAME : eil51 +COMMENT : 51-city problem (Christofides/Eilon) +TYPE : TSP +DIMENSION : 51 +EDGE_WEIGHT_TYPE : EUC_2D +NODE_COORD_SECTION +1 37 52 +2 49 49 +3 52 64 +4 20 26 +5 40 30 +6 21 47 +7 17 63 +8 31 62 +9 52 33 +10 51 21 +11 42 41 +12 31 32 +13 5 25 +14 12 42 +15 36 16 +16 52 41 +17 27 23 +18 17 33 +19 13 13 +20 57 58 +21 62 42 +22 42 57 +23 16 57 +24 8 52 +25 7 38 +26 27 68 +27 30 48 +28 43 67 +29 58 48 +30 58 27 +31 37 69 +32 38 46 +33 46 10 +34 61 33 +35 62 63 +36 63 69 +37 32 22 +38 45 35 +39 59 15 +40 5 6 +41 10 17 +42 21 10 +43 5 64 +44 30 15 +45 39 10 +46 32 39 +47 25 32 +48 25 55 +49 48 28 +50 56 37 +51 30 40 +EOF diff --git a/datasets/get_test_data.sh b/datasets/get_test_data.sh index 071a4b8dea3..3e0b6c55c37 100755 --- a/datasets/get_test_data.sh +++ b/datasets/get_test_data.sh @@ -1,3 +1,16 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ #!/bin/bash set -e set -o pipefail @@ -31,6 +44,10 @@ benchmark # ~1s download https://s3.us-east-2.amazonaws.com/rapidsai-data/cugraph/benchmark/hibench/hibench_1_small.tgz benchmark + +# ~0.6s download +https://rapidsai-data.s3.us-east-2.amazonaws.com/cugraph/test/tsplib/datasets.tar.gz +tsplib " EXTENDED_DATASET_DATA=" diff --git a/datasets/gil262.tsp b/datasets/gil262.tsp new file mode 100755 index 00000000000..cfcb15c3b78 --- /dev/null +++ b/datasets/gil262.tsp @@ -0,0 +1,269 @@ +NAME : gil262 +COMMENT : 262-city problem (Gillet/Johnson) +TYPE : TSP +DIMENSION : 262 +EDGE_WEIGHT_TYPE : EUC_2D +NODE_COORD_SECTION +1 -99 -97 +2 -59 50 +3 0 14 +4 -17 -66 +5 -69 -19 +6 31 12 +7 5 -41 +8 -12 10 +9 -64 70 +10 -12 85 +11 -18 64 +12 -77 -16 +13 -53 88 +14 83 -24 +15 24 41 +16 17 21 +17 42 96 +18 -65 0 +19 -47 -26 +20 85 36 +21 -35 -54 +22 54 -21 +23 64 -17 +24 55 89 +25 17 -25 +26 -61 66 +27 -61 26 +28 17 -72 +29 79 38 +30 -62 -2 +31 -90 -68 +32 52 66 +33 -54 -50 +34 8 -84 +35 37 -90 +36 -83 49 +37 35 -1 +38 7 59 +39 12 48 +40 57 95 +41 92 28 +42 -3 97 +43 -7 52 +44 42 -15 +45 77 -43 +46 59 -49 +47 25 91 +48 69 -19 +49 -82 -14 +50 74 -70 +51 69 59 +52 29 33 +53 -97 9 +54 -58 9 +55 28 93 +56 7 73 +57 -28 73 +58 -76 55 +59 41 42 +60 92 40 +61 -84 -29 +62 -12 42 +63 51 -45 +64 -37 46 +65 -97 35 +66 14 89 +67 60 58 +68 -63 -75 +69 -18 34 +70 -46 -82 +71 -86 -79 +72 -43 -30 +73 -44 7 +74 -3 -20 +75 36 41 +76 -30 -94 +77 79 -62 +78 51 70 +79 -61 -26 +80 6 94 +81 -19 -62 +82 -20 51 +83 -81 37 +84 7 31 +85 52 12 +86 83 -91 +87 -7 -92 +88 82 -74 +89 -70 85 +90 -83 -30 +91 71 -61 +92 85 11 +93 66 -48 +94 78 -87 +95 9 -79 +96 -36 4 +97 66 39 +98 92 -17 +99 -46 -79 +100 -30 -63 +101 -42 63 +102 20 42 +103 15 98 +104 1 -17 +105 64 20 +106 -96 85 +107 93 -29 +108 -40 -84 +109 86 35 +110 91 36 +111 62 -8 +112 -24 4 +113 11 96 +114 -53 62 +115 -28 -71 +116 7 -4 +117 95 -9 +118 -3 17 +119 53 -90 +120 58 -19 +121 -83 84 +122 -1 49 +123 -4 17 +124 -82 -3 +125 -43 47 +126 6 -6 +127 70 
99 +128 68 -29 +129 -94 -30 +130 -94 -20 +131 -21 77 +132 64 37 +133 -70 -19 +134 88 65 +135 2 29 +136 33 57 +137 -70 6 +138 -38 -56 +139 -80 -95 +140 -5 -39 +141 8 -22 +142 -61 -76 +143 76 -22 +144 49 -71 +145 -30 -68 +146 1 34 +147 77 79 +148 -58 64 +149 82 -97 +150 -80 55 +151 81 -86 +152 39 -49 +153 -67 72 +154 -25 -89 +155 -44 -95 +156 32 -68 +157 -17 49 +158 93 49 +159 99 81 +160 10 -49 +161 63 -41 +162 38 39 +163 -28 39 +164 -2 -47 +165 38 8 +166 -42 -6 +167 -67 88 +168 19 93 +169 40 27 +170 -61 56 +171 43 33 +172 -18 -39 +173 -69 19 +174 75 -18 +175 31 85 +176 25 58 +177 -16 36 +178 91 15 +179 60 -39 +180 49 -47 +181 42 33 +182 16 -81 +183 -78 53 +184 53 -80 +185 -46 -26 +186 -25 -54 +187 69 -46 +188 0 -78 +189 -84 74 +190 -16 16 +191 -63 -14 +192 51 -77 +193 -39 61 +194 5 97 +195 -55 39 +196 70 -14 +197 0 95 +198 -45 7 +199 38 -24 +200 50 -37 +201 59 71 +202 -73 -96 +203 -29 72 +204 -47 12 +205 -88 -61 +206 -88 36 +207 -46 -3 +208 26 -37 +209 -39 -67 +210 92 27 +211 -80 -31 +212 93 -50 +213 -20 -5 +214 -22 73 +215 -4 -7 +216 54 -48 +217 -70 39 +218 54 -82 +219 29 41 +220 -87 51 +221 -96 -36 +222 49 8 +223 -5 54 +224 -26 43 +225 -11 60 +226 40 61 +227 82 35 +228 -92 12 +229 -93 -86 +230 -66 63 +231 -72 -87 +232 -57 -84 +233 23 52 +234 -56 -62 +235 -19 59 +236 63 -14 +237 -13 38 +238 -19 87 +239 44 -84 +240 98 -17 +241 -16 62 +242 3 66 +243 26 22 +244 -38 -81 +245 70 80 +246 17 -35 +247 96 -83 +248 -77 80 +249 -14 44 +250 -33 33 +251 33 -33 +252 70 0 +253 -50 60 +254 -50 -60 +255 75 0 +256 0 75 +257 -75 0 +258 0 -75 +259 40 80 +260 40 -80 +261 -60 20 +262 -60 -20 +EOF diff --git a/datasets/kroA100.tsp b/datasets/kroA100.tsp new file mode 100644 index 00000000000..05ebae994ac --- /dev/null +++ b/datasets/kroA100.tsp @@ -0,0 +1,107 @@ +NAME: kroA100 +TYPE: TSP +COMMENT: 100-city problem A (Krolak/Felts/Nelson) +DIMENSION: 100 +EDGE_WEIGHT_TYPE : EUC_2D +NODE_COORD_SECTION +1 1380 939 +2 2848 96 +3 3510 1671 +4 457 334 +5 3888 666 +6 984 965 +7 2721 1482 +8 
1286 525 +9 2716 1432 +10 738 1325 +11 1251 1832 +12 2728 1698 +13 3815 169 +14 3683 1533 +15 1247 1945 +16 123 862 +17 1234 1946 +18 252 1240 +19 611 673 +20 2576 1676 +21 928 1700 +22 53 857 +23 1807 1711 +24 274 1420 +25 2574 946 +26 178 24 +27 2678 1825 +28 1795 962 +29 3384 1498 +30 3520 1079 +31 1256 61 +32 1424 1728 +33 3913 192 +34 3085 1528 +35 2573 1969 +36 463 1670 +37 3875 598 +38 298 1513 +39 3479 821 +40 2542 236 +41 3955 1743 +42 1323 280 +43 3447 1830 +44 2936 337 +45 1621 1830 +46 3373 1646 +47 1393 1368 +48 3874 1318 +49 938 955 +50 3022 474 +51 2482 1183 +52 3854 923 +53 376 825 +54 2519 135 +55 2945 1622 +56 953 268 +57 2628 1479 +58 2097 981 +59 890 1846 +60 2139 1806 +61 2421 1007 +62 2290 1810 +63 1115 1052 +64 2588 302 +65 327 265 +66 241 341 +67 1917 687 +68 2991 792 +69 2573 599 +70 19 674 +71 3911 1673 +72 872 1559 +73 2863 558 +74 929 1766 +75 839 620 +76 3893 102 +77 2178 1619 +78 3822 899 +79 378 1048 +80 1178 100 +81 2599 901 +82 3416 143 +83 2961 1605 +84 611 1384 +85 3113 885 +86 2597 1830 +87 2586 1286 +88 161 906 +89 1429 134 +90 742 1025 +91 1625 1651 +92 1187 706 +93 1787 1009 +94 22 987 +95 3640 43 +96 3756 882 +97 776 392 +98 1724 1642 +99 198 1810 +100 3950 1558 +EOF diff --git a/datasets/tsp225.tsp b/datasets/tsp225.tsp new file mode 100644 index 00000000000..ac9e06cecc1 --- /dev/null +++ b/datasets/tsp225.tsp @@ -0,0 +1,232 @@ +NAME : tsp225 +COMMENT : A TSP problem (Reinelt) +TYPE : TSP +DIMENSION : 225 +EDGE_WEIGHT_TYPE : EUC_2D +NODE_COORD_SECTION +1 155.42 150.65 +2 375.92 164.65 +3 183.92 150.65 +4 205.42 150.65 +5 205.42 171.65 +6 226.42 171.65 +7 226.42 186.15 +8 226.42 207.15 +9 226.42 235.65 +10 226.42 264.15 +11 226.42 292.65 +12 226.42 314.15 +13 226.42 335.65 +14 205.42 335.65 +15 190.92 335.65 +16 190.92 328.15 +17 176.92 328.15 +18 176.92 299.65 +19 155.42 299.65 +20 155.42 328.15 +21 155.42 356.65 +22 183.92 356.65 +23 219.42 356.65 +24 240.92 356.65 +25 269.42 356.65 +26 290.42 356.65 +27 387.42 136.15 +28 
318.92 356.65 +29 318.92 335.65 +30 318.92 328.15 +31 318.92 299.65 +32 297.92 299.65 +33 290.42 328.15 +34 290.42 335.65 +35 297.92 328.15 +36 254.92 335.65 +37 254.92 314.15 +38 254.92 292.65 +39 254.92 271.65 +40 254.92 243.15 +41 254.92 221.65 +42 254.92 193.15 +43 254.92 171.65 +44 276.42 171.65 +45 296.42 150.65 +46 276.42 150.65 +47 375.92 150.65 +48 308.92 150.65 +49 354.92 164.65 +50 338.42 174.65 +51 354.92 174.65 +52 338.42 200.15 +53 338.42 221.65 +54 354.92 221.65 +55 354.92 200.15 +56 361.92 200.15 +57 361.92 186.15 +58 383.42 186.15 +59 383.42 179.15 +60 404.42 179.15 +61 404.42 186.15 +62 418.92 186.15 +63 418.92 200.15 +64 432.92 200.15 +65 432.92 221.65 +66 418.92 221.65 +67 418.92 235.65 +68 397.42 235.65 +69 397.42 243.15 +70 375.92 243.15 +71 375.92 257.15 +72 368.92 257.15 +73 368.92 264.15 +74 347.42 264.15 +75 347.42 278.65 +76 336.42 278.65 +77 336.42 328.15 +78 347.42 328.15 +79 347.42 342.65 +80 368.92 342.65 +81 368.92 353.65 +82 418.92 353.65 +83 418.92 342.65 +84 432.92 342.65 +85 432.92 356.65 +86 447.42 356.65 +87 447.42 321.15 +88 447.42 292.65 +89 432.92 292.65 +90 432.92 314.15 +91 418.92 314.15 +92 418.92 321.15 +93 397.42 321.15 +94 397.42 333.65 +95 375.92 333.65 +96 375.92 321.15 +97 361.92 321.15 +98 361.92 299.65 +99 375.92 299.65 +100 375.92 285.65 +101 397.42 285.65 +102 397.42 271.65 +103 418.92 271.65 +104 418.92 264.15 +105 439.92 264.15 +106 439.92 250.15 +107 454.42 250.15 +108 454.42 243.15 +109 461.42 243.15 +110 461.42 214.65 +111 461.42 193.15 +112 447.42 193.15 +113 447.42 179.15 +114 439.92 179.15 +115 439.92 167.65 +116 419.92 167.65 +117 419.92 150.65 +118 439.92 150.65 +119 454.42 150.65 +120 475.92 150.65 +121 475.92 171.65 +122 496.92 171.65 +123 496.92 193.15 +124 496.92 214.65 +125 496.92 243.15 +126 496.92 271.65 +127 496.92 292.65 +128 496.92 317.15 +129 496.92 335.65 +130 470.42 335.65 +131 470.42 356.65 +132 496.92 356.65 +133 347.42 150.65 +134 539.92 356.65 +135 560.92 356.65 +136 589.42 356.65 +137 
589.42 342.65 +138 603.92 342.65 +139 610.92 342.65 +140 610.92 335.65 +141 610.92 321.15 +142 624.92 321.15 +143 624.92 278.65 +144 610.92 278.65 +145 610.92 257.15 +146 589.42 257.15 +147 589.42 250.15 +148 575.42 250.15 +149 560.92 250.15 +150 542.92 250.15 +151 542.92 264.15 +152 560.92 264.15 +153 575.42 264.15 +154 575.42 271.65 +155 582.42 271.65 +156 582.42 285.65 +157 596.42 285.65 +158 560.92 335.65 +159 596.42 314.15 +160 582.42 314.15 +161 582.42 321.15 +162 575.42 321.15 +163 575.42 335.65 +164 525.42 335.65 +165 525.42 314.15 +166 525.42 299.65 +167 525.42 281.65 +168 525.42 233.15 +169 525.42 214.65 +170 525.42 193.15 +171 525.42 171.65 +172 546.92 171.65 +173 546.92 150.65 +174 568.42 150.65 +175 475.92 160.65 +176 603.92 150.65 +177 624.92 150.65 +178 624.92 136.15 +179 596.42 136.15 +180 575.42 136.15 +181 553.92 136.15 +182 532.42 136.15 +183 575.42 356.65 +184 489.92 136.15 +185 468.42 136.15 +186 447.42 136.15 +187 425.92 136.15 +188 404.42 136.15 +189 370.42 136.15 +190 361.92 150.65 +191 340.42 136.15 +192 326.42 136.15 +193 301.92 136.15 +194 276.42 136.15 +195 254.92 136.15 +196 315.92 136.15 +197 212.42 136.15 +198 190.92 136.15 +199 338.92 150.65 +200 155.42 136.15 +201 624.92 299.65 +202 318.92 321.65 +203 155.42 314.15 +204 311.92 356.65 +205 355.42 136.15 +206 318.92 314.15 +207 362.92 164.65 +208 254.92 356.65 +209 383.42 333.65 +210 447.42 335.65 +211 470.42 345.65 +212 525.42 250.15 +213 546.92 335.65 +214 525.42 261.15 +215 525.42 356.65 +216 336.42 298.65 +217 336.42 313.15 +218 293.42 136.15 +219 336.42 306.15 +220 425.92 264.15 +221 391.42 353.65 +222 482.92 335.65 +223 429.92 167.65 +224 330.92 150.65 +225 368.42 150.65 +EOF diff --git a/docs/source/api.rst b/docs/source/api.rst index 459e5fbf4f1..dcdf3e6ff33 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -40,13 +40,6 @@ Betweenness Centrality :members: :undoc-members: -Edge Betweenness Centrality ---------------------------- - -.. 
automodule:: cugraph.centrality.edge_betweenness_centrality - :members: - :undoc-members: - Katz Centrality --------------- @@ -58,6 +51,13 @@ Katz Centrality Community ========= +EgoNet +------------------------------------ + +.. automodule:: cugraph.community.egonet + :members: + :undoc-members: + Ensemble clustering for graphs (ECG) ------------------------------------ diff --git a/docs/source/conf.py b/docs/source/conf.py index 6b484a5f57b..eb4745a61f0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -80,9 +80,9 @@ # built documents. # # The short X.Y version. -version = '0.18' +version = '0.19' # The full version, including alpha/beta/rc tags. -release = '0.18.0' +release = '0.19.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. diff --git a/docs/source/cugraph_blogs.rst b/docs/source/cugraph_blogs.rst index 84e31d40a19..cbbc93a1b14 100644 --- a/docs/source/cugraph_blogs.rst +++ b/docs/source/cugraph_blogs.rst @@ -23,6 +23,12 @@ BLOGS * `Status of RAPIDS cuGraph — Refactoring Code And Rethinking Graphs `_ * `Tackling Large Graphs with RAPIDS cuGraph and CUDA Unified Memory on GPUs `_ * `RAPIDS cuGraph adds NetworkX and DiGraph Compatibility `_ + * `Large Graph Visualization with RAPIDS cuGraph `_ + +2021 +------ + * + Media diff --git a/docs/source/cugraph_intro.md b/docs/source/cugraph_intro.md index 5bf2b715462..142395fb719 100644 --- a/docs/source/cugraph_intro.md +++ b/docs/source/cugraph_intro.md @@ -1,8 +1,55 @@ # cuGraph Introduction +The Data Scientist has a collection of techniques within their +proverbial toolbox. Data engineering, statistical analysis, and +machine learning are among the most commonly known. However, there +are numerous cases where the focus of the analysis is on the +relationship between data elements. In those cases, the data is best +represented as a graph. 
Graph analysis, also called network analysis, +is a collection of algorithms for answering questions posed against +graph data. Graph analysis is not new. +The first graph problem was posed by Euler in 1736, the [Seven Bridges of +Konigsberg](https://en.wikipedia.org/wiki/Seven_Bridges_of_K%C3%B6nigsberg), +and laid the foundation for the mathematical field of graph theory. +The application of graph analysis covers a wide variety of fields, including +marketing, biology, physics, computer science, sociology, and cyber to name a few. -## Terminology +RAPIDS cuGraph is a library of graph algorithms that seamlessly integrates +into the RAPIDS data science ecosystem and allows the data scientist to easily +call graph algorithms using data stored in a GPU DataFrame, NetworkX Graphs, or even +CuPy or SciPy sparse Matrix. + + +# Vision +The vision of RAPIDS cuGraph is to ___make graph analysis ubiquitous to the +point that users just think in terms of analysis and not technologies or +frameworks___. This is a goal that many of us on the cuGraph team have been +working on for almost twenty years. Many of the early attempts focused on +solving one problem or using one technique. Those early attempts worked for +the initial goal but tended to break as the scope changed (e.g., shifting +to solving a dynamic graph problem with a static graph solution). The limiting +factors usually came down to compute power, ease-of-use, or choosing a data +structure that was not suited for all problems. NVIDIA GPUs, CUDA, and RAPIDS +have totally changed the paradigm and the goal of an accelerated unified graph +analytic library is now possible. + +The compute power of the latest NVIDIA GPUs (RAPIDS supports Pascal and later +GPU architectures) makes graph analytics 1000x faster on average over NetworkX.
+Moreover, the internal memory speed within a GPU allows cuGraph to rapidly +switch the data structure to best suit the needs of the analytic rather than +being restricted to a single data structure. cuGraph is working with several +frameworks for both static and dynamic graph data structures so that we always +have a solution to any graph problem. Since Python has emerged as the de facto +language for data science, allowing interactivity and the ability to run graph +analytics in Python makes cuGraph familiar and approachable. RAPIDS wraps all +the graph analytic goodness mentioned above with the ability to perform +high-speed ETL, statistics, and machine learning. To make things even better, +RAPIDS and DASK allow cuGraph to scale to multiple GPUs to support +multi-billion edge graphs. + + +# Terminology cuGraph is a collection of GPU accelerated graph algorithms and graph utility functions. The application of graph analysis covers a lot of areas. diff --git a/notebooks/community/ECG.ipynb b/notebooks/community/ECG.ipynb index d7595dadb26..4a9eedd3c3a 100644 --- a/notebooks/community/ECG.ipynb +++ b/notebooks/community/ECG.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Ensemble Clustering for Graphs (ECG)\n", + "# Does not run on Pascal\n", "In this notebook, we will use cuGraph to identify the cluster in a test graph using the Ensemble Clustering for Graph approach.
\n", "\n", "\n", diff --git a/notebooks/community/Louvain.ipynb b/notebooks/community/Louvain.ipynb index e5e5e6a04ed..bfb8e299f49 100755 --- a/notebooks/community/Louvain.ipynb +++ b/notebooks/community/Louvain.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Louvain Community Detection\n", + "# Does not run on Pascal\n", "\n", "\n", "In this notebook, we will use cuGraph to identify the cluster in a test graph using the Louvain algorithm \n", diff --git a/notebooks/community/Subgraph-Extraction.ipynb b/notebooks/community/Subgraph-Extraction.ipynb index e068ef53aa5..cac52262d4d 100755 --- a/notebooks/community/Subgraph-Extraction.ipynb +++ b/notebooks/community/Subgraph-Extraction.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Subgraph Extraction\n", + "# Does not run on Pascal\n", "\n", "In this notebook, we will use cuGraph to extract a subgraph from the test graph. \n", "\n", diff --git a/notebooks/community/Triangle-Counting.ipynb b/notebooks/community/Triangle-Counting.ipynb index 09d7906a526..19d3f838fc6 100755 --- a/notebooks/community/Triangle-Counting.ipynb +++ b/notebooks/community/Triangle-Counting.ipynb @@ -21,7 +21,7 @@ "\n", "\n", "## Introduction\n", - "Triancle Counting, as the name implies, finds the number of triangles in a graph. Triangles are important in computing the clustering Coefficient and can be used for clustering. \n", + "Triangle Counting, as the name implies, finds the number of triangles in a graph. Triangles are important in computing the clustering Coefficient and can be used for clustering. \n", "\n", "\n", "To compute the Pagerank scores for a graph in cuGraph we use:
\n", diff --git a/notebooks/cugraph_benchmarks/bfs_benchmark.ipynb b/notebooks/cugraph_benchmarks/bfs_benchmark.ipynb index 58eb94bf0ee..6ae695e206e 100644 --- a/notebooks/cugraph_benchmarks/bfs_benchmark.ipynb +++ b/notebooks/cugraph_benchmarks/bfs_benchmark.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# BFS Performance Benchmarking\n", + "# Skip notebook test\n", "\n", "This notebook benchmarks performance of running BFS within cuGraph against NetworkX. \n", "\n", diff --git a/notebooks/cugraph_benchmarks/louvain_benchmark.ipynb b/notebooks/cugraph_benchmarks/louvain_benchmark.ipynb index a12b7c4bcc2..00e99a28617 100644 --- a/notebooks/cugraph_benchmarks/louvain_benchmark.ipynb +++ b/notebooks/cugraph_benchmarks/louvain_benchmark.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Louvain Performance Benchmarking\n", + "# Skip notebook test\n", "\n", "This notebook benchmarks performance improvement of running the Louvain clustering algorithm within cuGraph against NetworkX. The test is run over eight test networks (graphs) and then results plotted. \n", "

\n", diff --git a/notebooks/cugraph_benchmarks/nx_cugraph_bc_benchmarking.ipynb b/notebooks/cugraph_benchmarks/nx_cugraph_bc_benchmarking.ipynb index 6f76868f9a4..403c317ac0a 100644 --- a/notebooks/cugraph_benchmarks/nx_cugraph_bc_benchmarking.ipynb +++ b/notebooks/cugraph_benchmarks/nx_cugraph_bc_benchmarking.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Benchmarking NetworkX compatibility\n", + "# Skip notebook test\n", "This notebook benchmark the use of a NetworkX Graph object as input into algorithms.

\n", "The intention of the feature is to be able to drop cuGraph into existing NetworkX code in spot where performance is not optimal.\n", "\n", diff --git a/notebooks/cugraph_benchmarks/pagerank_benchmark.ipynb b/notebooks/cugraph_benchmarks/pagerank_benchmark.ipynb index c2933a10c7d..d0416efdd87 100644 --- a/notebooks/cugraph_benchmarks/pagerank_benchmark.ipynb +++ b/notebooks/cugraph_benchmarks/pagerank_benchmark.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# PageRank Performance Benchmarking\n", + "# Skip notebook test\n", "\n", "This notebook benchmarks performance of running PageRank within cuGraph against NetworkX. NetworkX contains several implementations of PageRank. This benchmark will compare cuGraph versus the defaukt Nx implementation as well as the SciPy version\n", "\n", diff --git a/notebooks/cugraph_benchmarks/release.ipynb b/notebooks/cugraph_benchmarks/release.ipynb index d3110da3621..3c6da55abc0 100644 --- a/notebooks/cugraph_benchmarks/release.ipynb +++ b/notebooks/cugraph_benchmarks/release.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# Release Benchmarking\n", + "# Skip notebook test\n", "\n", "With every release, RAPIDS publishes a release slide deck that includes the current performance state of cuGraph. \n", "This notebook, starting with release 0.15, runs all the various algorithms to computes the performance gain. \n", diff --git a/notebooks/cugraph_benchmarks/sssp_benchmark.ipynb b/notebooks/cugraph_benchmarks/sssp_benchmark.ipynb index 2d040e0acaf..32b562e7a1e 100644 --- a/notebooks/cugraph_benchmarks/sssp_benchmark.ipynb +++ b/notebooks/cugraph_benchmarks/sssp_benchmark.ipynb @@ -5,6 +5,7 @@ "metadata": {}, "source": [ "# SSSP Performance Benchmarking\n", + "# Skip notebook test\n", "\n", "This notebook benchmarks performance of running SSSP within cuGraph against NetworkX. 
\n", "\n", diff --git a/notebooks/demo/uvm.ipynb b/notebooks/demo/uvm.ipynb index d279be8ed54..8fa2b08b6d1 100644 --- a/notebooks/demo/uvm.ipynb +++ b/notebooks/demo/uvm.ipynb @@ -6,6 +6,7 @@ "source": [ "# Oversubscribing GPU memory in cuGraph\n", "#### Author : Alex Fender\n", + "# Skip notebook test\n", "\n", "In this notebook, we will show how to **scale to 4x larger graphs than before** without incurring a performance drop using managed memory features in cuGraph. We will compute the PageRank of each user in Twitter's dataset on a single GPU as an example. This technique applies to all features.\n", "\n", diff --git a/notebooks/link_analysis/Pagerank.ipynb b/notebooks/link_analysis/Pagerank.ipynb index c43561ff48c..a81e1ccf6c3 100755 --- a/notebooks/link_analysis/Pagerank.ipynb +++ b/notebooks/link_analysis/Pagerank.ipynb @@ -11,7 +11,7 @@ "Notebook Credits\n", "* Original Authors: Bradley Rees and James Wyles\n", "* Created: 08/13/2019\n", - "* Updated: 08/16/2020\n", + "* Updated: 01/17/2021\n", "\n", "RAPIDS Versions: 0.14 \n", "\n", @@ -190,7 +190,7 @@ "metadata": {}, "source": [ "### Read in the data - GPU\n", - "cuGraph depends on cuDF for data loading and the initial Dataframe creation\n", + "cuGraph graphs can be created from cuDF, dask_cuDF and Pandas dataframes\n", "\n", "The data file contains an edge list, which represents the connection of a vertex to another. The `source` to `destination` pairs is in what is known as Coordinate Format (COO). In this test case, the data is just two columns. 
However a third, `weight`, column is also possible" ] @@ -219,8 +219,7 @@ "outputs": [], "source": [ "# create a Graph using the source (src) and destination (dst) vertex pairs from the Dataframe \n", - "G = cugraph.Graph()\n", - "G.from_cudf_edgelist(gdf, source='src', destination='dst')" + "G = cugraph.from_edgelist(gdf, source='src', destination='dst')" ] }, { diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index d752c868237..8a847d1f1d4 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -24,11 +24,15 @@ analyzeClustering_ratio_cut, subgraph, triangles, + ego_graph, + batched_ego_graphs, ) from cugraph.structure import ( Graph, DiGraph, + MultiGraph, + MultiDiGraph, from_edgelist, from_cudf_edgelist, from_pandas_edgelist, @@ -77,7 +81,8 @@ sssp, shortest_path, filter_unreachable, - shortest_path_length + shortest_path_length, + traveling_salesperson ) from cugraph.tree import minimum_spanning_tree, maximum_spanning_tree @@ -89,7 +94,7 @@ from cugraph.proto.components import strong_connected_component from cugraph.proto.structure import find_bicliques -from cugraph.linear_assignment import hungarian +from cugraph.linear_assignment import hungarian, dense_hungarian from cugraph.layout import force_atlas2 from cugraph.raft import raft_include_test from cugraph.comms import comms diff --git a/python/cugraph/centrality/__init__.py b/python/cugraph/centrality/__init__.py index da882a61850..f33df2fe61a 100644 --- a/python/cugraph/centrality/__init__.py +++ b/python/cugraph/centrality/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,7 +12,7 @@ # limitations under the License. from cugraph.centrality.katz_centrality import katz_centrality -from cugraph.centrality.betweenness_centrality import betweenness_centrality from cugraph.centrality.betweenness_centrality import ( + betweenness_centrality, edge_betweenness_centrality, ) diff --git a/python/cugraph/centrality/betweenness_centrality.py b/python/cugraph/centrality/betweenness_centrality.py index 93bdce7c515..3b7cfe6b68f 100644 --- a/python/cugraph/centrality/betweenness_centrality.py +++ b/python/cugraph/centrality/betweenness_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -36,8 +36,10 @@ def betweenness_centrality( Betweenness centrality is a measure of the number of shortest paths that pass through a vertex. A vertex with a high betweenness centrality score has more paths passing through it and is therefore believed to be more - important. Rather than doing an all-pair shortest path, a sample of k - starting vertices can be used. + important. + + To improve performance, rather than doing an all-pair shortest path, + a sample of k starting vertices can be used. CuGraph does not currently support the 'endpoints' and 'weight' parameters as seen in the corresponding networkX call. @@ -52,19 +54,18 @@ def betweenness_centrality( k : int or list or None, optional, default=None If k is not None, use k node samples to estimate betweenness. Higher - values give better approximation - If k is a list, use the content of the list for estimation: the list - should contain vertices identifiers.
- If k is None (the default), all the vertices are used to estimate - betweenness. - Vertices obtained through sampling or defined as a list will be used as - sources for traversals inside the algorithm. + values give better approximation. If k is a list, use the content + of the list for estimation: the list should contain vertex + identifiers. If k is None (the default), all the vertices are used + to estimate betweenness. Vertices obtained through sampling or + defined as a list will be used as sources for traversals inside the + algorithm. normalized : bool, optional Default is True. If true, the betweenness values are normalized by - 2 / ((n - 1) * (n - 2)) for Graphs (undirected), and - 1 / ((n - 1) * (n - 2)) for DiGraphs (directed graphs) + __2 / ((n - 1) * (n - 2))__ for Graphs (undirected), and + __1 / ((n - 1) * (n - 2))__ for DiGraphs (directed graphs) where n is the number of nodes in G. Normalization will ensure that values are in [0, 1], this normalization scales for the highest possible value where one @@ -145,15 +146,22 @@ def betweenness_centrality( def edge_betweenness_centrality( - G, k=None, normalized=True, weight=None, seed=None, result_dtype=np.float64 + G, + k=None, + normalized=True, + weight=None, + seed=None, + result_dtype=np.float64 ): """ Compute the edge betweenness centrality for all edges of the graph G. Betweenness centrality is a measure of the number of shortest paths that pass over an edge. An edge with a high betweenness centrality score has more paths passing over it and is therefore believed to be - more important. Rather than doing an all-pair shortest path, a sample - of k starting vertices can be used. + more important. + + To improve performance, rather than doing an all-pair shortest path, + a sample of k starting vertices can be used. CuGraph does not currently support the 'weight' parameter as seen in the corresponding networkX call.
@@ -168,7 +176,7 @@ def edge_betweenness_centrality( k : int or list or None, optional, default=None If k is not None, use k node samples to estimate betweenness. Higher - values give better approximation + values give better approximation. If k is a list, use the content of the list for estimation: the list should contain vertices identifiers. Vertices obtained through sampling or defined as a list will be used as diff --git a/python/cugraph/comms/comms.py b/python/cugraph/comms/comms.py index 925f4a1a060..85fc426f373 100644 --- a/python/cugraph/comms/comms.py +++ b/python/cugraph/comms/comms.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,7 +12,7 @@ # limitations under the License. from cugraph.raft.dask.common.comms import Comms as raftComms -from cugraph.raft.dask.common.comms import worker_state +from cugraph.raft.dask.common.comms import get_raft_comm_state from cugraph.raft.common.handle import Handle from cugraph.comms.comms_wrapper import init_subcomms as c_init_subcomms from dask.distributed import default_client @@ -196,12 +196,12 @@ def get_default_handle(): # Functions to be called from within workers def get_handle(sID): - sessionstate = worker_state(sID) + sessionstate = get_raft_comm_state(sID) return sessionstate['handle'] def get_worker_id(sID): - sessionstate = worker_state(sID) + sessionstate = get_raft_comm_state(sID) return sessionstate['wid'] @@ -216,5 +216,5 @@ def get_n_workers(sID=None): if sID is None: return read_utils.get_n_workers() else: - sessionstate = worker_state(sID) + sessionstate = get_raft_comm_state(sID) return sessionstate['nworkers'] diff --git a/python/cugraph/community/__init__.py b/python/cugraph/community/__init__.py index d3bb6472894..9cc92637e20 100644 --- 
a/python/cugraph/community/__init__.py +++ b/python/cugraph/community/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -25,3 +25,5 @@ from cugraph.community.triangle_count import triangles from cugraph.community.ktruss_subgraph import ktruss_subgraph from cugraph.community.ktruss_subgraph import k_truss +from cugraph.community.egonet import ego_graph +from cugraph.community.egonet import batched_ego_graphs diff --git a/python/cugraph/community/egonet.pxd b/python/cugraph/community/egonet.pxd new file mode 100644 index 00000000000..3ddf929674f --- /dev/null +++ b/python/cugraph/community/egonet.pxd @@ -0,0 +1,23 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +from cugraph.structure.graph_primtypes cimport * + +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + cdef unique_ptr[cy_multi_edgelists_t] call_egonet[vertex_t, weight_t]( + const handle_t &handle, + const graph_container_t &g, + vertex_t* source_vertex, + vertex_t n_subgraphs, + vertex_t radius) except + diff --git a/python/cugraph/community/egonet.py b/python/cugraph/community/egonet.py new file mode 100644 index 00000000000..9ff12158b13 --- /dev/null +++ b/python/cugraph/community/egonet.py @@ -0,0 +1,145 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from cugraph.community import egonet_wrapper +import cudf +from cugraph.utilities import ( + ensure_cugraph_obj, + import_optional, +) +from cugraph.utilities import cugraph_to_nx + +# optional dependencies used for handling different input types +nx = import_optional("networkx") + + +def _convert_graph_to_output_type(G, input_type): + """ + Given a cugraph.Graph, convert it to a new type appropriate for the + graph algos in this module, based on input_type. + """ + if (nx is not None) and (input_type in [nx.Graph, nx.DiGraph]): + return cugraph_to_nx(G) + + else: + return G + + +def _convert_df_series_to_output_type(df, offsets, input_type): + """ + Given a cudf.DataFrame df, convert it to a new type appropriate for the + graph algos in this module, based on input_type. 
+ """ + if (nx is not None) and (input_type in [nx.Graph, nx.DiGraph]): + return df.to_pandas(), offsets.values_host.tolist() + + else: + return df, offsets + + +def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None): + """ + Compute the induced subgraph of neighbors centered at node n, + within a given radius. + + Parameters + ---------- + G : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix + Graph or matrix object, which should contain the connectivity + information. Edge weights, if present, should be single or double + precision floating point values. + n : integer + A single node + radius: integer, optional + Include all neighbors of distance<=radius from n. + center: bool, optional + Defaults to True. False is not supported + undirected: bool, optional + Defaults to False. True is not supported + distance: key, optional + Distances are counted in hops from n. Other cases are not supported. + + Returns + ------- + G_ego : cuGraph.Graph or networkx.Graph + A graph descriptor with the induced subgraph centered at node n. + The networkx graph will not have all attributes copied over + """ + + (G, input_type) = ensure_cugraph_obj(G, nx_weight_attr="weight") + result_graph = type(G)() + + if G.renumbered is True: + n = G.lookup_internal_vertex_id(cudf.Series([n])) + + df, offsets = egonet_wrapper.egonet(G, n, radius) + + if G.renumbered: + df = G.unrenumber(df, "src") + df = G.unrenumber(df, "dst") + + if G.edgelist.weights: + result_graph.from_cudf_edgelist( + df, source="src", destination="dst", edge_attr="weight" + ) + else: + result_graph.from_cudf_edgelist(df, source="src", destination="dst") + return _convert_graph_to_output_type(result_graph, input_type) + + +def batched_ego_graphs( + G, seeds, radius=1, center=True, undirected=False, distance=None +): + """ + Compute the induced subgraph of neighbors for each node in seeds + within a given radius. 
+ + Parameters + ---------- + G : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix + Graph or matrix object, which should contain the connectivity + information. Edge weights, if present, should be single or double + precision floating point values. + seeds : cudf.Series or list + Specifies the seeds of the induced egonet subgraphs + radius: integer, optional + Include all neighbors of distance<=radius from n. + center: bool, optional + Defaults to True. False is not supported + undirected: bool, optional + Defaults to False. True is not supported + distance: key, optional + Distances are counted in hops from n. Other cases are not supported. + + Returns + ------- + ego_edge_lists : cudf.DataFrame or pandas.DataFrame + GPU data frame containing all induced sources identifiers, + destination identifiers, edge weights + seeds_offsets: cudf.Series + Series containing the starting offset in the returned edge list + for each seed. + """ + + (G, input_type) = ensure_cugraph_obj(G, nx_weight_attr="weight") + + if G.renumbered is True: + seeds = G.lookup_internal_vertex_id(cudf.Series(seeds)) + + df, offsets = egonet_wrapper.egonet(G, seeds, radius) + + if G.renumbered: + df = G.unrenumber(df, "src", preserve_order=True) + df = G.unrenumber(df, "dst", preserve_order=True) + + return _convert_df_series_to_output_type(df, offsets, input_type) diff --git a/python/cugraph/community/egonet_wrapper.pyx b/python/cugraph/community/egonet_wrapper.pyx new file mode 100644 index 00000000000..122dedbfabd --- /dev/null +++ b/python/cugraph/community/egonet_wrapper.pyx @@ -0,0 +1,116 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from cugraph.community.egonet cimport call_egonet +from cugraph.structure.graph_primtypes cimport * +from libcpp cimport bool +from libc.stdint cimport uintptr_t +from cugraph.structure import graph_primtypes_wrapper +import cudf +import rmm +import numpy as np +import numpy.ctypeslib as ctypeslib +from rmm._lib.device_buffer cimport DeviceBuffer +from cudf.core.buffer import Buffer + + +def egonet(input_graph, vertices, radius=1): + """ + Call egonet + """ + # FIXME: Offsets and indices are currently hardcoded to int, but this may + # not be acceptable in the future. + numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, + np.dtype("int64") : numberTypeEnum.int64Type, + np.dtype("float32") : numberTypeEnum.floatType, + np.dtype("double") : numberTypeEnum.doubleType} + + [src, dst] = [input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']] + vertex_t = src.dtype + edge_t = np.dtype("int32") + weights = None + if input_graph.edgelist.weights: + weights = input_graph.edgelist.edgelist_df['weights'] + + num_verts = input_graph.number_of_vertices() + num_edges = input_graph.number_of_edges(directed_edges=True) + num_partition_edges = num_edges + + cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] + cdef uintptr_t c_edge_weights = NULL + if weights is not None: + c_edge_weights = weights.__cuda_array_interface__['data'][0] + weight_t = weights.dtype + else: + weight_t = np.dtype("float32") + + # Pointers for egonet + cdef 
uintptr_t c_source_vertex_ptr = vertices.__cuda_array_interface__['data'][0] + n_subgraphs = vertices.size + + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get(); + + cdef graph_container_t graph_container + populate_graph_container(graph_container, + handle_[0], + c_src_vertices, c_dst_vertices, c_edge_weights, + NULL, + ((numberTypeMap[vertex_t])), + ((numberTypeMap[edge_t])), + ((numberTypeMap[weight_t])), + num_partition_edges, + num_verts, + num_edges, + False, + False, False) + + if(weight_t==np.dtype("float32")): + el_struct_ptr = move(call_egonet[int, float](handle_[0], + graph_container, + c_source_vertex_ptr, + n_subgraphs, + radius)) + else: + el_struct_ptr = move(call_egonet[int, double](handle_[0], + graph_container, + c_source_vertex_ptr, + n_subgraphs, + radius)) + + el_struct = move(el_struct_ptr.get()[0]) + src = DeviceBuffer.c_from_unique_ptr(move(el_struct.src_indices)) + dst = DeviceBuffer.c_from_unique_ptr(move(el_struct.dst_indices)) + wgt = DeviceBuffer.c_from_unique_ptr(move(el_struct.edge_data)) + src = Buffer(src) + dst = Buffer(dst) + wgt = Buffer(wgt) + + src = cudf.Series(data=src, dtype=vertex_t) + dst = cudf.Series(data=dst, dtype=vertex_t) + + df = cudf.DataFrame() + df['src'] = src + df['dst'] = dst + if wgt.nbytes != 0: + wgt = cudf.Series(data=wgt, dtype=weight_t) + df['weight'] = wgt + + offsets = DeviceBuffer.c_from_unique_ptr(move(el_struct.subgraph_offsets)) + offsets = Buffer(offsets) + offsets = cudf.Series(data=offsets, dtype="int") + + return df, offsets + diff --git a/python/cugraph/community/spectral_clustering.py b/python/cugraph/community/spectral_clustering.py index b5f175e8237..443e2169711 100644 --- a/python/cugraph/community/spectral_clustering.py +++ b/python/cugraph/community/spectral_clustering.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -32,23 +32,23 @@ def spectralBalancedCutClustering( Parameters ---------- G : cugraph.Graph or networkx.Graph - cuGraph graph descriptor + graph descriptor num_clusters : integer - Specifies the number of clusters to find + Specifies the number of clusters to find, must be greater than 1 num_eigen_vects : integer Specifies the number of eigenvectors to use. Must be lower or equal to - num_clusters. + num_clusters. Default is 2 evs_tolerance: float - Specifies the tolerance to use in the eigensolver + Specifies the tolerance to use in the eigensolver. Default is 0.00001 evs_max_iter: integer - Specifies the maximum number of iterations for the eigensolver + Specifies the maximum number of iterations for the eigensolver. Default is 100 kmean_tolerance: float - Specifies the tolerance to use in the k-means solver + Specifies the tolerance to use in the k-means solver. Default is 0.00001 kmean_max_iter: integer - Specifies the maximum number of iterations for the k-means solver + Specifies the maximum number of iterations for the k-means solver. Default is 100 Returns @@ -73,6 +73,8 @@ def spectralBalancedCutClustering( >>> df = cugraph.spectralBalancedCutClustering(G, 5) """ + # Error checking in C++ code + G, isNx = check_nx_graph(G) df = spectral_clustering_wrapper.spectralBalancedCutClustering( @@ -109,24 +111,24 @@ def spectralModularityMaximizationClustering( Parameters ---------- - G : cugraph.Graph + G : cugraph.Graph or networkx.Graph cuGraph graph descriptor. This graph should have edge weights. num_clusters : integer Specifies the number of clusters to find num_eigen_vects : integer Specifies the number of eigenvectors to use. Must be lower or equal to - num_clusters + num_clusters. 
Default is 2 evs_tolerance: float - Specifies the tolerance to use in the eigensolver + Specifies the tolerance to use in the eigensolver. Default is 0.00001 evs_max_iter: integer - Specifies the maximum number of iterations for the eigensolver + Specifies the maximum number of iterations for the eigensolver. Default is 100 kmean_tolerance: float - Specifies the tolerance to use in the k-means solver + Specifies the tolerance to use in the k-means solver. Default is 0.00001 kmean_max_iter: integer - Specifies the maximum number of iterations for the k-means solver + Specifies the maximum number of iterations for the k-means solver. Default is 100 Returns @@ -148,6 +150,8 @@ def spectralModularityMaximizationClustering( >>> df = cugraph.spectralModularityMaximizationClustering(G, 5) """ + # Error checking in C++ code + G, isNx = check_nx_graph(G) df = spectral_clustering_wrapper.spectralModularityMaximizationClustering( @@ -173,12 +177,15 @@ def analyzeClustering_modularity(G, n_clusters, clustering, vertex_col_name='vertex', cluster_col_name='cluster'): """ - Compute the modularity score for a partitioning/clustering + Compute the modularity score for a given partitioning/clustering. + The assumption is that “clustering” is the result of a call + to a spectral clustering algorithm and contains columns named + “vertex” and “cluster”. Parameters ---------- - G : cugraph.Graph - cuGraph graph descriptor. This graph should have edge weights. + G : cugraph.Graph or networkx.Graph + graph descriptor. This graph should have edge weights. 
n_clusters : integer Specifies the number of clusters in the given clustering clustering : cudf.DataFrame @@ -204,10 +211,17 @@ def analyzeClustering_modularity(G, n_clusters, clustering, >>> G = cugraph.Graph() >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2') >>> df = cugraph.spectralBalancedCutClustering(G, 5) - >>> score = cugraph.analyzeClustering_modularity(G, 5, df, - >>> 'vertex', 'cluster') + >>> score = cugraph.analyzeClustering_modularity(G, 5, df) """ + if type(vertex_col_name) is not str: + raise Exception("vertex_col_name must be a string") + + if type(cluster_col_name) is not str: + raise Exception("cluster_col_name must be a string") + + G, isNx = check_nx_graph(G) + if G.renumbered: clustering = G.add_internal_vertex_id(clustering, vertex_col_name, @@ -228,6 +242,9 @@ def analyzeClustering_edge_cut(G, n_clusters, clustering, cluster_col_name='cluster'): """ Compute the edge cut score for a partitioning/clustering + The assumption is that “clustering” is the result of a call + to a spectral clustering algorithm and contains columns named + “vertex” and “cluster”. 
Parameters ---------- @@ -258,10 +275,15 @@ def analyzeClustering_edge_cut(G, n_clusters, clustering, >>> G = cugraph.Graph() >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr=None) >>> df = cugraph.spectralBalancedCutClustering(G, 5) - >>> score = cugraph.analyzeClustering_edge_cut(G, 5, df, - >>> 'vertex', 'cluster') + >>> score = cugraph.analyzeClustering_edge_cut(G, 5, df) """ + if type(vertex_col_name) is not str: + raise Exception("vertex_col_name must be a string") + + if type(cluster_col_name) is not str: + raise Exception("cluster_col_name must be a string") + G, isNx = check_nx_graph(G) if G.renumbered: @@ -318,6 +340,12 @@ def analyzeClustering_ratio_cut(G, n_clusters, clustering, >>> 'vertex', 'cluster') """ + if type(vertex_col_name) is not str: + raise Exception("vertex_col_name must be a string") + + if type(cluster_col_name) is not str: + raise Exception("cluster_col_name must be a string") + if G.renumbered: clustering = G.add_internal_vertex_id(clustering, vertex_col_name, diff --git a/python/cugraph/community/subgraph_extraction_wrapper.pyx b/python/cugraph/community/subgraph_extraction_wrapper.pyx index 5dbb6ce1e27..35b3c743987 100644 --- a/python/cugraph/community/subgraph_extraction_wrapper.pyx +++ b/python/cugraph/community/subgraph_extraction_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -45,6 +45,10 @@ def subgraph(input_graph, vertices): if weights.dtype == np.float64: use_float = False + num_verts = input_graph.number_of_vertices() + num_edges = len(src) + num_input_vertices = len(vertices) + cdef GraphCOOView[int,int,float] in_graph_float cdef GraphCOOView[int,int,double] in_graph_double cdef unique_ptr[GraphCOO[int,int,float]] out_graph_float @@ -59,10 +63,6 @@ def subgraph(input_graph, vertices): cdef uintptr_t c_vertices = vertices.__cuda_array_interface__['data'][0] - num_verts = input_graph.number_of_vertices() - num_edges = len(src) - num_input_vertices = len(vertices) - if use_float: in_graph_float = GraphCOOView[int,int,float](c_src, c_dst, c_weights, num_verts, num_edges); df = coo_to_df(move(c_extract_subgraph_vertex(in_graph_float, c_vertices, num_input_vertices))); diff --git a/python/cugraph/dask/structure/renumber.py b/python/cugraph/dask/structure/renumber.py new file mode 100644 index 00000000000..606a6bc4dc1 --- /dev/null +++ b/python/cugraph/dask/structure/renumber.py @@ -0,0 +1,71 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from dask.distributed import wait, default_client +from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.dask.structure import renumber_wrapper as renumber_w +import cugraph.comms.comms as Comms +import dask_cudf + + +def call_renumber(sID, + data, + num_verts, + num_edges, + is_mnmg): + wid = Comms.get_worker_id(sID) + handle = Comms.get_handle(sID) + return renumber_w.mg_renumber(data[0], + num_verts, + num_edges, + wid, + handle, + is_mnmg) + + +def renumber(input_graph): + + client = default_client() + + ddf = input_graph.edgelist.edgelist_df + + num_edges = len(ddf) + + if isinstance(ddf, dask_cudf.DataFrame): + is_mnmg = True + else: + is_mnmg = False + + num_verts = input_graph.number_of_vertices() + + if is_mnmg: + data = get_distributed_data(ddf) + result = [client.submit(call_renumber, + Comms.get_session_id(), + wf[1], + num_verts, + num_edges, + is_mnmg, + workers=[wf[0]]) + for idx, wf in enumerate(data.worker_to_parts.items())] + wait(result) + ddf = dask_cudf.from_delayed(result) + else: + call_renumber(Comms.get_session_id(), + ddf, + num_verts, + num_edges, + is_mnmg) + return ddf diff --git a/python/cugraph/dask/structure/renumber_wrapper.pyx b/python/cugraph/dask/structure/renumber_wrapper.pyx new file mode 100644 index 00000000000..40dd80aeb67 --- /dev/null +++ b/python/cugraph/dask/structure/renumber_wrapper.pyx @@ -0,0 +1,460 @@ +# +# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +from cugraph.structure.utils_wrapper import * +import cudf +from cugraph.structure.graph_primtypes cimport * +import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper +from libc.stdint cimport uintptr_t +from cython.operator cimport dereference as deref +import numpy as np + +from libcpp.utility cimport move +from rmm._lib.device_buffer cimport device_buffer, DeviceBuffer + +cdef renumber_helper(shuffled_vertices_t* ptr_maj_min_w): + # extract shuffled result: + # + cdef pair[unique_ptr[device_buffer], size_t] pair_s_major = deref(ptr_maj_min_w).get_major_wrap() + cdef pair[unique_ptr[device_buffer], size_t] pair_s_minor = deref(ptr_maj_min_w).get_minor_wrap() + cdef pair[unique_ptr[device_buffer], size_t] pair_s_weights = deref(ptr_maj_min_w).get_weights_wrap() + + shufled_major_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_major.first)) + shufled_major_buffer = Buffer(shufled_major_buffer) + + shufled_major_series = cudf.Series(data=shufled_major_buffer, dtype=vertex_t) + + shufled_minor_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_minor.first)) + shufled_minor_buffer = Buffer(shufled_minor_buffer) + + shufled_minor_series = cudf.Series(data=shufled_minor_buffer, dtype=vertex_t) + + shufled_weights_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_weights.first)) + shufled_weights_buffer = Buffer(shufled_weights_buffer) + + shufled_weights_series = cudf.Series(data=shufled_weights_buffer, dtype=weight_t) + + shuffled_df = cudf.DataFrame() + shuffled_df['src']=shuffled_major_series + shuffled_df['dst']=shuffled_minor_series + shuffled_df['weights']= shuffled_weights_series + + return shuffled_df + +def mg_renumber(input_df, # maybe use cpdef ? 
+ num_global_verts, + num_global_edges, + rank, + handle, + is_multi_gpu): + """ + Call MNMG renumber + """ + cdef size_t handle_size_t = handle.getHandle() + # TODO: get handle_t out of handle... + handle_ptr = handle_size_t + + src = input_df['src'] + dst = input_df['dst'] + cdef uintptr_t c_edge_weights = NULL # set below... + + vertex_t = src.dtype + if num_global_edges > (2**31 - 1): + edge_t = np.dtype("int64") + else: + edge_t = np.dtype("int32") + if "value" in input_df.columns: + weights = input_df['value'] + weight_t = weights.dtype + c_edge_weights = weights.__cuda_array_interface__['data'][0] + else: + weight_t = np.dtype("float32") + + if (vertex_t != np.dtype("int32") and vertex_t != np.dtype("int64")): + raise Exception("Incorrect vertex_t type.") + if (edge_t != np.dtype("int32") and edge_t != np.dtype("int64")): + raise Exception("Incorrect edge_t type.") + if (weight_t != np.dtype("float32") and weight_t != np.dtype("float64")): + raise Exception("Incorrect weight_t type.") + if (vertex_t != np.dtype("int32") and edge_t != np.dtype("int64")): + raise Exception("Incompatible vertex_t and edge_t types.") + + # FIXME: needs to be edge_t type not int + cdef int num_partition_edges = len(src) + + cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] + + cdef bool is_hyper_partitioned = False # for now + + cdef uintptr_t shuffled_major = NULL + cdef uintptr_t shuffled_minor = NULL + + cdef bool do_check = False # ? for now... 
+ cdef bool mg_flag = is_multi_gpu # run Single-GPU or MNMG + + cdef pair[unique_ptr[device_buffer], size_t] pair_original + cdef pair[unique_ptr[device_buffer], size_t] pair_partition + + # tparams: vertex_t, weight_t: + # + cdef unique_ptr[major_minor_weights_t[int, float]] ptr_shuffled_32_32 + cdef unique_ptr[major_minor_weights_t[int, double]] ptr_shuffled_32_64 + cdef unique_ptr[major_minor_weights_t[long, float]] ptr_shuffled_64_32 + cdef unique_ptr[major_minor_weights_t[long, double]] ptr_shuffled_64_64 + + # tparams: vertex_t, edge_t: + # + cdef unique_ptr[renum_quad_t[int, int]] ptr_renum_quad_32_32 + cdef unique_ptr[renum_quad_t[int, long]] ptr_renum_quad_32_64 + cdef unique_ptr[renum_quad_t[long, long]] ptr_renum_quad_64_64 + + # tparam: vertex_t: + # + cdef unique_ptr[vector[int]] uniq_partition_vector_32 + cdef unique_ptr[vector[long]] uniq_partition_vector_64 + + cdef size_t rank_indx = rank + + if (vertex_t == np.dtype("int32")): + if ( edge_t == np.dtype("int32")): + if( weight_t == np.dtype("float32")): + ptr_shuffled_32_32.reset(call_shuffle[int, int, float](deref(handle_ptr), + c_src_vertices, + c_dst_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_32_32.get()) + + shuffled_src = shufled_df['src'] + shuffled_dst = shufled_df['dst'] + + shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] + shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + ptr_renum_quad_32_32.reset(call_renumber[int, int](deref(handle_ptr), + shuffled_major, + shuffled_minor, + num_partition_edges, + is_hyper_partitioned, + do_check, + mg_flag).release()) + + pair_original = ptr_renum_quad_32_32.get().get_dv_wrap() # original vertices: see helper + + + original_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_original.first)) + original_buffer = Buffer(original_buffer) + + original_series = cudf.Series(data=original_buffer, dtype=vertex_t) + + # extract 
unique_ptr[partition_offsets]: + # + uniq_partition_vector_32 = move(ptr_renum_quad_32_32.get().get_partition_offsets()) + + # create series out of a partition range from rank to rank+1: + # + if is_multi_gpu: + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(rank_indx), + uniq_partition_vector_32.get()[0].at(rank_indx+1)), + dtype=vertex_t) + else: + new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) + + # create new cudf df + # + # and add the previous series to it: + # + renumbered_map = cudf.DataFrame() + renumbered_map['original_ids'] = original_series + renumbered_map['new_ids'] = new_series + + return renumbered_map, shuffled_df + elif( weight_t == np.dtype("float64")): + ptr_shuffled_32_64.reset(call_shuffle[int, int, double](deref(handle_ptr), + c_src_vertices, + c_dst_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_32_64.get()) + + shuffled_src = shufled_df['src'] + shuffled_dst = shufled_df['dst'] + + shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] + shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + ptr_renum_quad_32_32.reset(call_renumber[int, int](deref(handle_ptr), + shuffled_major, + shuffled_minor, + num_partition_edges, + is_hyper_partitioned, + do_check, + mg_flag).release()) + + pair_original = ptr_renum_quad_32_32.get().get_dv_wrap() # original vertices: see helper + + + original_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_original.first)) + original_buffer = Buffer(original_buffer) + + original_series = cudf.Series(data=original_buffer, dtype=vertex_t) + + # extract unique_ptr[partition_offsets]: + # + uniq_partition_vector_32 = move(ptr_renum_quad_32_32.get().get_partition_offsets()) + + # create series out of a partition range from rank to rank+1: + # + if is_multi_gpu: + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(rank_indx), + 
uniq_partition_vector_32.get()[0].at(rank_indx+1)), + dtype=vertex_t) + else: + new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) + + # create new cudf df + # + # and add the previous series to it: + # + renumbered_map = cudf.DataFrame() + renumbered_map['original_ids'] = original_series + renumbered_map['new_ids'] = new_series + + return renumbered_map, shuffled_df + elif ( edge_t == np.dtype("int64")): + if( weight_t == np.dtype("float32")): + ptr_shuffled_32_32.reset(call_shuffle[int, long, float](deref(handle_ptr), + c_src_vertices, + c_dst_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_32_32.get()) + + shuffled_src = shufled_df['src'] + shuffled_dst = shufled_df['dst'] + + shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] + shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + ptr_renum_quad_32_64.reset(call_renumber[int, long](deref(handle_ptr), + shuffled_major, + shuffled_minor, + num_partition_edges, + is_hyper_partitioned, + do_check, + mg_flag).release()) + + pair_original = ptr_renum_quad_32_64.get().get_dv_wrap() # original vertices: see helper + + + original_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_original.first)) + original_buffer = Buffer(original_buffer) + + original_series = cudf.Series(data=original_buffer, dtype=vertex_t) + + # extract unique_ptr[partition_offsets]: + # + uniq_partition_vector_32 = move(ptr_renum_quad_32_64.get().get_partition_offsets()) + + # create series out of a partition range from rank to rank+1: + # + if is_multi_gpu: + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(rank_indx), + uniq_partition_vector_32.get()[0].at(rank_indx+1)), + dtype=vertex_t) + else: + new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) + + # create new cudf df + # + # and add the previous series to it: + # + renumbered_map = cudf.DataFrame() + 
renumbered_map['original_ids'] = original_series + renumbered_map['new_ids'] = new_series + + return renumbered_map, shuffled_df + elif( weight_t == np.dtype("float64")): + ptr_shuffled_32_64.reset(call_shuffle[int, long, double](deref(handle_ptr), + c_src_vertices, + c_dst_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_32_64.get()) + + shuffled_src = shufled_df['src'] + shuffled_dst = shufled_df['dst'] + + shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] + shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + ptr_renum_quad_32_64.reset(call_renumber[int, long](deref(handle_ptr), + shuffled_major, + shuffled_minor, + num_partition_edges, + is_hyper_partitioned, + do_check, + mg_flag).release()) + + pair_original = ptr_renum_quad_32_64.get().get_dv_wrap() # original vertices: see helper + + + original_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_original.first)) + original_buffer = Buffer(original_buffer) + + original_series = cudf.Series(data=original_buffer, dtype=vertex_t) + + # extract unique_ptr[partition_offsets]: + # + uniq_partition_vector_32 = move(ptr_renum_quad_32_64.get().get_partition_offsets()) + + # create series out of a partition range from rank to rank+1: + # + if is_multi_gpu: + new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(rank_indx), + uniq_partition_vector_32.get()[0].at(rank_indx+1)), + dtype=vertex_t) + else: + new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) + + # create new cudf df + # + # and add the previous series to it: + # + renumbered_map = cudf.DataFrame() + renumbered_map['original_ids'] = original_series + renumbered_map['new_ids'] = new_series + + return renumbered_map, shuffled_df + elif (vertex_t == np.dtype("int64")): + if ( edge_t == np.dtype("int64")): + if( weight_t == np.dtype("float32")): + ptr_shuffled_64_32.reset(call_shuffle[long, long, 
float](deref(handle_ptr), + c_src_vertices, + c_dst_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_64_32.get()) + + shuffled_src = shufled_df['src'] + shuffled_dst = shufled_df['dst'] + + shuffled_major = shuffled_src.__cuda_array_interface__['data'][0] + shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + ptr_renum_quad_64_64.reset(call_renumber[long, long](deref(handle_ptr), + shuffled_major, + shuffled_minor, + num_partition_edges, + is_hyper_partitioned, + do_check, + mg_flag).release()) + + pair_original = ptr_renum_quad_64_64.get().get_dv_wrap() # original vertices: see helper + + + original_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_original.first)) + original_buffer = Buffer(original_buffer) + + original_series = cudf.Series(data=original_buffer, dtype=vertex_t) + + # extract unique_ptr[partition_offsets]: + # + uniq_partition_vector_64 = move(ptr_renum_quad_64_64.get().get_partition_offsets()) + + # create series out of a partition range from rank to rank+1: + # + if is_multi_gpu: + new_series = cudf.Series(np.arange(uniq_partition_vector_64.get()[0].at(rank_indx), + uniq_partition_vector_64.get()[0].at(rank_indx+1)), + dtype=vertex_t) + else: + new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) + + # create new cudf df + # + # and add the previous series to it: + # + renumbered_map = cudf.DataFrame() + renumbered_map['original_ids'] = original_series + renumbered_map['new_ids'] = new_series + + return renumbered_map, shuffled_df + elif( weight_t == np.dtype("float64")): + ptr_shuffled_64_64.reset(call_shuffle[long, long, double](deref(handle_ptr), + c_src_vertices, + c_dst_vertices, + c_edge_weights, + num_partition_edges, + is_hyper_partitioned).release()) + + shuffled_df = renumber_helper(ptr_shuffled_64_64.get()) + + shuffled_src = shufled_df['src'] + shuffled_dst = shufled_df['dst'] + + shuffled_major = 
shuffled_src.__cuda_array_interface__['data'][0] + shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0] + + ptr_renum_quad_64_64.reset(call_renumber[long, long](deref(handle_ptr), + shuffled_major, + shuffled_minor, + num_partition_edges, + is_hyper_partitioned, + do_check, + mg_flag).release()) + + pair_original = ptr_renum_quad_64_64.get().get_dv_wrap() # original vertices: see helper + + + original_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_original.first)) + original_buffer = Buffer(original_buffer) + + original_series = cudf.Series(data=original_buffer, dtype=vertex_t) + + # extract unique_ptr[partition_offsets]: + # + uniq_partition_vector_64 = move(ptr_renum_quad_64_64.get().get_partition_offsets()) + + # create series out of a partition range from rank to rank+1: + # + if is_multi_gpu: + new_series = cudf.Series(np.arange(uniq_partition_vector_64.get()[0].at(rank_indx), + uniq_partition_vector_64.get()[0].at(rank_indx+1)), + dtype=vertex_t) + else: + new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t) + + # create new cudf df + # + # and add the previous series to it: + # + renumbered_map = cudf.DataFrame() + renumbered_map['original_ids'] = original_series + renumbered_map['new_ids'] = new_series + + return renumbered_map, shuffled_df diff --git a/python/cugraph/linear_assignment/__init__.py b/python/cugraph/linear_assignment/__init__.py index 9bf09b67ed9..557bbbdf170 100644 --- a/python/cugraph/linear_assignment/__init__.py +++ b/python/cugraph/linear_assignment/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,4 +11,4 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from cugraph.linear_assignment.lap import hungarian +from cugraph.linear_assignment.lap import hungarian, dense_hungarian diff --git a/python/cugraph/linear_assignment/lap.pxd b/python/cugraph/linear_assignment/lap.pxd index f7991405b7f..782d5cfef60 100644 --- a/python/cugraph/linear_assignment/lap.pxd +++ b/python/cugraph/linear_assignment/lap.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -20,9 +20,19 @@ from cugraph.structure.graph_primtypes cimport * cdef extern from "algorithms.hpp" namespace "cugraph": - cdef void hungarian[VT,ET,WT]( + cdef weight_t hungarian[vertex_t,edge_t,weight_t]( const handle_t &handle, - const GraphCOOView[VT,ET,WT] &graph, - VT num_workers, - const VT *workers, - VT *assignment) except + + const GraphCOOView[vertex_t,edge_t,weight_t] &graph, + vertex_t num_workers, + const vertex_t *workers, + vertex_t *assignment) except + + + +cdef extern from "algorithms.hpp": + + cdef weight_t dense_hungarian "cugraph::dense::hungarian" [vertex_t,weight_t]( + const handle_t &handle, + const weight_t *costs, + vertex_t num_rows, + vertex_t num_columns, + vertex_t *assignment) except + diff --git a/python/cugraph/linear_assignment/lap.py b/python/cugraph/linear_assignment/lap.py index 5c501d17935..c634d9aceb4 100644 --- a/python/cugraph/linear_assignment/lap.py +++ b/python/cugraph/linear_assignment/lap.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -46,6 +46,8 @@ def hungarian(G, workers): Returns ------- + cost : matches costs.dtype + The cost of the overall assignment df : cudf.DataFrame df['vertex'][i] gives the vertex id of the i'th vertex. Only vertices in the workers list are defined in this column. @@ -60,7 +62,7 @@ def hungarian(G, workers): >>> dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2') - >>> df = cugraph.hungarian(G, workers) + >>> cost, df = cugraph.hungarian(G, workers) """ @@ -69,9 +71,48 @@ def hungarian(G, workers): else: local_workers = workers - df = lap_wrapper.hungarian(G, local_workers) + df = lap_wrapper.sparse_hungarian(G, local_workers) if G.renumbered: df = G.unrenumber(df, 'vertex') return df + + +def dense_hungarian(costs, num_rows, num_columns): + """ + Execute the Hungarian algorithm against a dense bipartite + graph representation. + + *NOTE*: This API is unstable and subject to change + + The Hungarian algorithm identifies the lowest cost matching of vertices + such that all workers that can be assigned work are assigned exactly + on job. + + Parameters + ---------- + costs : cudf.Series + A dense representation (row major order) of the bipartite + graph. Each row represents a worker, each column represents + a task, cost[i][j] represents the cost of worker i performing + task j. + num_rows : int + Number of rows in the matrix + num_columns : int + Number of columns in the matrix + + + Returns + ------- + cost : matches costs.dtype + The cost of the overall assignment + assignment : cudf.Series + assignment[i] gives the vertex id of the task assigned to the + worker i + + FIXME: Update this with a real example... 
+ + """ + + return lap_wrapper.dense_hungarian(costs, num_rows, num_columns) diff --git a/python/cugraph/linear_assignment/lap_wrapper.pyx b/python/cugraph/linear_assignment/lap_wrapper.pyx index caaa837e859..0769ef42f0f 100644 --- a/python/cugraph/linear_assignment/lap_wrapper.pyx +++ b/python/cugraph/linear_assignment/lap_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -17,6 +17,7 @@ # cython: language_level = 3 from cugraph.linear_assignment.lap cimport hungarian as c_hungarian +from cugraph.linear_assignment.lap cimport dense_hungarian as c_dense_hungarian from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t @@ -25,7 +26,7 @@ from cugraph.structure.graph import Graph as type_Graph import cudf import numpy as np -def hungarian(input_graph, workers): +def sparse_hungarian(input_graph, workers): """ Call the hungarian algorithm """ @@ -76,10 +77,37 @@ def hungarian(input_graph, workers): if weights.dtype == np.float32: g_float = GraphCOOView[int,int,float](c_src, c_dst, c_weights, num_verts, num_edges) - c_hungarian[int,int,float](handle_[0], g_float, len(workers), c_workers, c_assignment) + cost = c_hungarian[int,int,float](handle_[0], g_float, len(workers), c_workers, c_assignment) else: g_double = GraphCOOView[int,int,double](c_src, c_dst, c_weights, num_verts, num_edges) - c_hungarian[int,int,double](handle_[0], g_double, len(workers), c_workers, c_assignment) + cost = c_hungarian[int,int,double](handle_[0], g_double, len(workers), c_workers, c_assignment) - return df + return cost, df + + +def dense_hungarian(costs, num_rows, num_columns): + """ + Call the dense hungarian algorithm + """ + if type(costs) is not cudf.Series: + 
raise("costs must be a cudf.Series") + + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get(); + + assignment = cudf.Series(np.zeros(num_rows, dtype=np.int32)) + + cdef uintptr_t c_costs = costs.__cuda_array_interface__['data'][0] + cdef uintptr_t c_assignment = assignment.__cuda_array_interface__['data'][0] + + + if costs.dtype == np.float32: + cost = c_dense_hungarian[int,float](handle_[0], c_costs, num_rows, num_columns, c_assignment) + elif costs.dtype == np.float64: + cost = c_dense_hungarian[int,double](handle_[0], c_costs, num_rows, num_columns, c_assignment) + else: + raise("unsported type: ", costs.dtype) + + return cost, assignment diff --git a/python/cugraph/link_analysis/pagerank.py b/python/cugraph/link_analysis/pagerank.py index 69133d62af7..0bb89195e01 100644 --- a/python/cugraph/link_analysis/pagerank.py +++ b/python/cugraph/link_analysis/pagerank.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -32,7 +32,7 @@ def pagerank( ---------- graph : cugraph.Graph or networkx.Graph cuGraph graph descriptor, should contain the connectivity information - as an edge list (edge weights are not used for this algorithm). + as an edge list. The transposed adjacency list will be computed if not already present. alpha : float The damping factor alpha represents the probability to follow an @@ -68,11 +68,6 @@ def pagerank( Subset of vertices of graph for initial guess for pagerank values nstart['values'] : cudf.Series Pagerank values for vertices - - weight : str - Edge data column to use. Default is None - This version of PageRank current does not use edge weight. 
- This parameter is here for NetworkX compatibility dangling : dict This parameter is here for NetworkX compatibility and ignored diff --git a/python/cugraph/link_analysis/pagerank_wrapper.pyx b/python/cugraph/link_analysis/pagerank_wrapper.pyx index a8c1c9faee8..fea1939db6a 100644 --- a/python/cugraph/link_analysis/pagerank_wrapper.pyx +++ b/python/cugraph/link_analysis/pagerank_wrapper.pyx @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -33,21 +33,22 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. Call pagerank """ - if not input_graph.transposedadjlist: - input_graph.view_transposed_adj_list() - cdef unique_ptr[handle_t] handle_ptr handle_ptr.reset(new handle_t()) handle_ = handle_ptr.get(); - [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.transposedadjlist.offsets, input_graph.transposedadjlist.indices], [np.int32]) - [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.transposedadjlist.weights], [np.float32, np.float64]) + [src, dst] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32]) + weights = None + if input_graph.edgelist.weights: + [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['weights']], [np.float32, np.float64]) num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) + # FIXME: needs to be edge_t type not int + cdef int num_partition_edges = len(src) df = cudf.DataFrame() - df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) + df['vertex'] = cudf.Series(np.arange(num_verts, dtype=np.int32)) df['pagerank'] = cudf.Series(np.zeros(num_verts, dtype=np.float32)) cdef 
bool has_guess = 0 @@ -62,19 +63,16 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. cdef uintptr_t c_pers_vtx = NULL cdef uintptr_t c_pers_val = NULL - cdef sz = 0 - - cdef uintptr_t c_offsets = offsets.__cuda_array_interface__['data'][0] - cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] - cdef uintptr_t c_weights = NULL - cdef uintptr_t c_local_verts = NULL; - cdef uintptr_t c_local_edges = NULL; - cdef uintptr_t c_local_offsets = NULL; + cdef int sz = 0 + cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] + cdef uintptr_t c_edge_weights = NULL + personalization_id_series = None if weights is not None: - c_weights = weights.__cuda_array_interface__['data'][0] + c_edge_weights = weights.__cuda_array_interface__['data'][0] weight_t = weights.dtype else: weight_t = np.dtype("float32") @@ -94,15 +92,19 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. 
c_pers_val = personalization['values'].__cuda_array_interface__['data'][0] cdef graph_container_t graph_container - populate_graph_container_legacy(graph_container, - ((graphTypeEnum.LegacyCSC)), - handle_[0], - c_offsets, c_indices, c_weights, - ((numberTypeEnum.int32Type)), - ((numberTypeEnum.int32Type)), - ((numberTypeMap[weight_t])), - num_verts, num_edges, - c_local_verts, c_local_edges, c_local_offsets) + populate_graph_container(graph_container, + handle_[0], + c_src_vertices, c_dst_vertices, c_edge_weights, + NULL, + ((numberTypeEnum.int32Type)), + ((numberTypeEnum.int32Type)), + ((numberTypeMap[weight_t])), + #num_verts, num_edges, + num_partition_edges, + num_verts, num_edges, + False, + True, + False) if (df['pagerank'].dtype == np.float32): call_pagerank[int, float](handle_[0], graph_container, diff --git a/python/cugraph/structure/__init__.py b/python/cugraph/structure/__init__.py index 34447e80ee9..ad67fe91876 100644 --- a/python/cugraph/structure/__init__.py +++ b/python/cugraph/structure/__init__.py @@ -11,7 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from cugraph.structure.graph import Graph, DiGraph +from cugraph.structure.graph import Graph, DiGraph, MultiGraph, MultiDiGraph from cugraph.structure.number_map import NumberMap from cugraph.structure.symmetrize import symmetrize, symmetrize_df , symmetrize_ddf from cugraph.structure.convert_matrix import (from_edgelist, diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index acc0ad8f066..a3024f9d081 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -72,7 +72,6 @@ def __init__(self, offsets, indices, value=None): def __init__( self, m_graph=None, - edge_attr=None, symmetrized=False, bipartite=False, multi=False, @@ -112,24 +111,22 @@ def __init__( self.batch_transposed_adjlists = None if m_graph is not None: - if (type(self) is Graph and type(m_graph) is MultiGraph) or ( - type(self) is DiGraph and type(m_graph) is MultiDiGraph - ): - self.from_cudf_edgelist( - m_graph.edgelist.edgelist_df, - source="src", - destination="dst", - edge_attr=edge_attr, - ) - self.renumbered = m_graph.renumbered - self.renumber_map = m_graph.renumber_map + if type(m_graph) is MultiGraph or type(m_graph) is MultiDiGraph: + elist = m_graph.view_edge_list() + if m_graph.edgelist.weights: + weights = "weights" + else: + weights = None + self.from_cudf_edgelist(elist, + source="src", + destination="dst", + edge_attr=weights) else: msg = ( - "Graph can be initialized using MultiGraph " - "and DiGraph can be initialized using MultiDiGraph" + "Graph can only be initialized using MultiGraph " + "or MultiDiGraph" ) raise Exception(msg) - # self.number_of_vertices = None def enable_batch(self): client = mg_utils.get_client() @@ -277,6 +274,12 @@ def is_multipartite(self): # TO DO: Call coloring algorithm return self.multipartite or self.bipartite + def is_multigraph(self): + """ + Returns True if the graph is a multigraph. Else returns False. 
+ """ + return self.multi + def sets(self): """ Returns the bipartite set of nodes. This solely relies on the user's @@ -408,24 +411,19 @@ def from_cudf_edgelist( source_col = elist[source] dest_col = elist[destination] - if self.multi: - if type(edge_attr) is not list: - raise Exception("edge_attr should be a list of column names") - value_col = {} - for col_name in edge_attr: - value_col[col_name] = elist[col_name] - elif edge_attr is not None: + if edge_attr is not None: value_col = elist[edge_attr] else: value_col = None - if not self.symmetrized and not self.multi: - if value_col is not None: - source_col, dest_col, value_col = symmetrize( - source_col, dest_col, value_col - ) - else: - source_col, dest_col = symmetrize(source_col, dest_col) + if value_col is not None: + source_col, dest_col, value_col = symmetrize( + source_col, dest_col, value_col, multi=self.multi, + symmetrize=not self.symmetrized) + else: + source_col, dest_col = symmetrize( + source_col, dest_col, multi=self.multi, + symmetrize=not self.symmetrized) self.edgelist = Graph.EdgeList(source_col, dest_col, value_col) @@ -700,7 +698,7 @@ def view_edge_list(self): edgelist_df = self.unrenumber(edgelist_df, "src") edgelist_df = self.unrenumber(edgelist_df, "dst") - if type(self) is Graph: + if type(self) is Graph or type(self) is MultiGraph: edgelist_df = edgelist_df[edgelist_df["src"] <= edgelist_df["dst"]] edgelist_df = edgelist_df.reset_index(drop=True) self.edge_count = len(edgelist_df) @@ -992,7 +990,7 @@ def number_of_edges(self, directed_edges=False): return len(self.edgelist.edgelist_df) if self.edge_count is None: if self.edgelist is not None: - if type(self) is Graph: + if type(self) is Graph or type(self) is MultiGraph: self.edge_count = len( self.edgelist.edgelist_df[ self.edgelist.edgelist_df["src"] @@ -1486,17 +1484,26 @@ def add_internal_vertex_id( class DiGraph(Graph): - def __init__(self, m_graph=None, edge_attr=None): + """ + cuGraph directed graph class. Drops parallel edges. 
+ """ + def __init__(self, m_graph=None): super().__init__( - m_graph=m_graph, edge_attr=edge_attr, symmetrized=True + m_graph=m_graph, symmetrized=True ) class MultiGraph(Graph): + """ + cuGraph class to create and store undirected graphs with parallel edges. + """ def __init__(self, renumbered=True): super().__init__(multi=True) class MultiDiGraph(Graph): + """ + cuGraph class to create and store directed graphs with parallel edges. + """ def __init__(self, renumbered=True): super().__init__(symmetrized=True, multi=True) diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index e46f4092dd4..07132df2598 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,6 +18,8 @@ from libcpp cimport bool from libcpp.memory cimport unique_ptr +from libcpp.utility cimport pair +from libcpp.vector cimport vector from rmm._lib.device_buffer cimport device_buffer @@ -142,6 +144,89 @@ cdef extern from "functions.hpp" namespace "cugraph": ET *map_size) except + +# renumber_edgelist() interface: +# +# +# 1. `cdef extern partition_t`: +# +cdef extern from "experimental/graph_view.hpp" namespace "cugraph::experimental": + + cdef cppclass partition_t[vertex_t]: + pass + + +# 2. 
return type for shuffle: +# +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef cppclass major_minor_weights_t[vertex_t, weight_t]: + major_minor_weights_t(const handle_t &handle) + pair[unique_ptr[device_buffer], size_t] get_major_wrap() + pair[unique_ptr[device_buffer], size_t] get_minor_wrap() + pair[unique_ptr[device_buffer], size_t] get_weights_wrap() + + +ctypedef fused shuffled_vertices_t: + major_minor_weights_t[int, float] + major_minor_weights_t[int, double] + major_minor_weights_t[long, float] + major_minor_weights_t[long, double] + +# 3. return type for renumber: +# +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef cppclass renum_quad_t[vertex_t, edge_t]: + renum_quad_t(const handle_t &handle) + pair[unique_ptr[device_buffer], size_t] get_dv_wrap() + vertex_t& get_num_vertices() + edge_t& get_num_edges() + int get_part_row_size() + int get_part_col_size() + int get_part_comm_rank() + unique_ptr[vector[vertex_t]] get_partition_offsets() + pair[vertex_t, vertex_t] get_part_local_vertex_range() + vertex_t get_part_local_vertex_first() + vertex_t get_part_local_vertex_last() + pair[vertex_t, vertex_t] get_part_vertex_partition_range(size_t vertex_partition_idx) + vertex_t get_part_vertex_partition_first(size_t vertex_partition_idx) + vertex_t get_part_vertex_partition_last(size_t vertex_partition_idx) + vertex_t get_part_vertex_partition_size(size_t vertex_partition_idx) + size_t get_part_number_of_matrix_partitions() + vertex_t get_part_matrix_partition_major_first(size_t partition_idx) + vertex_t get_part_matrix_partition_major_last(size_t partition_idx) + vertex_t get_part_matrix_partition_major_value_start_offset(size_t partition_idx) + pair[vertex_t, vertex_t] get_part_matrix_partition_minor_range() + vertex_t get_part_matrix_partition_minor_first() + vertex_t get_part_matrix_partition_minor_last() + +# 4. 
`groupby_gpuid_and_shuffle_values()` wrapper: +# +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef unique_ptr[major_minor_weights_t[vertex_t, weight_t]] call_shuffle[vertex_t, edge_t, weight_t]( + const handle_t &handle, + vertex_t *edgelist_major_vertices, + vertex_t *edgelist_minor_vertices, + weight_t* edgelist_weights, + edge_t num_edges, + bool is_hyper_partitioned) except + + + +# 5. `renumber_edgelist()` wrapper +# +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef unique_ptr[renum_quad_t[vertex_t, edge_t]] call_renumber[vertex_t, edge_t]( + const handle_t &handle, + vertex_t *edgelist_major_vertices, + vertex_t *edgelist_minor_vertices, + edge_t num_edges, + bool is_hyper_partitioned, + bool do_check, + bool multi_gpu) except + + + cdef extern from "" namespace "std" nogil: cdef unique_ptr[GraphCOO[int,int,float]] move(unique_ptr[GraphCOO[int,int,float]]) cdef unique_ptr[GraphCOO[int,int,double]] move(unique_ptr[GraphCOO[int,int,double]]) @@ -241,3 +326,16 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": int *local_vertices, int *local_edges, int *local_offsets) except + + + cdef cppclass cy_multi_edgelists_t: + size_t number_of_vertices + size_t number_of_edges + size_t number_of_subgraph + unique_ptr[device_buffer] src_indices + unique_ptr[device_buffer] dst_indices + unique_ptr[device_buffer] edge_data + unique_ptr[device_buffer] subgraph_offsets + +cdef extern from "" namespace "std" nogil: + cdef cy_multi_edgelists_t move(cy_multi_edgelists_t) + cdef unique_ptr[cy_multi_edgelists_t] move(unique_ptr[cy_multi_edgelists_t]) diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py index f1b8949eb5d..deb2b9f4114 100644 --- a/python/cugraph/structure/number_map.py +++ b/python/cugraph/structure/number_map.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -895,7 +895,7 @@ def unrenumber(self, df, column_name, preserve_order=False): if preserve_order: df = df.sort_values( index_name - ).drop(index_name).reset_index(drop=True) + ).drop(columns=index_name).reset_index(drop=True) if type(df) is dask_cudf.DataFrame: return df.map_partitions( diff --git a/python/cugraph/structure/symmetrize.py b/python/cugraph/structure/symmetrize.py index e7fd15144aa..0f4ca90a97c 100644 --- a/python/cugraph/structure/symmetrize.py +++ b/python/cugraph/structure/symmetrize.py @@ -16,7 +16,7 @@ import dask_cudf -def symmetrize_df(df, src_name, dst_name): +def symmetrize_df(df, src_name, dst_name, multi=False, symmetrize=True): """ Take a COO stored in a DataFrame, along with the column names of the source and destination columns and create a new data frame @@ -42,6 +42,13 @@ def symmetrize_df(df, src_name, dst_name): Name of the column in the data frame containing the source ids dst_name : string Name of the column in the data frame containing the destination ids + multi : bool + Set to True if graph is a Multi(Di)Graph. This allows multiple + edges instead of dropping them. + symmetrize : bool + Default is True to perform symmetrization. If False only duplicate + edges are dropped. + Examples -------- >>> import cugraph.dask as dcg @@ -54,26 +61,30 @@ def symmetrize_df(df, src_name, dst_name): >>> sym_ddf = cugraph.symmetrize_ddf(ddf, "src", "dst", "weight") >>> Comms.destroy() """ - gdf = cudf.DataFrame() - # # Now append the columns. We add sources to the end of destinations, # and destinations to the end of sources. Otherwise we append a # column onto itself. 
# - for idx, name in enumerate(df.columns): - if name == src_name: - gdf[src_name] = df[src_name].append( - df[dst_name], ignore_index=True - ) - elif name == dst_name: - gdf[dst_name] = df[dst_name].append( - df[src_name], ignore_index=True - ) - else: - gdf[name] = df[name].append(df[name], ignore_index=True) - - return gdf.groupby(by=[src_name, dst_name], as_index=False).min() + if symmetrize: + gdf = cudf.DataFrame() + for idx, name in enumerate(df.columns): + if name == src_name: + gdf[src_name] = df[src_name].append( + df[dst_name], ignore_index=True + ) + elif name == dst_name: + gdf[dst_name] = df[dst_name].append( + df[src_name], ignore_index=True + ) + else: + gdf[name] = df[name].append(df[name], ignore_index=True) + else: + gdf = df + if multi: + return gdf + else: + return gdf.groupby(by=[src_name, dst_name], as_index=False).min() def symmetrize_ddf(df, src_name, dst_name, weight_name=None): @@ -105,6 +116,12 @@ def symmetrize_ddf(df, src_name, dst_name, weight_name=None): Name of the column in the data frame containing the source ids dst_name : string Name of the column in the data frame containing the destination ids + multi : bool + Set to True if graph is a Multi(Di)Graph. This allows multiple + edges instead of dropping them. + symmetrize : bool + Default is True to perform symmetrization. If False only duplicate + edges are dropped. 
Examples -------- @@ -129,7 +146,8 @@ def symmetrize_ddf(df, src_name, dst_name, weight_name=None): return result -def symmetrize(source_col, dest_col, value_col=None): +def symmetrize(source_col, dest_col, value_col=None, multi=False, + symmetrize=True): """ Take a COO set of source destination pairs along with associated values stored in a single GPU or distributed @@ -190,7 +208,8 @@ def symmetrize(source_col, dest_col, value_col=None): input_df, "source", "destination", weight_name ).persist() else: - output_df = symmetrize_df(input_df, "source", "destination") + output_df = symmetrize_df(input_df, "source", "destination", multi, + symmetrize) if value_col is not None: return ( diff --git a/python/cugraph/tests/test_betweenness_centrality.py b/python/cugraph/tests/test_betweenness_centrality.py old mode 100644 new mode 100755 index 33b2842645d..f338e5aa633 --- a/python/cugraph/tests/test_betweenness_centrality.py +++ b/python/cugraph/tests/test_betweenness_centrality.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION.: +# Copyright (c) 2020-2021, NVIDIA CORPORATION.: # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -55,7 +55,7 @@ # Comparison functions # ============================================================================= def calc_betweenness_centrality( - graph_file, + graph_obj_tuple, directed=True, k=None, normalized=False, @@ -120,8 +120,7 @@ def calc_betweenness_centrality( G = None Gnx = None - G, Gnx = utils.build_cu_and_nx_graphs(graph_file, directed=directed, - edgevals=edgevals) + G, Gnx = graph_obj_tuple assert G is not None and Gnx is not None if multi_gpu_batch: @@ -299,46 +298,67 @@ def prepare_test(): gc.collect() +# ============================================================================= +# Pytest Fixtures +# ============================================================================= +DIRECTED = [pytest.param(d) for d in DIRECTED_GRAPH_OPTIONS] +DATASETS_SMALL = [pytest.param(d) for d in utils.DATASETS_SMALL] +DATASETS_UNRENUMBERED = [pytest.param(d) for d in utils.DATASETS_UNRENUMBERED] +WEIGHTED_GRAPH_OPTIONS = [pytest.param(w) for w in WEIGHTED_GRAPH_OPTIONS] + + +small_graph_fixture_params = utils.genFixtureParamsProduct( + (DATASETS_SMALL, "grph"), + (DIRECTED, "dirctd"), + (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) + +unrenumbered_graph_fixture_params = utils.genFixtureParamsProduct( + (DATASETS_UNRENUMBERED, "grph"), + (DIRECTED, "dirctd"), + (WEIGHTED_GRAPH_OPTIONS, "wgtd_gph_opts")) + + +@pytest.fixture(scope="module", params=small_graph_fixture_params) +def get_cu_nx_graph_datasets_small(request): + return utils.build_cu_and_nx_graphs(*request.param) + + +@pytest.fixture(scope="module", params=unrenumbered_graph_fixture_params) +def get_cu_nx_graph_datasets_unrenumbered(request): + return utils.build_cu_and_nx_graphs(*request.param) + + # ============================================================================= # Tests # ============================================================================= -@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) 
-@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [None]) @pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) -@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_centrality( - graph_file, - directed, + get_cu_nx_graph_datasets_small, subset_size, normalized, weight, endpoints, subset_seed, result_dtype, - edgevals ): prepare_test() sorted_df = calc_betweenness_centrality( - graph_file, - directed=directed, + get_cu_nx_graph_datasets_small, normalized=normalized, k=subset_size, weight=weight, endpoints=endpoints, seed=subset_seed, result_dtype=result_dtype, - edgevals=edgevals, ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) -@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", [None]) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [None]) @@ -346,10 +366,8 @@ def test_betweenness_centrality( @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) @pytest.mark.parametrize("use_k_full", [True]) -@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_centrality_k_full( - graph_file, - directed, + get_cu_nx_graph_datasets_small, subset_size, normalized, weight, @@ -357,14 +375,12 @@ def test_betweenness_centrality_k_full( subset_seed, result_dtype, use_k_full, - edgevals ): """Tests full betweenness centrality by using k = G.number_of_vertices() instead of k=None, checks that k scales properly""" prepare_test() sorted_df = calc_betweenness_centrality( - graph_file, - 
directed=directed, + get_cu_nx_graph_datasets_small, normalized=normalized, k=subset_size, weight=weight, @@ -372,7 +388,6 @@ def test_betweenness_centrality_k_full( seed=subset_seed, result_dtype=result_dtype, use_k_full=use_k_full, - edgevals=edgevals ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") @@ -381,25 +396,20 @@ def test_betweenness_centrality_k_full( # the function operating the comparison inside is first proceeding # to a random sampling over the number of vertices (thus direct offsets) # in the graph structure instead of actual vertices identifiers -@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) -@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [None]) @pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) @pytest.mark.parametrize("subset_seed", [None]) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) -@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_centrality_fixed_sample( - graph_file, - directed, + get_cu_nx_graph_datasets_unrenumbered, subset_size, normalized, weight, endpoints, subset_seed, result_dtype, - edgevals ): """Test Betweenness Centrality using a subset @@ -407,38 +417,31 @@ def test_betweenness_centrality_fixed_sample( """ prepare_test() sorted_df = calc_betweenness_centrality( - graph_file, - directed=directed, + get_cu_nx_graph_datasets_unrenumbered, k=subset_size, normalized=normalized, weight=weight, endpoints=endpoints, seed=subset_seed, result_dtype=result_dtype, - edgevals=edgevals ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) -@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("normalized", 
NORMALIZED_OPTIONS) @pytest.mark.parametrize("weight", [[]]) @pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) -@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_centrality_weight_except( - graph_file, - directed, + get_cu_nx_graph_datasets_small, subset_size, normalized, weight, endpoints, subset_seed, result_dtype, - edgevals ): """Calls betwenness_centrality with weight @@ -448,53 +451,44 @@ def test_betweenness_centrality_weight_except( prepare_test() with pytest.raises(NotImplementedError): sorted_df = calc_betweenness_centrality( - graph_file, - directed=directed, + get_cu_nx_graph_datasets_small, k=subset_size, normalized=normalized, weight=weight, endpoints=endpoints, seed=subset_seed, result_dtype=result_dtype, - edgevals=edgevals ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") -@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) -@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) @pytest.mark.parametrize("weight", [None]) @pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) @pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) @pytest.mark.parametrize("result_dtype", [str]) -@pytest.mark.parametrize("edgevals", WEIGHTED_GRAPH_OPTIONS) def test_betweenness_invalid_dtype( - graph_file, - directed, + get_cu_nx_graph_datasets_small, subset_size, normalized, weight, endpoints, subset_seed, result_dtype, - edgevals ): """Test calls edge_betwenness_centrality an invalid type""" prepare_test() with pytest.raises(TypeError): sorted_df = calc_betweenness_centrality( - graph_file, - directed=directed, + get_cu_nx_graph_datasets_small, k=subset_size, normalized=normalized, weight=weight, endpoints=endpoints, seed=subset_seed, 
result_dtype=result_dtype, - edgevals=edgevals ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") diff --git a/python/cugraph/tests/test_ecg.py b/python/cugraph/tests/test_ecg.py index 4dc01c389cc..ba705a787ee 100644 --- a/python/cugraph/tests/test_ecg.py +++ b/python/cugraph/tests/test_ecg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -14,34 +14,40 @@ import gc import pytest - import networkx as nx import cugraph + from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than + +from pathlib import PurePath def cugraph_call(G, min_weight, ensemble_size): df = cugraph.ecg(G, min_weight, ensemble_size) num_parts = df["partition"].max() + 1 - score = cugraph.analyzeClustering_modularity(G, num_parts, df, - 'vertex', 'partition') + score = cugraph.analyzeClustering_modularity( + G, num_parts, df, "vertex", "partition" + ) return score, num_parts def golden_call(graph_file): - if graph_file == "../datasets/dolphins.csv": + if graph_file == PurePath(utils.RAPIDS_DATASET_ROOT_DIR) / "dolphins.csv": return 0.4962422251701355 - if graph_file == "../datasets/karate.csv": + if graph_file == PurePath(utils.RAPIDS_DATASET_ROOT_DIR) / "karate.csv": return 0.38428664207458496 - if graph_file == "../datasets/netscience.csv": + if ( + graph_file + == PurePath(utils.RAPIDS_DATASET_ROOT_DIR) / "netscience.csv" + ): return 0.9279554486274719 DATASETS = [ - "../datasets/karate.csv", - "../datasets/dolphins.csv", - "../datasets/netscience.csv", + PurePath(utils.RAPIDS_DATASET_ROOT_DIR) / f + for f in ["karate.csv", "dolphins.csv", "netscience.csv"] ] MIN_WEIGHTS = [0.05, 0.10, 0.15] @@ -55,20 +61,32 @@ def golden_call(graph_file): def test_ecg_clustering(graph_file, min_weight, 
ensemble_size): gc.collect() - # Read in the graph and get a cugraph object - cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False) - G = cugraph.Graph() - G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") + if is_device_version_less_than((7, 0)): + cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False) + G = cugraph.Graph() + G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") - # Get the modularity score for partitioning versus random assignment - cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size) - golden_score = golden_call(graph_file) + # Get the modularity score for partitioning versus random assignment + with pytest.raises(RuntimeError): + cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size) + else: + # Read in the graph and get a cugraph object + cu_M = utils.read_csv_file(graph_file, read_weights_in_sp=False) + G = cugraph.Graph() + G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") + + # Get the modularity score for partitioning versus random assignment + cu_score, num_parts = cugraph_call(G, min_weight, ensemble_size) + golden_score = golden_call(graph_file) - # Assert that the partitioning has better modularity than the random - # assignment - assert cu_score > (0.95 * golden_score) + # Assert that the partitioning has better modularity than the random + # assignment + assert cu_score > (0.95 * golden_score) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", DATASETS) @pytest.mark.parametrize("min_weight", MIN_WEIGHTS) @pytest.mark.parametrize("ensemble_size", ENSEMBLE_SIZES) @@ -78,9 +96,10 @@ def test_ecg_clustering_nx(graph_file, min_weight, ensemble_size): # Read in the graph and get a NetworkX graph M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) G = nx.from_pandas_edgelist( - M, source="0", target="1", edge_attr="weight", - 
create_using=nx.Graph() + M, source="0", target="1", edge_attr="weight", create_using=nx.Graph() ) # Get the modularity score for partitioning versus random assignment - _ = cugraph.ecg(G, min_weight, ensemble_size, "weight") + df_dict = cugraph.ecg(G, min_weight, ensemble_size, "weight") + + assert isinstance(df_dict, dict) diff --git a/python/cugraph/tests/test_egonet.py b/python/cugraph/tests/test_egonet.py new file mode 100644 index 00000000000..009fd1252f1 --- /dev/null +++ b/python/cugraph/tests/test_egonet.py @@ -0,0 +1,101 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc + +import pytest + +import cugraph +from cugraph.tests import utils + +# Temporarily suppress warnings till networkX fixes deprecation warnings +# (Using or importing the ABCs from 'collections' instead of from +# 'collections.abc' is deprecated, and in 3.8 it will stop working) for +# python 3.7. Also, this import networkx needs to be relocated in the +# third-party group once this gets fixed. 
+import warnings + +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + import networkx as nx + +print("Networkx version : {} ".format(nx.__version__)) + +SEEDS = [0, 5, 13] +RADIUS = [1, 2, 3] + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.parametrize("seed", SEEDS) +@pytest.mark.parametrize("radius", RADIUS) +def test_ego_graph_nx(graph_file, seed, radius): + gc.collect() + + # Nx + df = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + Gnx = nx.from_pandas_edgelist( + df, create_using=nx.Graph(), source="0", target="1", edge_attr="weight" + ) + ego_nx = nx.ego_graph(Gnx, seed, radius=radius) + + # cugraph + ego_cugraph = cugraph.ego_graph(Gnx, seed, radius=radius) + + assert nx.is_isomorphic(ego_nx, ego_cugraph) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.parametrize("seeds", [[0, 5, 13]]) +@pytest.mark.parametrize("radius", [1, 2, 3]) +def test_batched_ego_graphs(graph_file, seeds, radius): + """ + Compute the induced subgraph of neighbors for each node in seeds + within a given radius. + Parameters + ---------- + G : cugraph.Graph, networkx.Graph, CuPy or SciPy sparse matrix + Graph or matrix object, which should contain the connectivity + information. Edge weights, if present, should be single or double + precision floating point values. + seeds : cudf.Series + Specifies the seeds of the induced egonet subgraphs + radius: integer, optional + Include all neighbors of distance<=radius from n. + + Returns + ------- + ego_edge_lists : cudf.DataFrame + GPU data frame containing all induced sources identifiers, + destination identifiers, edge weights + seeds_offsets: cudf.Series + Series containing the starting offset in the returned edge list + for each seed. 
+ """ + gc.collect() + + # Nx + df = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + Gnx = nx.from_pandas_edgelist( + df, create_using=nx.Graph(), source="0", target="1", edge_attr="weight" + ) + + # cugraph + df, offsets = cugraph.batched_ego_graphs(Gnx, seeds, radius=radius) + for i in range(len(seeds)): + ego_nx = nx.ego_graph(Gnx, seeds[i], radius=radius) + ego_df = df[offsets[i]:offsets[i+1]] + ego_cugraph = nx.from_pandas_edgelist(ego_df, + source="src", + target="dst", + edge_attr="weight") + assert nx.is_isomorphic(ego_nx, ego_cugraph) diff --git a/python/cugraph/tests/test_force_atlas2.py b/python/cugraph/tests/test_force_atlas2.py index 4de49cb4088..f399460e5e6 100644 --- a/python/cugraph/tests/test_force_atlas2.py +++ b/python/cugraph/tests/test_force_atlas2.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,7 +12,6 @@ # limitations under the License. 
import time - import pytest import cugraph @@ -20,6 +19,7 @@ from cugraph.tests import utils from sklearn.manifold import trustworthiness import scipy.io +from pathlib import PurePath # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -61,11 +61,14 @@ def cugraph_call(cu_M, max_iter, pos_list, outbound_attraction_distribution, DATASETS = [ - ("../datasets/karate.csv", 0.70), - ("../datasets/polbooks.csv", 0.75), - ("../datasets/dolphins.csv", 0.66), - ("../datasets/netscience.csv", 0.66), + (PurePath(utils.RAPIDS_DATASET_ROOT_DIR)/f,)+(d,) for (f, d) in [ + ("karate.csv", 0.70), + ("polbooks.csv", 0.75), + ("dolphins.csv", 0.66), + ("netscience.csv", 0.66)] ] + + MAX_ITERATIONS = [500] BARNES_HUT_OPTIMIZE = [False, True] @@ -120,7 +123,7 @@ def test_force_atlas2(graph_file, score, max_iter, iterations on a given graph. """ - matrix_file = graph_file[:-4] + ".mtx" + matrix_file = graph_file.with_suffix(".mtx") M = scipy.io.mmread(matrix_file) M = M.todense() cu_trust = trustworthiness(M, cu_pos[["x", "y"]].to_pandas()) diff --git a/python/cugraph/tests/test_hungarian.py b/python/cugraph/tests/test_hungarian.py index 280903bc303..4183bcc2c89 100644 --- a/python/cugraph/tests/test_hungarian.py +++ b/python/cugraph/tests/test_hungarian.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,7 +12,6 @@ # limitations under the License. 
import gc -from itertools import product from timeit import default_timer as timer import numpy as np @@ -21,7 +20,6 @@ import cudf import cugraph from scipy.optimize import linear_sum_assignment -import rmm def create_random_bipartite(v1, v2, size, dtype): @@ -54,33 +52,23 @@ def create_random_bipartite(v1, v2, size, dtype): return df1['src'], g, a -SPARSE_SIZES = [[5, 5, 100], [500, 500, 10000], [5000, 5000, 100000]] +SPARSE_SIZES = [[5, 5, 100], [500, 500, 10000]] +DENSE_SIZES = [[5, 100], [500, 10000]] def setup_function(): gc.collect() -# Test all combinations of default/managed and pooled/non-pooled allocation -@pytest.mark.parametrize('managed, pool', - list(product([False, True], [False, True]))) @pytest.mark.parametrize('v1_size, v2_size, weight_limit', SPARSE_SIZES) -def test_hungarian(managed, pool, v1_size, v2_size, weight_limit): - rmm.reinitialize( - managed_memory=managed, - pool_allocator=pool, - initial_pool_size=2 << 27 - ) - - assert(rmm.is_initialized()) - +def test_hungarian(v1_size, v2_size, weight_limit): v1, g, m = create_random_bipartite(v1_size, v2_size, weight_limit, np.float) start = timer() - matching = cugraph.hungarian(g, v1) + cugraph_cost, matching = cugraph.hungarian(g, v1) end = timer() print('cugraph time: ', (end - start)) @@ -93,14 +81,29 @@ def test_hungarian(managed, pool, v1_size, v2_size, weight_limit): scipy_cost = m[np_matching[0], np_matching[1]].sum() - cugraph_df = matching.merge(g.edgelist.edgelist_df, - left_on=['vertex', 'assignment'], - right_on=['src', 'dst'], - how='left') + assert(scipy_cost == cugraph_cost) + - cugraph_cost = cugraph_df['weights'].sum() +@pytest.mark.parametrize('n, weight_limit', DENSE_SIZES) +def test_dense_hungarian(n, weight_limit): + C = np.random.uniform( + 0, weight_limit, size=(n, n) + ).round().astype(np.float32) + + C_series = cudf.Series(C.flatten()) + + start = timer() + cugraph_cost, matching = cugraph.dense_hungarian(C_series, n, n) + end = timer() + + print('cugraph time: ', (end 
- start)) + + start = timer() + np_matching = linear_sum_assignment(C) + end = timer() + + print('scipy time: ', (end - start)) - print('scipy_cost = ', scipy_cost) - print('cugraph_cost = ', cugraph_cost) + scipy_cost = C[np_matching[0], np_matching[1]].sum() assert(scipy_cost == cugraph_cost) diff --git a/python/cugraph/tests/test_jaccard.py b/python/cugraph/tests/test_jaccard.py index 3c3f6224d83..b61101ef1d0 100644 --- a/python/cugraph/tests/test_jaccard.py +++ b/python/cugraph/tests/test_jaccard.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -13,11 +13,11 @@ import gc import time - import pytest import cugraph from cugraph.tests import utils +from pathlib import PurePath # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -113,7 +113,9 @@ def test_jaccard(graph_file): assert err == 0 -@pytest.mark.parametrize("graph_file", ["../datasets/netscience.csv"]) +@pytest.mark.parametrize("graph_file", [PurePath( + utils.RAPIDS_DATASET_ROOT_DIR)/"netscience.csv"] +) def test_jaccard_edgevals(graph_file): gc.collect() diff --git a/python/cugraph/tests/test_k_truss_subgraph.py b/python/cugraph/tests/test_k_truss_subgraph.py index e9ccac81cf6..a86490fb561 100644 --- a/python/cugraph/tests/test_k_truss_subgraph.py +++ b/python/cugraph/tests/test_k_truss_subgraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -41,7 +41,11 @@ # currently in networkx master and will hopefully will make it to a release # soon. def ktruss_ground_truth(graph_file): - G = nx.read_edgelist(graph_file, nodetype=int, data=(("weights", float),)) + G = nx.read_edgelist( + str(graph_file), + nodetype=int, + data=(("weights", float),) + ) df = nx.to_pandas_edgelist(G) return df @@ -93,9 +97,6 @@ def test_ktruss_subgraph_Graph_nx(graph_file, nx_ground_truth): create_using=nx.Graph() ) k_subgraph = cugraph.k_truss(G, k) - df = nx.to_pandas_edgelist(k_subgraph) - k_truss_nx = nx.k_truss(G, k) - nx_df = nx.to_pandas_edgelist(k_truss_nx) - assert len(df) == len(nx_df) + assert nx.is_isomorphic(k_subgraph, k_truss_nx) diff --git a/python/cugraph/tests/test_leiden.py b/python/cugraph/tests/test_leiden.py index d6a7f86b5c5..89203d5014c 100644 --- a/python/cugraph/tests/test_leiden.py +++ b/python/cugraph/tests/test_leiden.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -20,6 +20,8 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than + # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from # 'collections.abc' is deprecated, and in 3.8 it will stop working) for @@ -53,6 +55,9 @@ def cugraph_louvain(G, edgevals=False): return parts, mod +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_leiden(graph_file): gc.collect() @@ -73,6 +78,9 @@ def test_leiden(graph_file): assert leiden_mod >= (0.99 * louvain_mod) +@pytest.mark.skipif( + is_device_version_less_than((7, 0)), reason="Not supported on Pascal" +) @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_leiden_nx(graph_file): gc.collect() @@ -81,18 +89,13 @@ def test_leiden_nx(graph_file): NM = utils.read_csv_for_nx(graph_file) if edgevals: - G = nx.from_pandas_edgelist(NM, - create_using=nx.Graph(), - source="0", - target="1" - ) + G = nx.from_pandas_edgelist( + NM, create_using=nx.Graph(), source="0", target="1" + ) else: - G = nx.from_pandas_edgelist(NM, - create_using=nx.Graph(), - source="0", - target="1", - edge_attr="2" - ) + G = nx.from_pandas_edgelist( + NM, create_using=nx.Graph(), source="0", target="1", edge_attr="2" + ) leiden_parts, leiden_mod = cugraph_leiden(G, edgevals=True) louvain_parts, louvain_mod = cugraph_louvain(G, edgevals=True) diff --git a/python/cugraph/tests/test_louvain.py b/python/cugraph/tests/test_louvain.py index d6b0030eb73..50e9ccaa4c5 100644 --- a/python/cugraph/tests/test_louvain.py +++ b/python/cugraph/tests/test_louvain.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,6 +18,7 @@ import cugraph from cugraph.tests import utils +from cugraph.utilities.utils import is_device_version_less_than # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -71,51 +72,63 @@ def networkx_call(M): def test_louvain_with_edgevals(graph_file): gc.collect() - M = utils.read_csv_for_nx(graph_file) - cu_M = utils.read_csv_file(graph_file) - cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True) + if is_device_version_less_than((7, 0)): + cu_M = utils.read_csv_file(graph_file) + with pytest.raises(RuntimeError): + cu_parts, cu_mod = cugraph_call(cu_M) + else: + M = utils.read_csv_for_nx(graph_file) + cu_M = utils.read_csv_file(graph_file) + cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True) - nx_parts = networkx_call(M) - # Calculating modularity scores for comparison - Gnx = nx.from_pandas_edgelist( - M, source="0", target="1", edge_attr="weight", create_using=nx.Graph() - ) + nx_parts = networkx_call(M) + # Calculating modularity scores for comparison + Gnx = nx.from_pandas_edgelist( + M, source="0", target="1", + edge_attr="weight", create_using=nx.Graph() + ) - cu_parts = cu_parts.to_pandas() - cu_map = dict(zip(cu_parts['vertex'], cu_parts['partition'])) + cu_parts = cu_parts.to_pandas() + cu_map = dict(zip(cu_parts["vertex"], cu_parts["partition"])) - assert set(nx_parts.keys()) == set(cu_map.keys()) + assert set(nx_parts.keys()) == set(cu_map.keys()) - cu_mod_nx = community.modularity(cu_map, Gnx) - nx_mod = community.modularity(nx_parts, Gnx) + cu_mod_nx = community.modularity(cu_map, Gnx) + nx_mod = community.modularity(nx_parts, Gnx) - assert len(cu_parts) == len(nx_parts) - assert cu_mod > (0.82 * nx_mod) - assert abs(cu_mod - cu_mod_nx) < 0.0001 + assert len(cu_parts) == 
len(nx_parts) + assert cu_mod > (0.82 * nx_mod) + assert abs(cu_mod - cu_mod_nx) < 0.0001 @pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_louvain(graph_file): gc.collect() - M = utils.read_csv_for_nx(graph_file) - cu_M = utils.read_csv_file(graph_file) - cu_parts, cu_mod = cugraph_call(cu_M) - nx_parts = networkx_call(M) + if is_device_version_less_than((7, 0)): + cu_M = utils.read_csv_file(graph_file) + with pytest.raises(RuntimeError): + cu_parts, cu_mod = cugraph_call(cu_M) + else: + M = utils.read_csv_for_nx(graph_file) + cu_M = utils.read_csv_file(graph_file) + cu_parts, cu_mod = cugraph_call(cu_M) + nx_parts = networkx_call(M) - # Calculating modularity scores for comparison - Gnx = nx.from_pandas_edgelist( - M, source="0", target="1", edge_attr="weight", create_using=nx.Graph() - ) + # Calculating modularity scores for comparison + Gnx = nx.from_pandas_edgelist( + M, source="0", target="1", + edge_attr="weight", create_using=nx.Graph() + ) - cu_parts = cu_parts.to_pandas() - cu_map = dict(zip(cu_parts['vertex'], cu_parts['partition'])) + cu_parts = cu_parts.to_pandas() + cu_map = dict(zip(cu_parts["vertex"], cu_parts["partition"])) - assert set(nx_parts.keys()) == set(cu_map.keys()) + assert set(nx_parts.keys()) == set(cu_map.keys()) - cu_mod_nx = community.modularity(cu_map, Gnx) - nx_mod = community.modularity(nx_parts, Gnx) + cu_mod_nx = community.modularity(cu_map, Gnx) + nx_mod = community.modularity(nx_parts, Gnx) - assert len(cu_parts) == len(nx_parts) - assert cu_mod > (0.82 * nx_mod) - assert abs(cu_mod - cu_mod_nx) < 0.0001 + assert len(cu_parts) == len(nx_parts) + assert cu_mod > (0.82 * nx_mod) + assert abs(cu_mod - cu_mod_nx) < 0.0001 diff --git a/python/cugraph/tests/test_multigraph.py b/python/cugraph/tests/test_multigraph.py new file mode 100644 index 00000000000..cb659bc7e24 --- /dev/null +++ b/python/cugraph/tests/test_multigraph.py @@ -0,0 +1,104 @@ +import cugraph +import networkx as nx +from cugraph.tests 
import utils +import pytest +import gc +import numpy as np + + +# ============================================================================= +# Pytest Setup / Teardown - called for each test function +# ============================================================================= +def setup_function(): + gc.collect() + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_multigraph(graph_file): + # FIXME: Migrate to new test fixtures for Graph setup once available + cuM = utils.read_csv_file(graph_file) + G = cugraph.MultiDiGraph() + G.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2") + + nxM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + Gnx = nx.from_pandas_edgelist( + nxM, + source="0", + target="1", + edge_attr="weight", + create_using=nx.MultiDiGraph(), + ) + + assert G.number_of_edges() == Gnx.number_of_edges() + assert G.number_of_nodes() == Gnx.number_of_nodes() + cuedges = cugraph.to_pandas_edgelist(G) + cuedges.rename(columns={"src": "source", "dst": "target", + "weights": "weight"}, inplace=True) + cuedges["weight"] = cuedges["weight"].round(decimals=3) + nxedges = nx.to_pandas_edgelist(Gnx).astype(dtype={"source": "int32", + "target": "int32", + "weight": "float32"}) + cuedges = cuedges.sort_values(by=["source", "target"]).\ + reset_index(drop=True) + nxedges = nxedges.sort_values(by=["source", "target"]).\ + reset_index(drop=True) + nxedges["weight"] = nxedges["weight"].round(decimals=3) + assert nxedges.equals(cuedges[["source", "target", "weight"]]) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_Graph_from_MultiGraph(graph_file): + # FIXME: Migrate to new test fixtures for Graph setup once available + cuM = utils.read_csv_file(graph_file) + GM = cugraph.MultiGraph() + GM.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2") + nxM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + GnxM = nx.from_pandas_edgelist( + nxM, + source="0", + target="1", + 
edge_attr="weight", + create_using=nx.MultiGraph(), + ) + + G = cugraph.Graph(GM) + Gnx = nx.Graph(GnxM) + assert Gnx.number_of_edges() == G.number_of_edges() + + GdM = cugraph.MultiDiGraph() + GdM.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2") + GnxdM = nx.from_pandas_edgelist( + nxM, + source="0", + target="1", + edge_attr="weight", + create_using=nx.MultiGraph(), + ) + Gd = cugraph.DiGraph(GdM) + Gnxd = nx.DiGraph(GnxdM) + assert Gnxd.number_of_edges() == Gd.number_of_edges() + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_multigraph_sssp(graph_file): + # FIXME: Migrate to new test fixtures for Graph setup once available + cuM = utils.read_csv_file(graph_file) + G = cugraph.MultiDiGraph() + G.from_cudf_edgelist(cuM, source="0", destination="1", edge_attr="2") + cu_paths = cugraph.sssp(G, 0) + max_val = np.finfo(cu_paths["distance"].dtype).max + cu_paths = cu_paths[cu_paths["distance"] != max_val] + nxM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + Gnx = nx.from_pandas_edgelist( + nxM, + source="0", + target="1", + edge_attr="weight", + create_using=nx.MultiDiGraph(), + ) + nx_paths = nx.single_source_dijkstra_path_length(Gnx, 0) + + cu_dist = cu_paths.sort_values(by='vertex')['distance'].to_array() + nx_dist = [i[1] for i in sorted(nx_paths.items())] + + assert (cu_dist == nx_dist).all() diff --git a/python/cugraph/tests/test_pagerank.py b/python/cugraph/tests/test_pagerank.py index 1ab370041b5..3ce8dd4ffe9 100644 --- a/python/cugraph/tests/test_pagerank.py +++ b/python/cugraph/tests/test_pagerank.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -143,7 +143,7 @@ def networkx_call(Gnx, max_iter, tol, alpha, personalization_perc, nnz_vtx): # # https://github.com/rapidsai/cugraph/issues/533 # -# @pytest.mark.parametrize("graph_file", utils.DATASETS) + @pytest.mark.parametrize("graph_file", utils.DATASETS) @pytest.mark.parametrize("max_iter", MAX_ITERATIONS) @pytest.mark.parametrize("tol", TOLERANCE) @@ -159,7 +159,8 @@ def test_pagerank( M = utils.read_csv_for_nx(graph_file) nnz_vtx = np.unique(M[['0', '1']]) Gnx = nx.from_pandas_edgelist( - M, source="0", target="1", create_using=nx.DiGraph() + M, source="0", target="1", edge_attr="weight", + create_using=nx.DiGraph() ) networkx_pr, networkx_prsn = networkx_call( @@ -169,13 +170,13 @@ def test_pagerank( cu_nstart = None if has_guess == 1: cu_nstart = cudify(networkx_pr) - max_iter = 5 + max_iter = 20 cu_prsn = cudify(networkx_prsn) # cuGraph PageRank cu_M = utils.read_csv_file(graph_file) G = cugraph.DiGraph() - G.from_cudf_edgelist(cu_M, source="0", destination="1") + G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") cugraph_pr = cugraph_call(G, max_iter, tol, alpha, cu_prsn, cu_nstart) @@ -218,7 +219,7 @@ def test_pagerank_nx( cu_nstart = None if has_guess == 1: cu_nstart = cudify(networkx_pr) - max_iter = 5 + max_iter = 20 cu_prsn = cudify(networkx_prsn) # cuGraph PageRank with Nx Graph diff --git a/python/cugraph/tests/test_subgraph_extraction.py b/python/cugraph/tests/test_subgraph_extraction.py index a4f36af994a..9e9eccc4347 100644 --- a/python/cugraph/tests/test_subgraph_extraction.py +++ b/python/cugraph/tests/test_subgraph_extraction.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -123,10 +123,9 @@ def test_subgraph_extraction_Graph_nx(graph_file): ) nx_sub = nx.subgraph(G, verts) - nx_df = nx.to_pandas_edgelist(nx_sub).to_dict() cu_verts = cudf.Series(verts) cu_sub = cugraph.subgraph(G, cu_verts) - cu_df = nx.to_pandas_edgelist(cu_sub).to_dict() - assert nx_df == cu_df + for (u, v) in cu_sub.edges(): + assert nx_sub.has_edge(u, v) diff --git a/python/cugraph/tests/test_traveling_salesperson.py b/python/cugraph/tests/test_traveling_salesperson.py new file mode 100644 index 00000000000..d43b55c43d0 --- /dev/null +++ b/python/cugraph/tests/test_traveling_salesperson.py @@ -0,0 +1,81 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from cugraph.tests import utils +import cudf +import cugraph +import gc +import numpy as np +import pytest + +# Temporarily suppress warnings till networkX fixes deprecation warnings +# (Using or importing the ABCs from 'collections' instead of from +# 'collections.abc' is deprecated, and in 3.8 it will stop working) for +# python 3.7. Also, this import networkx needs to be relocated in the +# third-party group once this gets fixed. 
+import warnings + +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + import networkx as nx + +print("Networkx version : {} ".format(nx.__version__)) + + +# ============================================================================= +# Pytest Setup / Teardown - called for each test function +# ============================================================================= +def setup_function(): + gc.collect() + + +# ============================================================================= +# Helper functions +# ============================================================================= +def load_tsp(filename=None): + gdf = cudf.read_csv(filename, + delim_whitespace=True, + skiprows=6, + names=["vertex", "x", "y"], + dtypes={"vertex": "int32", + "x": "float32", + "y": "float32"} + ) + gdf = gdf.dropna() + gdf['vertex'] = gdf['vertex'].str.strip() + gdf['vertex'] = gdf['vertex'].astype("int32") + return gdf + + +# ============================================================================= +# Tests +# ============================================================================= +@pytest.mark.parametrize("tsplib_file, ref_cost", utils.DATASETS_TSPLIB) +def test_traveling_salesperson(gpubenchmark, tsplib_file, ref_cost): + pos_list = load_tsp(tsplib_file) + + cu_route, cu_cost = gpubenchmark(cugraph.traveling_salesperson, + pos_list, + restarts=4096) + + print("Cugraph cost: ", cu_cost) + print("Ref cost: ", ref_cost) + error = np.abs(cu_cost - ref_cost) / ref_cost + print("Approximation error is: {:.2f}%".format(error * 100)) + # Check we are within 5% of TSPLIB + assert(error * 100 < 5.) 
+ assert(cu_route.nunique() == pos_list.shape[0]) + assert(cu_route.shape[0] == pos_list.shape[0]) + min_val = pos_list["vertex"].min() + max_val = pos_list["vertex"].max() + assert(cu_route.clip(min_val, max_val).shape[0] == cu_route.shape[0]) diff --git a/python/cugraph/tests/test_utils.py b/python/cugraph/tests/test_utils.py index 22af649ea2e..55410817f90 100644 --- a/python/cugraph/tests/test_utils.py +++ b/python/cugraph/tests/test_utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -13,16 +13,16 @@ import gc import pytest - import cugraph from cugraph.tests import utils +from pathlib import PurePath def test_bfs_paths(): with pytest.raises(ValueError) as ErrorMsg: gc.collect() - graph_file = '../datasets/karate.csv' + graph_file = PurePath(utils.RAPIDS_DATASET_ROOT_DIR)/"karate.csv" cu_M = utils.read_csv_file(graph_file) @@ -47,7 +47,7 @@ def test_bfs_paths_array(): with pytest.raises(ValueError) as ErrorMsg: gc.collect() - graph_file = '../datasets/karate.csv' + graph_file = PurePath(utils.RAPIDS_DATASET_ROOT_DIR)/"karate.csv" cu_M = utils.read_csv_file(graph_file) diff --git a/python/cugraph/tests/utils.py b/python/cugraph/tests/utils.py old mode 100644 new mode 100755 index 164c6efb084..c2c14e0c02d --- a/python/cugraph/tests/utils.py +++ b/python/cugraph/tests/utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -26,7 +26,7 @@ from scipy.sparse.coo import coo_matrix as sp_coo_matrix from scipy.sparse.csr import csr_matrix as sp_csr_matrix from scipy.sparse.csc import csc_matrix as sp_csc_matrix - +from pathlib import PurePath import cudf import dask_cudf @@ -40,37 +40,59 @@ # # Datasets # -DATASETS_UNDIRECTED = ["../datasets/karate.csv", "../datasets/dolphins.csv"] + + +RAPIDS_DATASET_ROOT_DIR = os.getenv("RAPIDS_DATASET_ROOT_DIR", "../datasets") + +DATASETS_UNDIRECTED = [PurePath(RAPIDS_DATASET_ROOT_DIR)/f for + f in ["karate.csv", "dolphins.csv"]] DATASETS_UNDIRECTED_WEIGHTS = [ - "../datasets/netscience.csv", + PurePath(RAPIDS_DATASET_ROOT_DIR)/"netscience.csv" ] -DATASETS_UNRENUMBERED = ["../datasets/karate-disjoint.csv"] +DATASETS_UNRENUMBERED = [PurePath( + RAPIDS_DATASET_ROOT_DIR)/"karate-disjoint.csv" +] -DATASETS = [ - "../datasets/karate-disjoint.csv", - "../datasets/dolphins.csv", - "../datasets/netscience.csv", +DATASETS = [PurePath(RAPIDS_DATASET_ROOT_DIR)/f for f in [ + "karate-disjoint.csv", + "dolphins.csv", + "netscience.csv"] ] + + # '../datasets/email-Eu-core.csv'] STRONGDATASETS = [ - "../datasets/dolphins.csv", - "../datasets/netscience.csv", - "../datasets/email-Eu-core.csv", + PurePath(RAPIDS_DATASET_ROOT_DIR)/f for f in [ + "dolphins.csv", + "netscience.csv", + "email-Eu-core.csv"] ] -DATASETS_KTRUSS = [ - ("../datasets/polbooks.csv", "../datasets/ref/ktruss/polbooks.csv") + +DATASETS_KTRUSS = [( + PurePath(RAPIDS_DATASET_ROOT_DIR)/"polbooks.csv", + PurePath(RAPIDS_DATASET_ROOT_DIR)/"ref/ktruss/polbooks.csv") +] + +DATASETS_TSPLIB = [ + (PurePath(RAPIDS_DATASET_ROOT_DIR)/f,) + (d,) for (f, d) in [ + ("gil262.tsp", 2378), + ("eil51.tsp", 426), + ("kroA100.tsp", 21282), + ("tsp225.tsp", 3916)] ] DATASETS_SMALL = [ - "../datasets/karate.csv", - "../datasets/dolphins.csv", - "../datasets/polbooks.csv", + PurePath(RAPIDS_DATASET_ROOT_DIR)/f for f in [ + "karate.csv", + "dolphins.csv", + "polbooks.csv"] ] + 
MATRIX_INPUT_TYPES = [ pytest.param( cp_coo_matrix, marks=pytest.mark.matrix_types, id="CuPy.coo_matrix" diff --git a/python/cugraph/traversal/__init__.py b/python/cugraph/traversal/__init__.py index 58e37a7add0..5944ebe0865 100644 --- a/python/cugraph/traversal/__init__.py +++ b/python/cugraph/traversal/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,4 +18,5 @@ shortest_path, filter_unreachable, shortest_path_length -) \ No newline at end of file +) +from cugraph.traversal.traveling_salesperson import traveling_salesperson diff --git a/python/cugraph/traversal/sssp.py b/python/cugraph/traversal/sssp.py index 4ba754ad4ed..8d77e6e9312 100644 --- a/python/cugraph/traversal/sssp.py +++ b/python/cugraph/traversal/sssp.py @@ -14,7 +14,7 @@ import numpy as np import cudf -from cugraph.structure import Graph, DiGraph +from cugraph.structure import Graph, DiGraph, MultiGraph, MultiDiGraph from cugraph.traversal import sssp_wrapper from cugraph.utilities import (ensure_cugraph_obj, is_matrix_type, @@ -104,7 +104,7 @@ def _convert_df_to_output_type(df, input_type, return_predecessors): return_predecessors is only used for return values from cupy/scipy input types. """ - if input_type in [Graph, DiGraph]: + if input_type in [Graph, DiGraph, MultiGraph, MultiDiGraph]: return df elif (nx is not None) and (input_type in [nx.Graph, nx.DiGraph]): diff --git a/python/cugraph/traversal/traveling_salesperson.pxd b/python/cugraph/traversal/traveling_salesperson.pxd new file mode 100644 index 00000000000..956c7da0978 --- /dev/null +++ b/python/cugraph/traversal/traveling_salesperson.pxd @@ -0,0 +1,34 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +from cugraph.structure.graph_primtypes cimport * + +cdef extern from "algorithms.hpp" namespace "cugraph": + + cdef float traveling_salesperson(const handle_t &handle, + int *vtx_ptr, + float *x_pos, + float *y_pos, + int nodes, + int restarts, + bool beam_search, + int k, + int nstart, + bool verbose, + int *route) except + + diff --git a/python/cugraph/traversal/traveling_salesperson.py b/python/cugraph/traversal/traveling_salesperson.py new file mode 100644 index 00000000000..ae17555e4ea --- /dev/null +++ b/python/cugraph/traversal/traveling_salesperson.py @@ -0,0 +1,75 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from cugraph.traversal import traveling_salesperson_wrapper +from cugraph.structure.graph import null_check +import cudf + + +def traveling_salesperson(pos_list, + restarts=100000, + beam_search=True, + k=4, + nstart=None, + verbose=False, + ): + """ + Finds an approximate solution to the traveling salesperson problem (TSP). + cuGraph computes an approximation of the TSP problem using hill climbing + optimization. + + The current implementation does not support a weighted graph. + Parameters + ---------- + pos_list: cudf.DataFrame + Data frame with initial vertex positions containing three columns: + 'vertex' ids and 'x', 'y' positions. + restarts: int + Number of starts to try. The more restarts, the better the solution + will be approximated. The number of restarts depends on the problem + size and should be kept low for instances above 2k cities. + beam_search: bool + Specify if the initial solution should use KNN for an approximation + solution. + k: int + Beam width to use in the search. + nstart: int + Vertex id to use as starting position. + verbose: bool + Logs configuration and iterative improvement. + + Returns + ------- + route : cudf.Series + cudf.Series of size V containing the ordered list of vertices + than needs to be visited. 
+ """ + + if not isinstance(pos_list, cudf.DataFrame): + raise TypeError("Instance should be cudf.DataFrame") + + null_check(pos_list['vertex']) + null_check(pos_list['x']) + null_check(pos_list['y']) + + if nstart is not None and not pos_list[pos_list['vertex'] == nstart].index: + raise ValueError("nstart should be in vertex ids") + + route, cost = traveling_salesperson_wrapper.traveling_salesperson( + pos_list, + restarts, + beam_search, + k, + nstart, + verbose) + return route, cost diff --git a/python/cugraph/traversal/traveling_salesperson_wrapper.pyx b/python/cugraph/traversal/traveling_salesperson_wrapper.pyx new file mode 100644 index 00000000000..5f87c42a638 --- /dev/null +++ b/python/cugraph/traversal/traveling_salesperson_wrapper.pyx @@ -0,0 +1,83 @@ +# Copyright (c) 2021, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +from cugraph.traversal.traveling_salesperson cimport traveling_salesperson as c_traveling_salesperson +from cugraph.structure import graph_primtypes_wrapper +from cugraph.structure.graph_primtypes cimport * +from libcpp cimport bool +from libc.stdint cimport uintptr_t +from numba import cuda + +import cudf +import numpy as np + + +def traveling_salesperson(pos_list, + restarts=100000, + beam_search=True, + k=4, + nstart=None, + verbose=False, + renumber=True, +): + """ + Call traveling_salesperson + """ + + nodes = pos_list.shape[0] + cdef uintptr_t x_pos = NULL + cdef uintptr_t y_pos = NULL + + pos_list['vertex'] = pos_list['vertex'].astype(np.int32) + pos_list['x'] = pos_list['x'].astype(np.float32) + pos_list['y'] = pos_list['y'].astype(np.float32) + x_pos = pos_list['x'].__cuda_array_interface__['data'][0] + y_pos = pos_list['y'].__cuda_array_interface__['data'][0] + + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get(); + + cdef float final_cost = 0.0 + + cdef uintptr_t route_ptr = NULL + route_arr = cuda.device_array(nodes, dtype=np.int32) + route_ptr = route_arr.device_ctypes_pointer.value + + cdef uintptr_t vtx_ptr = NULL + vtx_ptr = pos_list['vertex'].__cuda_array_interface__['data'][0] + + if nstart is None: + renumbered_nstart = 0 + else: + renumbered_nstart = pos_list[pos_list['vertex'] == nstart].index[0] + + final_cost = c_traveling_salesperson(handle_[0], + vtx_ptr, + x_pos, + y_pos, + nodes, + restarts, + beam_search, + k, + renumbered_nstart, + verbose, + route_ptr) + + route = cudf.Series(route_arr) + return route, final_cost diff --git a/python/cugraph/tree/minimum_spanning_tree.pxd b/python/cugraph/tree/minimum_spanning_tree.pxd index 8cea2bee0cc..a38aee96605 100644 --- a/python/cugraph/tree/minimum_spanning_tree.pxd +++ b/python/cugraph/tree/minimum_spanning_tree.pxd @@ -1,4 
+1,4 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Copyright (c) 2019-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at diff --git a/python/cugraph/utilities/utils.py b/python/cugraph/utilities/utils.py index b77f6789abe..39b789d7f79 100644 --- a/python/cugraph/utilities/utils.py +++ b/python/cugraph/utilities/utils.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020, NVIDIA CORPORATION. +# Copyright (c) 2020-2021, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -219,11 +219,11 @@ def ensure_cugraph_obj(obj, nx_weight_attr=None, matrix_graph_type=None): cugraph Graph-type obj to create when converting from a matrix type. """ # FIXME: importing here to avoid circular import - from cugraph.structure import Graph, DiGraph + from cugraph.structure import Graph, DiGraph, MultiGraph, MultiDiGraph from cugraph.utilities.nx_factory import convert_from_nx input_type = type(obj) - if input_type in [Graph, DiGraph]: + if input_type in [Graph, DiGraph, MultiGraph, MultiDiGraph]: return (obj, input_type) elif (nx is not None) and (input_type in [nx.Graph, nx.DiGraph]): diff --git a/python/pytest.ini b/python/pytest.ini index fb8c6ea0948..a1933ea34aa 100644 --- a/python/pytest.ini +++ b/python/pytest.ini @@ -5,6 +5,8 @@ addopts = --benchmark-min-rounds=1 --benchmark-columns="mean, rounds" --benchmark-gpu-disable + --cov=cugraph + --cov-report term-missing:skip-covered markers = managedmem_on: RMM managed memory enabled diff --git a/python/setuputils.py b/python/setuputils.py index 360526c2b56..47eaf74d4b6 100644 --- a/python/setuputils.py +++ b/python/setuputils.py @@ -1,5 +1,5 @@ # -# Copyright (c) 2018-2020, NVIDIA CORPORATION. +# Copyright (c) 2018-2021, NVIDIA CORPORATION. 
# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -79,8 +79,8 @@ def use_raft_package(raft_path, cpp_build_path, if you want to change RAFT location. - Uses RAFT located in $RAFT_PATH if $RAFT_PATH exists. - Otherwise it will look for RAFT in the libcugraph build folder, - located either in the default location ../cpp/build or in - $CUGRAPH_BUILD_PATH. + located either in the default locations ../cpp/build/raft, + ../cpp/build/_deps/raft-src, or in $CUGRAPH_BUILD_PATH. -Otherwise it will clone RAFT into _external_repositories. - Branch/git tag cloned is located in git_info_file in this case. @@ -88,17 +88,25 @@ def use_raft_package(raft_path, cpp_build_path, ------- raft_include_path: Str Path to the C++ include folder of RAFT + """ if os.path.isdir('cugraph/raft'): raft_path = os.path.realpath('cugraph/raft') # walk up two dirs from `python/raft` raft_path = os.path.join(raft_path, '..', '..') print("-- Using existing RAFT folder") + elif cpp_build_path and os.path.isdir(os.path.join(cpp_build_path, + '_deps/raft-src')): + raft_path = os.path.join(cpp_build_path, '_deps/raft-src') + raft_path = os.path.realpath(raft_path) + print("-- Using existing RAFT folder in CPP build dir from cmake " + "FetchContent") elif cpp_build_path and os.path.isdir(os.path.join(cpp_build_path, 'raft/src/raft')): raft_path = os.path.join(cpp_build_path, 'raft/src/raft') raft_path = os.path.realpath(raft_path) - print("-- Using existing RAFT folder in CPP build dir") + print("-- Using existing RAFT folder in CPP build dir from cmake " + "ExternalProject") elif isinstance(raft_path, (str, os.PathLike)): print('-- Using RAFT_PATH argument') elif os.environ.get('RAFT_PATH', False) is not False: @@ -254,8 +262,8 @@ def get_repo_cmake_info(names, file_path): `ExternalProject_Add(name` file_path : String Relative path of the location of the CMakeLists.txt (or the cmake - module which contains 
ExternalProject_Add definitions) to extract - the information. + module which contains FetchContent_Declare or ExternalProject_Add + definitions) to extract the information. Returns ------- @@ -264,22 +272,31 @@ def get_repo_cmake_info(names, file_path): where results[name][0] is the url of the repo and repo_info[repo][1] is the tag/commit hash to be cloned as specified by cmake. - """ with open(file_path, encoding='utf-8') as f: s = f.read() results = {} + cmake_ext_proj_decls = ["FetchContent_Declare", "ExternalProject_Add"] + for name in names: - res = re.findall(r'ExternalProject_Add\(' + re.escape(name) - + '\s.*GIT_REPOSITORY.*\s.*GIT_TAG.*', # noqa: W605 - s) - - res = re.sub(' +', ' ', res[0]) - res = res.split(' ') - res = [res[2][:-1], res[4]] - results[name] = res + res = None + for decl in cmake_ext_proj_decls: + res = re.search(f'{decl}\(\s*' # noqa: W605 + + '(' + re.escape(name) + ')' + + '\s+.*GIT_REPOSITORY\s+(\S+)\s+.+' # noqa: W605 + + '\s+.*GIT_TAG\s+(\S+)', # noqa: W605 + s) + if res: + break + if res is None: + raise RuntimeError('Could not find any of the following ' + f'statements: {cmake_ext_proj_decls}, for ' + f'module "{name}" in file "{file_path}" with ' + 'GIT_REPOSITORY and GIT_TAG settings') + + results[res.group(1)] = [res.group(2), res.group(3)] return results