diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS
index d35c5c02218..ebffd18ca5a 100644
--- a/.github/CODEOWNERS
+++ b/.github/CODEOWNERS
@@ -1,3 +1,17 @@
+# https://docs.github.com/en/github/creating-cloning-and-archiving-repositories/about-code-owners
+# Order matters - match of highest importance goes last (last match wins)
+
+#doc code owners
+datasets/ @rapidsai/cugraph-doc-codeowners
+notebooks/ @rapidsai/cugraph-doc-codeowners
+docs/ @rapidsai/cugraph-doc-codeowners
+**/*.txt @rapidsai/cugraph-doc-codeowners
+**/*.md @rapidsai/cugraph-doc-codeowners
+**/*.rst @rapidsai/cugraph-doc-codeowners
+**/*.ipynb @rapidsai/cugraph-doc-codeowners
+**/*.pdf @rapidsai/cugraph-doc-codeowners
+**/*.png @rapidsai/cugraph-doc-codeowners
+
#cpp code owners
cpp/ @rapidsai/cugraph-cpp-codeowners
@@ -9,7 +23,7 @@ python/ @rapidsai/cugraph-python-codeowners
**/cmake/ @rapidsai/cugraph-cmake-codeowners
#build/ops code owners
-.github/ @rapidsai/ops-codeowners
+.github/ @rapidsai/ops-codeowners
ci/ @rapidsai/ops-codeowners
conda/ @rapidsai/ops-codeowners
**/Dockerfile @rapidsai/ops-codeowners
diff --git a/.github/labeler.yml b/.github/labeler.yml
index 621d0fde833..9c3af6de64b 100644
--- a/.github/labeler.yml
+++ b/.github/labeler.yml
@@ -5,26 +5,33 @@
python:
- 'python/**'
- 'notebooks/**'
+
+benchmarks:
- 'benchmarks/**'
doc:
- 'docs/**'
- '**/*.md'
+ - 'datasets/**'
+ - 'notebooks/**'
+ - '**/*.txt'
+ - '**/*.rst'
+ - '**/*.ipynb'
+ - '**/*.pdf'
+ - '**/*.png'
datasets:
- 'datasets/**'
cuGraph:
- 'cpp/**'
-
+
CMake:
- '**/CMakeLists.txt'
- '**/cmake/**'
-
-Ops:
- - '.github/**'
- - 'ci/**'
+
+gpuCI:
+ - 'ci/**'
+
+conda:
- 'conda/**'
- - '**/Dockerfile'
- - '**/.dockerignore'
- - 'docker/**'
diff --git a/.github/workflows/stale.yaml b/.github/workflows/stale.yaml
new file mode 100644
index 00000000000..8b65da69aa2
--- /dev/null
+++ b/.github/workflows/stale.yaml
@@ -0,0 +1,57 @@
+name: Mark inactive issues and pull requests
+
+on:
+  schedule:
+    - cron: "0 * * * *"  # minute 0 of every hour
+
+jobs:
+  mark-inactive-30d:  # labels items idle for 30 days
+    runs-on: ubuntu-latest
+    steps:
+      - name: Mark 30 day inactive issues and pull requests
+        uses: actions/stale@v3
+        with:
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+          stale-issue-message: >
+            This issue has been labeled `inactive-30d` due to no recent activity in the past 30 days.
+            Please close this issue if no further response or action is needed.
+            Otherwise, please respond with a comment indicating any updates or changes to the original issue and/or confirm this issue still needs to be addressed.
+            This issue will be labeled `inactive-90d` if there is no activity in the next 60 days.
+          stale-issue-label: "inactive-30d"
+          exempt-issue-labels: "0 - Blocked,0 - Backlog,good first issue"  # comma-separated label names
+          days-before-issue-stale: 30
+          days-before-issue-close: -1  # -1: label only, never close (per actions/stale docs)
+          stale-pr-message: >
+            This PR has been labeled `inactive-30d` due to no recent activity in the past 30 days.
+            Please close this PR if it is no longer required.
+            Otherwise, please respond with a comment indicating any updates.
+            This PR will be labeled `inactive-90d` if there is no activity in the next 60 days.
+          stale-pr-label: "inactive-30d"
+          exempt-pr-labels: "0 - Blocked,0 - Backlog,good first issue"  # comma-separated label names
+          days-before-pr-stale: 30
+          days-before-pr-close: -1  # -1: label only, never close (per actions/stale docs)
+          operations-per-run: 50  # cap on operations per scheduled run
+  mark-inactive-90d:  # same as the 30 day job, with a 90 day threshold and label
+    runs-on: ubuntu-latest
+    steps:
+      - name: Mark 90 day inactive issues and pull requests
+        uses: actions/stale@v3
+        with:
+          repo-token: ${{ secrets.GITHUB_TOKEN }}
+          stale-issue-message: >
+            This issue has been labeled `inactive-90d` due to no recent activity in the past 90 days.
+            Please close this issue if no further response or action is needed.
+            Otherwise, please respond with a comment indicating any updates or changes to the original issue and/or confirm this issue still needs to be addressed.
+          stale-issue-label: "inactive-90d"
+          exempt-issue-labels: "0 - Blocked,0 - Backlog,good first issue"  # comma-separated label names
+          days-before-issue-stale: 90
+          days-before-issue-close: -1  # -1: label only, never close (per actions/stale docs)
+          stale-pr-message: >
+            This PR has been labeled `inactive-90d` due to no recent activity in the past 90 days.
+            Please close this PR if it is no longer required.
+            Otherwise, please respond with a comment indicating any updates.
+          stale-pr-label: "inactive-90d"
+          exempt-pr-labels: "0 - Blocked,0 - Backlog,good first issue"  # comma-separated label names
+          days-before-pr-stale: 90
+          days-before-pr-close: -1  # -1: label only, never close (per actions/stale docs)
+          operations-per-run: 50  # cap on operations per scheduled run
diff --git a/CHANGELOG.md b/CHANGELOG.md
index b22d92c902f..2b8247f02d3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,12 +1,14 @@
-
-# cuGraph 0.18.0 (Date TBD)
+# cuGraph 0.19.0 (Date TBD)
## New Features
## Improvements
## Bug Fixes
-- PR #1321 Fix benchmark script trap setup to come after the PATH variable update
+
+# 0.18.0
+
+Please see https://github.com/rapidsai/cugraph/releases/tag/branch-0.18-latest for the latest changes to this development branch.
# cuGraph 0.17.0 (10 Dec 2020)
## New Features
@@ -19,6 +21,7 @@
- PR #1279 Add self loop check variable in graph
- PR #1277 SciPy sparse matrix input support for WCC, SCC, SSSP, and BFS
- PR #1278 Add support for shortest_path_length and fix graph vertex checks
+- PR #1280 Add Multi(Di)Graph support
## Improvements
- PR #1227 Pin cmake policies to cmake 3.17 version
diff --git a/README.md b/README.md
index 8fee5451ac3..62059e9c7b6 100644
--- a/README.md
+++ b/README.md
@@ -8,7 +8,7 @@ The [RAPIDS](https://rapids.ai) cuGraph library is a collection of GPU accelerat
**NOTE:** For the latest stable [README.md](https://github.com/rapidsai/cugraph/blob/main/README.md) ensure you are on the latest branch.
-
+As an example, the following Python snippet loads graph data and computes PageRank:
```python
import cugraph
@@ -30,6 +30,17 @@ for i in range(len(df_page)):
" PageRank is " + str(df_page['pagerank'].iloc[i]))
```
+## Getting cuGraph
+There are 3 ways to get cuGraph:
+1. [Quick start with Docker Repo](#quick-start)
+2. [Conda Installation](#conda)
+3. [Build from Source](#build-from-source-and-contributing)
+
+
+---
+# Currently Supported Features
+As of Release 0.18 - including 0.18 nightly
+
## Supported Algorithms
@@ -40,6 +51,7 @@ for i in range(len(df_page)):
| | Betweenness Centrality | Single-GPU | |
| | Edge Betweenness Centrality | Single-GPU | |
| Community | | | |
+| | EgoNet | Single-GPU | |
| | Leiden | Single-GPU | |
| | Louvain | Multi-GPU | |
| | Ensemble Clustering for Graphs | Single-GPU | |
@@ -56,17 +68,20 @@ for i in range(len(df_page)):
| | Core Number | Single-GPU | |
| Layout | | | |
| | Force Atlas 2 | Single-GPU | |
+| Linear Assignment| | | |
+| | Hungarian | Single-GPU | [README](cpp/src/linear_assignment/README-hungarian.md) |
| Link Analysis| | | |
| | Pagerank | Multi-GPU | |
| | Personal Pagerank | Multi-GPU | |
-| | HITS | Single-GPU | leverages Gunrock |
+| | HITS | Single-GPU | leverages Gunrock |
| Link Prediction | | | |
| | Jaccard Similarity | Single-GPU | |
| | Weighted Jaccard Similarity | Single-GPU | |
| | Overlap Similarity | Single-GPU | |
| Traversal | | | |
-| | Breadth First Search (BFS) | Multi-GPU | |
+| | Breadth First Search (BFS) | Multi-GPU | with cutoff support |
| | Single Source Shortest Path (SSSP) | Multi-GPU | |
+| | Traveling Salesperson Problem (TSP) | Single-GPU | |
| Structure | | | |
| | Renumbering | Single-GPU | multiple columns, any data type |
| | Symmetrize | Multi-GPU | |
@@ -74,7 +89,6 @@ for i in range(len(df_page)):
| | Hungarian Algorithm | Single-GPU | |
| | Minimum Spanning Tree | Single-GPU | |
| | Maximum Spanning Tree | Single-GPU | |
-
| | |
@@ -83,13 +97,13 @@ for i in range(len(df_page)):
| --------------- | --------------------------------------------------- |
| Graph | An undirected Graph |
| DiGraph | A Directed Graph |
-| _Multigraph_ | _coming in 0.18_ |
-| _MultiDigraph_ | _coming in 0.18_ |
+| Multigraph | A Graph with multiple edges between a vertex pair |
+| MultiDigraph | A Directed Graph with multiple edges between a vertex pair |
| | |
## Supported Data Types
-cuGraph supports the creation of a graph several data types:
+cuGraph supports graph creation with Source and Destination being expressed as:
* cuDF DataFrame
* Pandas DataFrame
@@ -123,22 +137,14 @@ The amount of memory required is dependent on the graph structure and the analyt
The use of managed memory for oversubscription can also be used to exceed the above memory limitations. See the recent blog on _Tackling Large Graphs with RAPIDS cuGraph and CUDA Unified Memory on GPUs_: https://medium.com/rapids-ai/tackling-large-graphs-with-rapids-cugraph-and-unified-virtual-memory-b5b69a065d4
+
-## Getting cuGraph
-### Intro
-There are 3 ways to get cuGraph :
-1. [Quick start with Docker Demo Repo](#quick)
-2. [Conda Installation](#conda)
-3. [Build from Source](#source)
-
-
-
-
+---
## Quick Start
-Please see the [Demo Docker Repository](https://hub.docker.com/r/rapidsai/rapidsai/), choosing a tag based on the NVIDIA CUDA version you’re running. This provides a ready to run Docker container with example notebooks and data, showcasing how you can utilize all of the RAPIDS libraries: cuDF, cuML, and cuGraph.
+Please see the [Docker Repository](https://hub.docker.com/r/rapidsai/rapidsai/), choosing a tag based on the NVIDIA CUDA version you’re running. This provides a ready to run Docker container with example notebooks and data, showcasing how you can utilize all of the RAPIDS libraries: cuDF, cuML, and cuGraph.
-### Conda
+## Conda
It is easy to install cuGraph using conda. You can get a minimal conda installation with [Miniconda](https://conda.io/miniconda.html) or get the full installation with [Anaconda](https://www.anaconda.com/download).
Install and update cuGraph using the conda command:
@@ -158,7 +164,7 @@ conda install -c nvidia -c rapidsai -c numba -c conda-forge -c defaults cugraph
Note: This conda installation only applies to Linux and Python versions 3.7/3.8.
-### Build from Source and Contributing
+## Build from Source and Contributing
Please see our [guide for building cuGraph from source](SOURCEBUILD.md)
diff --git a/SOURCEBUILD.md b/SOURCEBUILD.md
index 8acd90c4f7f..0cbf6ccdaa3 100644
--- a/SOURCEBUILD.md
+++ b/SOURCEBUILD.md
@@ -1,6 +1,6 @@
# Building from Source
-The following instructions are for users wishing to build cuGraph from source code. These instructions are tested on supported distributions of Linux, CUDA, and Python - See [RAPIDS Getting Started](https://rapids.ai/start.html) for list of supported environments. Other operating systems _might be_ compatible, but are not currently tested.
+The following instructions are for users wishing to build cuGraph from source code. These instructions are tested on supported distributions of Linux, CUDA, and Python - See [RAPIDS Getting Started](https://rapids.ai/start.html) for list of supported environments. Other operating systems _might be_ compatible, but are not currently tested.
The cuGraph package include both a C/C++ CUDA portion and a python portion. Both libraries need to be installed in order for cuGraph to operate correctly.
@@ -9,7 +9,7 @@ The cuGraph package include both a C/C++ CUDA portion and a python portion. Bot
__Compiler__:
* `gcc` version 5.4+
* `nvcc` version 10.0+
-* `cmake` version 3.12+
+* `cmake` version 3.18+
__CUDA:__
* CUDA 10.1+
@@ -97,17 +97,21 @@ There are several other options available on the build script for advanced users
`build.sh` options:
```bash
build.sh [ ...] [ ...]
- clean - remove all existing build artifacts and configuration (start over)
- libcugraph - build the cugraph C++ code
- cugraph - build the cugraph Python package
-
+ where is:
+ clean - remove all existing build artifacts and configuration (start over)
+ libcugraph - build the cugraph C++ code
+ cugraph - build the cugraph Python package
+ docs - build the docs
and is:
-v - verbose build mode
-g - build for debug
-n - no install step
+ --allgpuarch - build for all supported GPU architectures
--show_depr_warn - show cmake deprecation warnings
-h - print this text
+ default action (no args) is to build and install 'libcugraph' then 'cugraph' then 'docs' targets
+
examples:
$ ./build.sh clean # remove prior build artifacts (start over)
$ ./build.sh libcugraph -v # compile and install libcugraph with verbose output
@@ -189,7 +193,7 @@ Run either the C++ or the Python tests with datasets
```bash
cd $CUGRAPH_HOME/datasets
- source get_test_data.sh #This takes about 10 minutes and download 1GB data (>5 GB uncompressed)
+ source get_test_data.sh #This takes about 10 minutes and downloads 1GB data (>5 GB uncompressed)
```
Run the C++ tests on large input:
diff --git a/benchmarks/bench_algos.py b/benchmarks/bench_algos.py
index 9be636ca480..f9f8bf9cf53 100644
--- a/benchmarks/bench_algos.py
+++ b/benchmarks/bench_algos.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -32,6 +32,7 @@ def setFixtureParamNames(*args, **kwargs):
import cugraph
from cugraph.structure.number_map import NumberMap
from cugraph.tests import utils
+from cugraph.utilities.utils import is_device_version_less_than
import rmm
from .params import FIXTURE_PARAMS
@@ -212,6 +213,8 @@ def bench_jaccard(gpubenchmark, graphWithAdjListComputed):
gpubenchmark(cugraph.jaccard, graphWithAdjListComputed)
+@pytest.mark.skipif(
+ is_device_version_less_than((7, 0)), reason="Not supported on Pascal")
def bench_louvain(gpubenchmark, graphWithAdjListComputed):
gpubenchmark(cugraph.louvain, graphWithAdjListComputed)
diff --git a/build.sh b/build.sh
index b3d3463ed4e..54634e2ca6e 100755
--- a/build.sh
+++ b/build.sh
@@ -1,6 +1,6 @@
#!/bin/bash
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# cugraph build script
@@ -19,18 +19,20 @@ ARGS=$*
REPODIR=$(cd $(dirname $0); pwd)
LIBCUGRAPH_BUILD_DIR=${LIBCUGRAPH_BUILD_DIR:=${REPODIR}/cpp/build}
-VALIDARGS="clean libcugraph cugraph docs -v -g -n --allgpuarch --show_depr_warn -h --help"
+VALIDARGS="clean libcugraph cugraph cpp-mgtests docs -v -g -n --allgpuarch --buildfaiss --show_depr_warn -h --help"  # keep in sync with the targets and flags listed in HELP
HELP="$0 [ ...] [ ...]
where is:
clean - remove all existing build artifacts and configuration (start over)
libcugraph - build the cugraph C++ code
cugraph - build the cugraph Python package
+ cpp-mgtests - build libcugraph mnmg tests. Builds MPI communicator, adding MPI as a dependency.
docs - build the docs
and is:
-v - verbose build mode
-g - build for debug
-n - no install step
--allgpuarch - build for all supported GPU architectures
+ --buildfaiss - build faiss statically into cugraph
--show_depr_warn - show cmake deprecation warnings
-h - print this text
@@ -44,10 +46,12 @@ CUGRAPH_BUILD_DIR=${REPODIR}/python/build
BUILD_DIRS="${LIBCUGRAPH_BUILD_DIR} ${CUGRAPH_BUILD_DIR}"
# Set defaults for vars modified by flags to this script
-VERBOSE=""
+VERBOSE_FLAG=""
BUILD_TYPE=Release
INSTALL_TARGET=install
BUILD_DISABLE_DEPRECATION_WARNING=ON
+BUILD_CPP_MG_TESTS=OFF
+BUILD_STATIC_FAISS=OFF
GPU_ARCH=""
# Set defaults for vars that may not have been defined externally
@@ -82,7 +86,7 @@ fi
# Process flags
if hasArg -v; then
- VERBOSE=1
+ VERBOSE_FLAG="-v"
fi
if hasArg -g; then
BUILD_TYPE=Debug
@@ -93,9 +97,15 @@ fi
if hasArg --allgpuarch; then
GPU_ARCH="-DGPU_ARCHS=ALL"
fi
+if hasArg --buildfaiss; then
+ BUILD_STATIC_FAISS=ON
+fi
if hasArg --show_depr_warn; then
BUILD_DISABLE_DEPRECATION_WARNING=OFF
fi
+if hasArg cpp-mgtests; then
+ BUILD_CPP_MG_TESTS=ON
+fi
# If clean given, run it prior to any other steps
if hasArg clean; then
@@ -127,10 +137,13 @@ if buildAll || hasArg libcugraph; then
mkdir -p ${LIBCUGRAPH_BUILD_DIR}
cd ${LIBCUGRAPH_BUILD_DIR}
cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
- ${GPU_ARCH} \
- -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \
- -DCMAKE_BUILD_TYPE=${BUILD_TYPE} ${REPODIR}/cpp
- make -j${PARALLEL_LEVEL} VERBOSE=${VERBOSE} ${INSTALL_TARGET}
+ ${GPU_ARCH} \
+ -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \
+ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
+ -DBUILD_STATIC_FAISS=${BUILD_STATIC_FAISS} \
+ -DBUILD_CUGRAPH_MG_TESTS=${BUILD_CPP_MG_TESTS} \
+ ${REPODIR}/cpp
+ cmake --build "${LIBCUGRAPH_BUILD_DIR}" -j${PARALLEL_LEVEL} --target ${INSTALL_TARGET} ${VERBOSE_FLAG}
fi
# Build and install the cugraph Python package
@@ -152,10 +165,11 @@ if buildAll || hasArg docs; then
cd ${LIBCUGRAPH_BUILD_DIR}
cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \
-DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \
- -DCMAKE_BUILD_TYPE=${BUILD_TYPE} ${REPODIR}/cpp
+              -DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
+              -DBUILD_STATIC_FAISS=${BUILD_STATIC_FAISS} ${REPODIR}/cpp
fi
cd ${LIBCUGRAPH_BUILD_DIR}
- make -j${PARALLEL_LEVEL} VERBOSE=${VERBOSE} docs_cugraph
+ cmake --build "${LIBCUGRAPH_BUILD_DIR}" -j${PARALLEL_LEVEL} --target docs_cugraph ${VERBOSE_FLAG}
cd ${REPODIR}/docs
make html
fi
diff --git a/ci/cpu/build.sh b/ci/cpu/build.sh
index 2c6dc899be2..d69448cda4e 100755
--- a/ci/cpu/build.sh
+++ b/ci/cpu/build.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
#########################################
# cuGraph CPU conda build script for CI #
#########################################
@@ -24,6 +24,9 @@ fi
export GPUCI_CONDA_RETRY_MAX=1
export GPUCI_CONDA_RETRY_SLEEP=30
+# Use Ninja to build
+export CMAKE_GENERATOR="Ninja"
+
################################################################################
# SETUP - Check environment
################################################################################
diff --git a/ci/getGTestTimes.sh b/ci/getGTestTimes.sh
deleted file mode 100755
index 8a3752d76e2..00000000000
--- a/ci/getGTestTimes.sh
+++ /dev/null
@@ -1,46 +0,0 @@
-#!/bin/bash
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This script will print the gtest results sorted by runtime. This will print
-# the results two ways: first by printing all tests sorted by runtime, then by
-# printing all tests grouped by test binary with tests sorted by runtime within
-# the group.
-#
-# To use this script, capture the test run output to a file then run this script
-# with the file as the first arg, or just redirect test output to this script.
-
-awk '/^Running GoogleTest .+$/ {
- testbinary = $3
- }
- /^\[ OK \].+$/ {
- testtime = substr($(NF-1),2)
- newtestdata = testbinary ":" substr($0,14)
- alltestdata = alltestdata newtestdata "\n"
- testdata[testbinary] = testdata[testbinary] newtestdata "\n"
- totaltime = totaltime + testtime
- }
- END {
- # Print all tests sorted by time
- system("echo \"" alltestdata "\" | sort -r -t\\( -nk2")
- print "\n================================================================================"
- # Print test binaries with tests sorted by time
- print "Tests grouped by test binary:"
- for (testbinary in testdata) {
- print testbinary
- system("echo \"" testdata[testbinary] "\" | sort -r -t\\( -nk2")
- }
- print "\n================================================================================"
- print totaltime " milliseconds = " totaltime/60000 " minutes"
- }
-' $1
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 019d03e21da..0fef7b62f8d 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -1,10 +1,10 @@
#!/usr/bin/env bash
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
##########################################
# cuGraph GPU build & testscript for CI #
##########################################
-set -e
-set -o pipefail
+set -e # abort the script on error, this will change for running tests (see below)
+set -o pipefail # piped commands propagate their error
NUMARGS=$#
ARGS=$*
@@ -98,10 +98,15 @@ fi
# TEST - Run GoogleTest and py.tests for libcugraph and cuGraph
################################################################################
-set +e -Eo pipefail
-EXITCODE=0
+# Switch to +e to allow failing commands to continue the script, which is needed
+# so all testing commands run regardless of pass/fail. This requires the desired
+# exit code to be managed using the ERR trap.
+set +e # allow script to continue on error
+set -E # ERR traps are inherited by subcommands
trap "EXITCODE=1" ERR
+EXITCODE=0
+
if hasArg --skip-tests; then
gpuci_logger "Skipping Tests"
else
@@ -117,18 +122,19 @@ else
TEST_MODE_FLAG=""
fi
+ gpuci_logger "Running cuGraph test.sh..."
${WORKSPACE}/ci/test.sh ${TEST_MODE_FLAG} | tee testoutput.txt
+ gpuci_logger "Ran cuGraph test.sh : return code was: $?, gpu/build.sh exit code is now: $EXITCODE"
- echo -e "\nTOP 20 SLOWEST TESTS:\n"
- # Wrap in echo to prevent non-zero exit since this command is non-essential
- echo "$(${WORKSPACE}/ci/getGTestTimes.sh testoutput.txt | head -20)"
-
+ gpuci_logger "Running cuGraph notebook test script..."
${WORKSPACE}/ci/gpu/test-notebooks.sh 2>&1 | tee nbtest.log
+ gpuci_logger "Ran cuGraph notebook test script : return code was: $?, gpu/build.sh exit code is now: $EXITCODE"
python ${WORKSPACE}/ci/utils/nbtestlog2junitxml.py nbtest.log
fi
-if [ -n "\${CODECOV_TOKEN}" ]; then
- codecov -t \$CODECOV_TOKEN
+if [ -n "${CODECOV_TOKEN}" ]; then
+    codecov -t "${CODECOV_TOKEN}"  # quoted so the token is passed as a single word
fi
+gpuci_logger "gpu/build.sh returning value: $EXITCODE"
return ${EXITCODE}
diff --git a/ci/gpu/notebook_list.py b/ci/gpu/notebook_list.py
new file mode 100644
index 00000000000..bb54913ac8d
--- /dev/null
+++ b/ci/gpu/notebook_list.py
@@ -0,0 +1,48 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import re
+import sys
+import glob
+
+from numba import cuda
+
+# "pascal" is shorthand for "Pascal or earlier" rather than strictly Pascal:
+# it is True when the first element of the device's compute capability
+# (presumably the major version - confirm against numba) is below 7.
+device = cuda.get_current_device()
+cc = getattr(device, 'COMPUTE_CAPABILITY')
+pascal = cc[0] < 7
+
+# Print each notebook found under the current directory on stdout, one per
+# line, unless one of its lines marks it to be skipped; skipped notebooks are
+# reported on stderr together with the reason.
+for filename in glob.iglob('**/*.ipynb', recursive=True):
+    reason = None
+    # A context manager closes each notebook as soon as it has been scanned.
+    with open(filename, 'r') as notebook:
+        for line in notebook:
+            # The first matching line decides the skip reason.
+            if re.search('# Skip notebook test', line):
+                reason = 'marked as skip'
+            elif re.search('dask', line):
+                reason = 'suspected Dask usage, not currently automatable'
+            elif pascal and re.search('# Does not run on Pascal', line):
+                reason = 'does not run on Pascal'
+            if reason:
+                break
+
+    if reason:
+        print(f'SKIPPING {filename} ({reason})', file=sys.stderr)
+    else:
+        print(filename)
diff --git a/ci/gpu/test-notebooks.sh b/ci/gpu/test-notebooks.sh
index 389d3be0bfd..650132f116d 100755
--- a/ci/gpu/test-notebooks.sh
+++ b/ci/gpu/test-notebooks.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -12,23 +12,27 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-#RAPIDS_DIR=/rapids
+# Any failing command will set EXITCODE to non-zero
+set -e # abort the script on error, this will change for running tests (see below)
+set -o pipefail # piped commands propagate their error
+set -E # ERR traps are inherited by subcommands
+trap "EXITCODE=1" ERR
+
NOTEBOOKS_DIR=${WORKSPACE}/notebooks
NBTEST=${WORKSPACE}/ci/utils/nbtest.sh
LIBCUDF_KERNEL_CACHE_PATH=${WORKSPACE}/.jitcache
+EXITCODE=0
cd ${NOTEBOOKS_DIR}
TOPLEVEL_NB_FOLDERS=$(find . -name *.ipynb |cut -d'/' -f2|sort -u)
-# Add notebooks that should be skipped here
-# (space-separated list of filenames without paths)
-
-SKIPNBS="uvm.ipynb bfs_benchmark.ipynb louvain_benchmark.ipynb pagerank_benchmark.ipynb sssp_benchmark.ipynb release.ipynb nx_cugraph_bc_benchmarking.ipynb"
-
## Check env
env
-EXITCODE=0
+# Do not abort the script on error. This allows all tests to run regardless of
+# pass/fail but relies on the ERR trap above to manage the EXITCODE for the
+# script.
+set +e
# Always run nbtest in all TOPLEVEL_NB_FOLDERS, set EXITCODE to failure
# if any run fails
@@ -37,29 +41,20 @@ for folder in ${TOPLEVEL_NB_FOLDERS}; do
echo "FOLDER: ${folder}"
echo "========================================"
cd ${NOTEBOOKS_DIR}/${folder}
- for nb in $(find . -name "*.ipynb"); do
+ NBLIST=$(python ${WORKSPACE}/ci/gpu/notebook_list.py)
+ for nb in ${NBLIST}; do
nbBasename=$(basename ${nb})
- # Skip all NBs that use dask (in the code or even in their name)
- if ((echo ${nb}|grep -qi dask) || \
- (grep -q dask ${nb})); then
- echo "--------------------------------------------------------------------------------"
- echo "SKIPPING: ${nb} (suspected Dask usage, not currently automatable)"
- echo "--------------------------------------------------------------------------------"
- elif (echo " ${SKIPNBS} " | grep -q " ${nbBasename} "); then
- echo "--------------------------------------------------------------------------------"
- echo "SKIPPING: ${nb} (listed in skip list)"
- echo "--------------------------------------------------------------------------------"
- else
- cd $(dirname ${nb})
- nvidia-smi
- ${NBTEST} ${nbBasename}
- EXITCODE=$((EXITCODE | $?))
- rm -rf ${LIBCUDF_KERNEL_CACHE_PATH}/*
- cd ${NOTEBOOKS_DIR}/${folder}
- fi
+ cd $(dirname ${nb})
+ nvidia-smi
+ ${NBTEST} ${nbBasename}
+ echo "Ran nbtest for $nb : return code was: $?, test script exit code is now: $EXITCODE"
+ echo
+ rm -rf ${LIBCUDF_KERNEL_CACHE_PATH}/*
+ cd ${NOTEBOOKS_DIR}/${folder}
done
done
nvidia-smi
+echo "Notebook test script exiting with value: $EXITCODE"
exit ${EXITCODE}
diff --git a/ci/test.sh b/ci/test.sh
index db9390461c0..b0134e97246 100755
--- a/ci/test.sh
+++ b/ci/test.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -12,9 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-# note: do not use set -e in order to allow all gtest invocations to take place,
-# and instead keep track of exit status and exit with an overall exit status
-set -o pipefail
+# Any failing command will set EXITCODE to non-zero
+set -e # abort the script on error, this will change for running tests (see below)
+set -o pipefail # piped commands propagate their error
+set -E # ERR traps are inherited by subcommands
+trap "EXITCODE=1" ERR
NUMARGS=$#
ARGS=$*
@@ -22,7 +24,7 @@ THISDIR=$(cd $(dirname $0);pwd)
CUGRAPH_ROOT=$(cd ${THISDIR}/..;pwd)
GTEST_ARGS="--gtest_output=xml:${CUGRAPH_ROOT}/test-results/"
DOWNLOAD_MODE=""
-ERRORCODE=0
+EXITCODE=0
export RAPIDS_DATASET_ROOT_DIR=${CUGRAPH_ROOT}/datasets
@@ -50,47 +52,61 @@ else
echo "Download datasets..."
cd ${RAPIDS_DATASET_ROOT_DIR}
bash ./get_test_data.sh ${DOWNLOAD_MODE}
- ERRORCODE=$((ERRORCODE | $?))
- # no need to run tests if dataset download fails
- if (( ${ERRORCODE} != 0 )); then
- exit ${ERRORCODE}
- fi
fi
if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
cd ${CUGRAPH_ROOT}/cpp/build
else
- export LD_LIBRARY_PATH="$WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build:$LD_LIBRARY_PATH"
+ export LD_LIBRARY_PATH="$WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build:$CONDA_PREFIX/lib:$LD_LIBRARY_PATH"
cd $WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build
fi
-for gt in gtests/*; do
- test_name=$(basename $gt)
- echo "Running GoogleTest $test_name"
- ${gt} ${GTEST_FILTER} ${GTEST_ARGS}
- ERRORCODE=$((ERRORCODE | $?))
-done
-
+# FIXME: if possible, any install and build steps should be moved outside this
+# script since a failing install/build step is treated as a failing test command
+# and will not stop the script. This script is also only expected to run tests
+# in a preconfigured environment, and install/build steps are unexpected side
+# effects.
if [[ "$PROJECT_FLASH" == "1" ]]; then
+ export LIBCUGRAPH_BUILD_DIR="$WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build"
+
+ # Faiss patch
+ echo "Update libcugraph.so"
+ cd $LIBCUGRAPH_BUILD_DIR
+ chrpath -d libcugraph.so
+ patchelf --replace-needed `patchelf --print-needed libcugraph.so | grep faiss` libfaiss.so libcugraph.so
+
CONDA_FILE=`find $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ -name "libcugraph*.tar.bz2"`
CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension
CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install
echo "Installing $CONDA_FILE"
conda install -c $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ "$CONDA_FILE"
- export LIBCUGRAPH_BUILD_DIR="$WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build"
echo "Build cugraph..."
$WORKSPACE/build.sh cugraph
fi
+# Do not abort the script on error from this point on. This allows all tests to
+# run regardless of pass/fail, but relies on the ERR trap above to manage the
+# EXITCODE for the script.
+set +e
+
+echo "C++ gtests for cuGraph..."
+for gt in tests/*_TEST; do
+ test_name=$(basename $gt)
+ echo "Running gtest $test_name"
+ ${gt} ${GTEST_FILTER} ${GTEST_ARGS}
+ echo "Ran gtest $test_name : return code was: $?, test script exit code is now: $EXITCODE"
+done
+
echo "Python pytest for cuGraph..."
cd ${CUGRAPH_ROOT}/python
pytest --cache-clear --junitxml=${CUGRAPH_ROOT}/junit-cugraph.xml -v --cov-config=.coveragerc --cov=cugraph --cov-report=xml:${WORKSPACE}/python/cugraph/cugraph-coverage.xml --cov-report term --ignore=cugraph/raft --benchmark-disable
-ERRORCODE=$((ERRORCODE | $?))
+echo "Ran Python pytest for cugraph : return code was: $?, test script exit code is now: $EXITCODE"
echo "Python benchmarks for cuGraph (running as tests)..."
cd ${CUGRAPH_ROOT}/benchmarks
pytest -v -m "managedmem_on and poolallocator_on and tiny" --benchmark-disable
-ERRORCODE=$((ERRORCODE | $?))
+echo "Ran Python benchmarks for cuGraph (running as tests) : return code was: $?, test script exit code is now: $EXITCODE"
-exit ${ERRORCODE}
+echo "Test script exiting with value: $EXITCODE"
+exit ${EXITCODE}
diff --git a/ci/utils/nbtest.sh b/ci/utils/nbtest.sh
index 8c86baeaa09..ae8b52df106 100755
--- a/ci/utils/nbtest.sh
+++ b/ci/utils/nbtest.sh
@@ -1,5 +1,5 @@
#!/bin/bash
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -12,6 +12,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+# Any failing command will set EXITCODE to non-zero
+set +e # do not abort the script on error
+set -o pipefail # piped commands propagate their error
+set -E # ERR traps are inherited by subcommands
+trap "EXITCODE=1" ERR
+
+# Prepend the following code to all scripts generated from nbconvert. This
+# allows all cell and line magic code to run and update the namespace as if
+# running in jupyter, but will also tolerate failures due to running in a
+# non-jupyter env.
+# Note: depending on the assumptions of the notebook script, ignoring failures
+# may not be acceptable (meaning the converted notebook simply cannot run
+# outside of jupyter as-is), hence the warning.
MAGIC_OVERRIDE_CODE="
def my_run_line_magic(*args, **kwargs):
g=globals()
@@ -58,7 +71,6 @@ for nb in $*; do
NBEXITCODE=$?
echo EXIT CODE: ${NBEXITCODE}
echo
- EXITCODE=$((EXITCODE | ${NBEXITCODE}))
done
exit ${EXITCODE}
diff --git a/conda/environments/builddocs.yml b/conda/environments/builddocs.yml
deleted file mode 100644
index 89bd44a5542..00000000000
--- a/conda/environments/builddocs.yml
+++ /dev/null
@@ -1,19 +0,0 @@
-name: builddocs
-channels:
-- rapidsai
-- pytorch
-- conda-forge
-- numba
-- defaults
-dependencies:
-- python=3.6*
-- cugraph=0.8*
-- cudatoolkit=9.2
-- cudf=0.8*
-- pyarrow=0.12.1.*
-- cython=0.29*
-- pip:
- - numpydoc
- - sphinx
- - sphinx-rtd-theme
- - sphinxcontrib-websupport
diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml
index 067fd0bc4ba..255366b0a82 100644
--- a/conda/environments/cugraph_dev_cuda10.1.yml
+++ b/conda/environments/cugraph_dev_cuda10.1.yml
@@ -5,17 +5,17 @@ channels:
- rapidsai-nightly
- conda-forge
dependencies:
-- cudf=0.18.*
-- libcudf=0.18.*
-- rmm=0.18.*
-- cuxfilter=0.18.*
-- librmm=0.18.*
+- cudf=0.19.*
+- libcudf=0.19.*
+- rmm=0.19.*
+- cuxfilter=0.19.*
+- librmm=0.19.*
- dask>=2.12.0
- distributed>=2.12.0
-- dask-cuda=0.18*
-- dask-cudf=0.18*
+- dask-cuda=0.19*
+- dask-cudf=0.19*
- nccl>=2.7
-- ucx-py=0.18*
+- ucx-py=0.19*
- ucx-proc=*=gpu
- scipy
- networkx
@@ -23,12 +23,14 @@ dependencies:
- cudatoolkit=10.1
- clang=8.0.1
- clang-tools=8.0.1
-- cmake>=3.12
+- cmake>=3.18
- python>=3.6,<3.9
- notebook>=0.5.0
- boost
- cython>=0.29,<0.30
- pytest
+- libfaiss=1.6.3
+- faiss-proc=*=cuda
- scikit-learn>=0.23.1
- colorcet
- holoviews
@@ -44,3 +46,6 @@ dependencies:
- libcypher-parser
- rapids-pytest-benchmark
- doxygen
+- pytest-cov
+- gtest
+- gmock
diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml
index 3371340d8bd..e64d7c77b7d 100644
--- a/conda/environments/cugraph_dev_cuda10.2.yml
+++ b/conda/environments/cugraph_dev_cuda10.2.yml
@@ -5,17 +5,17 @@ channels:
- rapidsai-nightly
- conda-forge
dependencies:
-- cudf=0.18.*
-- libcudf=0.18.*
-- rmm=0.18.*
-- cuxfilter=0.18.*
-- librmm=0.18.*
+- cudf=0.19.*
+- libcudf=0.19.*
+- rmm=0.19.*
+- cuxfilter=0.19.*
+- librmm=0.19.*
- dask>=2.12.0
- distributed>=2.12.0
-- dask-cuda=0.18*
-- dask-cudf=0.18*
+- dask-cuda=0.19*
+- dask-cudf=0.19*
- nccl>=2.7
-- ucx-py=0.18*
+- ucx-py=0.19*
- ucx-proc=*=gpu
- scipy
- networkx
@@ -23,12 +23,14 @@ dependencies:
- cudatoolkit=10.2
- clang=8.0.1
- clang-tools=8.0.1
-- cmake>=3.12
+- cmake>=3.18
- python>=3.6,<3.9
- notebook>=0.5.0
- boost
- cython>=0.29,<0.30
- pytest
+- libfaiss=1.6.3
+- faiss-proc=*=cuda
- scikit-learn>=0.23.1
- colorcet
- holoviews
@@ -44,3 +46,6 @@ dependencies:
- libcypher-parser
- rapids-pytest-benchmark
- doxygen
+- pytest-cov
+- gtest
+- gmock
diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml
index ee3b57632a1..1f05e4762ef 100644
--- a/conda/environments/cugraph_dev_cuda11.0.yml
+++ b/conda/environments/cugraph_dev_cuda11.0.yml
@@ -5,17 +5,17 @@ channels:
- rapidsai-nightly
- conda-forge
dependencies:
-- cudf=0.18.*
-- libcudf=0.18.*
-- rmm=0.18.*
-- cuxfilter=0.18.*
-- librmm=0.18.*
+- cudf=0.19.*
+- libcudf=0.19.*
+- rmm=0.19.*
+- cuxfilter=0.19.*
+- librmm=0.19.*
- dask>=2.12.0
- distributed>=2.12.0
-- dask-cuda=0.18*
-- dask-cudf=0.18*
+- dask-cuda=0.19*
+- dask-cudf=0.19*
- nccl>=2.7
-- ucx-py=0.18*
+- ucx-py=0.19*
- ucx-proc=*=gpu
- scipy
- networkx
@@ -23,12 +23,14 @@ dependencies:
- cudatoolkit=11.0
- clang=8.0.1
- clang-tools=8.0.1
-- cmake>=3.12
+- cmake>=3.18
- python>=3.6,<3.9
- notebook>=0.5.0
- boost
- cython>=0.29,<0.30
- pytest
+- libfaiss=1.6.3
+- faiss-proc=*=cuda
- scikit-learn>=0.23.1
- colorcet
- holoviews
@@ -44,3 +46,6 @@ dependencies:
- libcypher-parser
- rapids-pytest-benchmark
- doxygen
+- pytest-cov
+- gtest
+- gmock
diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml
index 211ec920d27..8f7495eab3c 100644
--- a/conda/recipes/libcugraph/meta.yaml
+++ b/conda/recipes/libcugraph/meta.yaml
@@ -1,4 +1,4 @@
-# Copyright (c) 2018, NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
# Usage:
# conda build -c nvidia -c rapidsai -c conda-forge -c defaults .
@@ -21,6 +21,13 @@ build:
- CUDAHOSTCXX
- PARALLEL_LEVEL
- VERSION_SUFFIX
+ - CCACHE_DIR
+ - CCACHE_NOHASHDIR
+ - CCACHE_COMPILERCHECK
+ - CMAKE_GENERATOR
+ - CMAKE_C_COMPILER_LAUNCHER
+ - CMAKE_CXX_COMPILER_LAUNCHER
+ - CMAKE_CUDA_COMPILER_LAUNCHER
requirements:
build:
@@ -32,12 +39,18 @@ requirements:
- nccl>=2.7
- ucx-py {{ minor_version }}
- ucx-proc=*=gpu
+ - gtest
+ - faiss-proc=*=cuda
+ - libfaiss=1.6.3
+ - gmock
run:
- libcudf={{ minor_version }}
- {{ pin_compatible('cudatoolkit', max_pin='x.x') }}
- nccl>=2.7
- ucx-py {{ minor_version }}
- ucx-proc=*=gpu
+ - faiss-proc=*=cuda
+ - libfaiss=1.6.3
#test:
# commands:
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index bd122fc1fb2..b2d537edaa2 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -1,5 +1,5 @@
#=============================================================================
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
@@ -14,9 +14,9 @@
# limitations under the License.
#=============================================================================
-cmake_minimum_required(VERSION 3.12..3.17 FATAL_ERROR)
+cmake_minimum_required(VERSION 3.18...3.18 FATAL_ERROR)
-project(CUGRAPH VERSION 0.18.0 LANGUAGES C CXX CUDA)
+project(CUGRAPH VERSION 0.19.0 LANGUAGES C CXX CUDA)
###################################################################################################
# - build type ------------------------------------------------------------------------------------
@@ -33,6 +33,18 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
"Debug" "Release" "MinSizeRel" "RelWithDebInfo")
endif()
+##############################################################################
+# - User Options ------------------------------------------------------------
+
+option(BUILD_CUGRAPH_MG_TESTS "Build cuGraph multigpu algorithm tests" OFF)
+
+###################################################################################################
+# - user options ------------------------------------------------------------------------------
+
+set(BLAS_LIBRARIES "" CACHE STRING
+ "Location of BLAS library for FAISS build.")
+option(BUILD_STATIC_FAISS "Build the FAISS library for nearest neighbors search on GPU" OFF)
+
###################################################################################################
# - compiler options ------------------------------------------------------------------------------
@@ -90,10 +102,12 @@ message("-- Building for GPU_ARCHS = ${GPU_ARCHS}")
foreach(arch ${GPU_ARCHS})
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_${arch},code=sm_${arch}")
set(GUNROCK_GENCODE_SM${arch} "ON")
+ set(FAISS_GPU_ARCHS "${FAISS_GPU_ARCHS} -gencode arch=compute_${arch},code=sm_${arch}")
endforeach()
list(GET GPU_ARCHS -1 ptx)
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_${ptx},code=compute_${ptx}")
+set(FAISS_GPU_ARCHS "${FAISS_GPU_ARCHS} -gencode arch=compute_${ptx},code=compute_${ptx}")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-constexpr")
set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas --disable-warnings")
@@ -152,23 +166,24 @@ if(OpenMP_FOUND)
endif(OpenMP_FOUND)
+###################################################################################################
+# - find blas -------------------------------------------------------------------------------------
+
+if(NOT DEFINED BLAS_LIBRARIES)
+ find_package( BLAS REQUIRED )
+else()
+ message(STATUS "Manually setting BLAS to ${BLAS_LIBRARIES}")
+endif()
+
###################################################################################################
# - find gtest ------------------------------------------------------------------------------------
if(BUILD_TESTS)
- include(ConfigureGoogleTest)
-
- if(GTEST_FOUND)
- message(STATUS
- "Google C++ Testing Framework (Google Test) found in ${GTEST_ROOT}")
- else()
- message(AUTHOR_WARNING
- "Google C++ Testing Framework (Google Test) not found: automated tests are disabled.")
- endif(GTEST_FOUND)
+ find_package(GTest REQUIRED)
endif(BUILD_TESTS)
###################################################################################################
-# - RMM -------------------------------------------------------------------------------------------
+# - find RMM --------------------------------------------------------------------------------------
find_path(RMM_INCLUDE "rmm"
HINTS
@@ -178,6 +193,24 @@ find_path(RMM_INCLUDE "rmm"
message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}")
+###################################################################################################
+# - find NCCL -------------------------------------------------------------------------------------
+
+if(NOT NCCL_PATH)
+ find_package(NCCL REQUIRED)
+else()
+ message("-- Manually set NCCL PATH to ${NCCL_PATH}")
+ set(NCCL_INCLUDE_DIRS ${NCCL_PATH}/include)
+ set(NCCL_LIBRARIES ${NCCL_PATH}/lib/libnccl.so)
+endif(NOT NCCL_PATH)
+
+###################################################################################################
+# - find MPI - only enabled if MG tests are to be built
+
+if(BUILD_CUGRAPH_MG_TESTS)
+ find_package(MPI REQUIRED)
+endif(BUILD_CUGRAPH_MG_TESTS)
+
###################################################################################################
# - Fetch Content ---------------------------------------------------------------------------------
include(FetchContent)
@@ -205,7 +238,7 @@ message("Fetching cuco")
FetchContent_Declare(
cuco
GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git
- GIT_TAG d965ed8dea8f56da8e260a6130dddf3ca351c45f
+ GIT_TAG 2196040f0562a0280292eebef5295d914f615e63
)
FetchContent_GetProperties(cuco)
@@ -235,26 +268,60 @@ endif()
set(LIBCUDACXX_INCLUDE_DIR "${libcudacxx_SOURCE_DIR}/include")
message("set LIBCUDACXX_INCLUDE_DIR to: ${LIBCUDACXX_INCLUDE_DIR}")
+# - CUHORNET
+FetchContent_Declare(
+ cuhornet
+ GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git
+ GIT_TAG 9cb8e8803852bd895a9c95c0fe778ad6eeefa7ad
+ GIT_SHALLOW true
+ SOURCE_SUBDIR hornet
+)
+
+FetchContent_GetProperties(cuhornet)
+if(NOT cuhornet_POPULATED)
+ message("populating cuhornet")
+ FetchContent_Populate(cuhornet)
+ # We are not using the cuhornet CMake targets, so no need to call `add_subdirectory()`.
+endif()
+set(CUHORNET_INCLUDE_DIR ${cuhornet_SOURCE_DIR} CACHE STRING "Path to cuhornet includes")
+
+# - raft - (header only)
+# Only cloned if RAFT_PATH env variable is not defined
+if(DEFINED ENV{RAFT_PATH})
+ message(STATUS "RAFT_PATH environment variable detected.")
+ message(STATUS "RAFT_DIR set to $ENV{RAFT_PATH}")
+ set(RAFT_DIR "$ENV{RAFT_PATH}")
+
+else(DEFINED ENV{RAFT_PATH})
+ message(STATUS "RAFT_PATH environment variable NOT detected, cloning RAFT")
+
+ FetchContent_Declare(
+ raft
+ GIT_REPOSITORY https://github.com/rapidsai/raft.git
+ GIT_TAG 4a79adcb0c0e87964dcdc9b9122f242b5235b702
+ SOURCE_SUBDIR raft
+ )
+
+ FetchContent_GetProperties(raft)
+ if(NOT raft_POPULATED)
+ message("populating raft")
+ FetchContent_Populate(raft)
+ # We are not using any raft CMake targets, so no need to call `add_subdirectory()`.
+ endif()
+
+ set(RAFT_DIR "${raft_SOURCE_DIR}")
+endif(DEFINED ENV{RAFT_PATH})
###################################################################################################
# - External Projects -----------------------------------------------------------------------------
# https://cmake.org/cmake/help/v3.0/module/ExternalProject.html
-include(ExternalProject)
-
-# - CUHORNET
-set(CUHORNET_DIR ${CMAKE_CURRENT_BINARY_DIR}/cuhornet CACHE STRING "Path to cuhornet repo")
-set(CUHORNET_INCLUDE_DIR ${CUHORNET_DIR}/src/cuhornet CACHE STRING "Path to cuhornet includes")
+# FIXME: gunrock is the only external package still using ExternalProject
+# instead of FetchContent. Consider migrating to FetchContent soon (this may
+# require updates to the gunrock cmake files to support this).
-ExternalProject_Add(cuhornet
- GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git
- GIT_TAG 9cb8e8803852bd895a9c95c0fe778ad6eeefa7ad
- PREFIX ${CUHORNET_DIR}
- CONFIGURE_COMMAND ""
- BUILD_COMMAND ""
- INSTALL_COMMAND ""
-)
+include(ExternalProject)
# - GUNROCK
set(GUNROCK_DIR ${CMAKE_CURRENT_BINARY_DIR}/gunrock CACHE STRING "Path to gunrock repo")
@@ -262,7 +329,7 @@ set(GUNROCK_INCLUDE_DIR ${GUNROCK_DIR}/src/gunrock_ext CACHE STRING "Path to gun
ExternalProject_Add(gunrock_ext
GIT_REPOSITORY https://github.com/gunrock/gunrock.git
- GIT_TAG dev
+ GIT_TAG v1.2
PREFIX ${GUNROCK_DIR}
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=
-DGUNROCK_BUILD_SHARED_LIBS=OFF
@@ -280,74 +347,61 @@ ExternalProject_Add(gunrock_ext
)
add_library(gunrock STATIC IMPORTED)
-
add_dependencies(gunrock gunrock_ext)
-
set_property(TARGET gunrock PROPERTY IMPORTED_LOCATION ${GUNROCK_DIR}/src/gunrock_ext-build/lib/libgunrock.a)
-# - NCCL
-if(NOT NCCL_PATH)
- find_package(NCCL REQUIRED)
+# - FAISS
+# FIXME: The commit currently being fetched from faiss is using autotools which
+# is more convenient to build with ExternalProject_Add.
+# Consider migrating to FetchContent once the tagged commit is changed.
+
+if(BUILD_STATIC_FAISS)
+ set(FAISS_DIR ${CMAKE_CURRENT_BINARY_DIR}/faiss CACHE STRING
+ "Path to FAISS source directory")
+ ExternalProject_Add(faiss
+ GIT_REPOSITORY https://github.com/facebookresearch/faiss.git
+ GIT_TAG a5b850dec6f1cd6c88ab467bfd5e87b0cac2e41d
+ CONFIGURE_COMMAND LIBS=-pthread
+ CPPFLAGS=-w
+ LDFLAGS=-L${CMAKE_INSTALL_PREFIX}/lib
+ ${CMAKE_CURRENT_BINARY_DIR}/faiss/src/faiss/configure
+ --prefix=${CMAKE_CURRENT_BINARY_DIR}/faiss
+ --with-blas=${BLAS_LIBRARIES}
+ --with-cuda=${CUDA_TOOLKIT_ROOT_DIR}
+ --with-cuda-arch=${FAISS_GPU_ARCHS}
+ -v
+ PREFIX ${FAISS_DIR}
+ BUILD_COMMAND make -j${PARALLEL_LEVEL} VERBOSE=1
+ BUILD_BYPRODUCTS ${FAISS_DIR}/lib/libfaiss.a
+ BUILD_ALWAYS 1
+ INSTALL_COMMAND make -s install > /dev/null
+ UPDATE_COMMAND ""
+ BUILD_IN_SOURCE 1
+ PATCH_COMMAND patch -p1 -N < ${CMAKE_CURRENT_SOURCE_DIR}/cmake/faiss_cuda11.patch || true)
+
+ ExternalProject_Get_Property(faiss install_dir)
+ add_library(FAISS::FAISS STATIC IMPORTED)
+ add_dependencies(FAISS::FAISS faiss)
+ set_property(TARGET FAISS::FAISS PROPERTY
+ IMPORTED_LOCATION ${FAISS_DIR}/lib/libfaiss.a)
+ set(FAISS_INCLUDE_DIRS "${FAISS_DIR}/src")
else()
- message("-- Manually set NCCL PATH to ${NCCL_PATH}")
- set(NCCL_INCLUDE_DIRS ${NCCL_PATH}/include)
- set(NCCL_LIBRARIES ${NCCL_PATH}/lib/libnccl.so)
-endif(NOT NCCL_PATH)
-
-# - raft - (header only) -----------------------------------------------------
-
-# Only cloned if RAFT_PATH env variable is not defined
-
-if(DEFINED ENV{RAFT_PATH})
- message(STATUS "RAFT_PATH environment variable detected.")
- message(STATUS "RAFT_DIR set to $ENV{RAFT_PATH}")
- set(RAFT_DIR "$ENV{RAFT_PATH}")
-
- ExternalProject_Add(raft
- DOWNLOAD_COMMAND ""
- SOURCE_DIR ${RAFT_DIR}
- CONFIGURE_COMMAND ""
- BUILD_COMMAND ""
- INSTALL_COMMAND "")
-
-else(DEFINED ENV{RAFT_PATH})
- message(STATUS "RAFT_PATH environment variable NOT detected, cloning RAFT")
- set(RAFT_DIR ${CMAKE_CURRENT_BINARY_DIR}/raft CACHE STRING "Path to RAFT repo")
-
- ExternalProject_Add(raft
- GIT_REPOSITORY https://github.com/rapidsai/raft.git
- GIT_TAG f75d7b437bf1da3df749108161b8a0505fb6b7b3
- PREFIX ${RAFT_DIR}
- CONFIGURE_COMMAND ""
- BUILD_COMMAND ""
- INSTALL_COMMAND "")
-
- # Redefining RAFT_DIR so it coincides with the one inferred by env variable.
- set(RAFT_DIR "${RAFT_DIR}/src/raft/")
-endif(DEFINED ENV{RAFT_PATH})
-
+ set(FAISS_INSTALL_DIR "$ENV{FAISS_ROOT}") # hint for FindFAISS, read from the FAISS_ROOT environment variable (empty if unset)
+ find_package(FAISS REQUIRED)
+endif(BUILD_STATIC_FAISS)
###################################################################################################
# - library targets -------------------------------------------------------------------------------
-# target_link_directories is added in cmake 3.13, and cmake advises to use this instead of
-# link_directoires (we should switch to target_link_directories once 3.13 becomes the minimum
-# required version).
-link_directories(
- # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the
- # link directories for nvcc.
- "${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}")
-
add_library(cugraph SHARED
src/utilities/spmv_1D.cu
src/utilities/cython.cu
src/structure/graph.cu
src/linear_assignment/hungarian.cu
- src/link_analysis/pagerank.cu
- src/link_analysis/pagerank_1D.cu
src/link_analysis/gunrock_hits.cpp
src/traversal/bfs.cu
src/traversal/sssp.cu
+ src/traversal/tsp.cu
src/link_prediction/jaccard.cu
src/link_prediction/overlap.cu
src/layout/force_atlas2.cu
@@ -357,9 +411,10 @@ add_library(cugraph SHARED
src/community/louvain.cu
src/community/leiden.cu
src/community/ktruss.cu
- src/community/ECG.cu
+ src/community/ecg.cu
src/community/triangles_counting.cu
src/community/extract_subgraph_by_vertex.cu
+ src/community/egonet.cu
src/cores/core_number.cu
src/traversal/two_hop_neighbors.cu
src/components/connectivity.cu
@@ -367,6 +422,10 @@ add_library(cugraph SHARED
src/centrality/betweenness_centrality.cu
src/experimental/graph.cu
src/experimental/graph_view.cu
+ src/experimental/coarsen_graph.cu
+ src/experimental/renumber_edgelist.cu
+ src/experimental/relabel.cu
+ src/experimental/induced_subgraph.cu
src/experimental/bfs.cu
src/experimental/sssp.cu
src/experimental/pagerank.cu
@@ -374,12 +433,17 @@ add_library(cugraph SHARED
src/tree/mst.cu
)
+target_link_directories(cugraph
+ PRIVATE
+ # CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES is an undocumented/unsupported variable containing the
+ # link directories for nvcc.
+ "${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}")
+
#
# NOTE: This dependency will force the building of cugraph to
# wait until after cugunrock is constructed.
#
add_dependencies(cugraph gunrock_ext)
-add_dependencies(cugraph raft)
###################################################################################################
# - include paths ---------------------------------------------------------------------------------
@@ -408,7 +472,7 @@ target_include_directories(cugraph
# - link libraries --------------------------------------------------------------------------------
target_link_libraries(cugraph PRIVATE
- gunrock cublas cusparse curand cusolver cudart cuda ${NCCL_LIBRARIES})
+ gunrock cublas cusparse curand cusolver cudart cuda FAISS::FAISS ${NCCL_LIBRARIES})
if(OpenMP_CXX_FOUND)
target_link_libraries(cugraph PRIVATE
@@ -461,16 +525,23 @@ target_link_libraries(cugraph PRIVATE
${OpenMP_CXX_LIB_NAMES})
endif(OpenMP_CXX_FOUND)
+# CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the
+# compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent cmake
+# warnings about policy CMP0104. With this setting, arch flags must be manually
+# set! ("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism used in
+# cpp/CMakeLists.txt for setting arch options). Run "cmake --help-policy
+# CMP0104" for policy details.
+# NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to the
+# findcudatoolkit features in cmake 3.17+
+set_target_properties(cugraph PROPERTIES
+ CUDA_ARCHITECTURES OFF)
+
###################################################################################################
# - generate tests --------------------------------------------------------------------------------
if(BUILD_TESTS)
if(GTEST_FOUND)
- # target_link_directories is added in cmake 3.13, and cmake advises to use this instead of
- # link_directoires (we should switch to target_link_directories once 3.13 becomes the
- # minimum required version).
- link_directories(${GTEST_LIBRARY_DIR})
- add_subdirectory(${CMAKE_SOURCE_DIR}/tests)
+ add_subdirectory(${CMAKE_CURRENT_SOURCE_DIR}/tests)
endif(GTEST_FOUND)
endif(BUILD_TESTS)
diff --git a/cpp/cmake/Modules/ConfigureArrow.cmake b/cpp/cmake/Modules/ConfigureArrow.cmake
deleted file mode 100644
index b27e53dd415..00000000000
--- a/cpp/cmake/Modules/ConfigureArrow.cmake
+++ /dev/null
@@ -1,98 +0,0 @@
-set(ARROW_ROOT ${CMAKE_BINARY_DIR}/arrow)
-
-set(ARROW_CMAKE_ARGS " -DARROW_WITH_LZ4=OFF"
- " -DARROW_WITH_ZSTD=OFF"
- " -DARROW_WITH_BROTLI=OFF"
- " -DARROW_WITH_SNAPPY=OFF"
- " -DARROW_WITH_ZLIB=OFF"
- " -DARROW_BUILD_STATIC=ON"
- " -DARROW_BUILD_SHARED=OFF"
- " -DARROW_BOOST_USE_SHARED=ON"
- " -DARROW_BUILD_TESTS=OFF"
- " -DARROW_TEST_LINKAGE=OFF"
- " -DARROW_TEST_MEMCHECK=OFF"
- " -DARROW_BUILD_BENCHMARKS=OFF"
- " -DARROW_IPC=ON"
- " -DARROW_COMPUTE=OFF"
- " -DARROW_CUDA=OFF"
- " -DARROW_JEMALLOC=OFF"
- " -DARROW_BOOST_VENDORED=OFF"
- " -DARROW_PYTHON=OFF"
- " -DARROW_USE_GLOG=OFF"
- " -DCMAKE_VERBOSE_MAKEFILE=ON")
-
-configure_file("${CMAKE_SOURCE_DIR}/cmake/Templates/Arrow.CMakeLists.txt.cmake"
- "${ARROW_ROOT}/CMakeLists.txt")
-
-file(MAKE_DIRECTORY "${ARROW_ROOT}/build")
-file(MAKE_DIRECTORY "${ARROW_ROOT}/install")
-
-execute_process(
- COMMAND ${CMAKE_COMMAND} -G "${CMAKE_GENERATOR}" .
- RESULT_VARIABLE ARROW_CONFIG
- WORKING_DIRECTORY ${ARROW_ROOT})
-
-if(ARROW_CONFIG)
- message(FATAL_ERROR "Configuring Arrow failed: " ${ARROW_CONFIG})
-endif(ARROW_CONFIG)
-
-set(PARALLEL_BUILD -j)
-if($ENV{PARALLEL_LEVEL})
- set(NUM_JOBS $ENV{PARALLEL_LEVEL})
- set(PARALLEL_BUILD "${PARALLEL_BUILD}${NUM_JOBS}")
-endif($ENV{PARALLEL_LEVEL})
-
-if(${NUM_JOBS})
- if(${NUM_JOBS} EQUAL 1)
- message(STATUS "ARROW BUILD: Enabling Sequential CMake build")
- elseif(${NUM_JOBS} GREATER 1)
- message(STATUS "ARROW BUILD: Enabling Parallel CMake build with ${NUM_JOBS} jobs")
- endif(${NUM_JOBS} EQUAL 1)
-else()
- message(STATUS "ARROW BUILD: Enabling Parallel CMake build with all threads")
-endif(${NUM_JOBS})
-
-execute_process(
- COMMAND ${CMAKE_COMMAND} --build .. -- ${PARALLEL_BUILD}
- RESULT_VARIABLE ARROW_BUILD
- WORKING_DIRECTORY ${ARROW_ROOT}/build)
-
-if(ARROW_BUILD)
- message(FATAL_ERROR "Building Arrow failed: " ${ARROW_BUILD})
-endif(ARROW_BUILD)
-
-set(ARROW_GENERATED_IPC_DIR
- "${ARROW_ROOT}/build/src/arrow/ipc")
-
-configure_file(${ARROW_GENERATED_IPC_DIR}/File_generated.h ${CMAKE_SOURCE_DIR}/include/cudf/ipc_generated/File_generated.h COPYONLY)
-configure_file(${ARROW_GENERATED_IPC_DIR}/Message_generated.h ${CMAKE_SOURCE_DIR}/include/cudf/ipc_generated/Message_generated.h COPYONLY)
-configure_file(${ARROW_GENERATED_IPC_DIR}/Schema_generated.h ${CMAKE_SOURCE_DIR}/include/cudf/ipc_generated/Schema_generated.h COPYONLY)
-configure_file(${ARROW_GENERATED_IPC_DIR}/Tensor_generated.h ${CMAKE_SOURCE_DIR}/include/cudf/ipc_generated/Tensor_generated.h COPYONLY)
-
-message(STATUS "Arrow installed here: " ${ARROW_ROOT}/install)
-set(ARROW_LIBRARY_DIR "${ARROW_ROOT}/install/lib")
-set(ARROW_INCLUDE_DIR "${ARROW_ROOT}/install/include")
-
-find_library(ARROW_LIB arrow
- NO_DEFAULT_PATH
- HINTS "${ARROW_LIBRARY_DIR}")
-
-if(ARROW_LIB)
- message(STATUS "Arrow library: " ${ARROW_LIB})
- set(ARROW_FOUND TRUE)
-endif(ARROW_LIB)
-
-set(FLATBUFFERS_ROOT "${ARROW_ROOT}/build/flatbuffers_ep-prefix/src/flatbuffers_ep-install")
-
-message(STATUS "FlatBuffers installed here: " ${FLATBUFFERS_ROOT})
-set(FLATBUFFERS_INCLUDE_DIR "${FLATBUFFERS_ROOT}/include")
-set(FLATBUFFERS_LIBRARY_DIR "${FLATBUFFERS_ROOT}/lib")
-
-add_definitions(-DARROW_METADATA_V4)
-add_definitions(-DARROW_VERSION=1210)
-
-
-
-
-
-
diff --git a/cpp/cmake/Modules/ConfigureGoogleTest.cmake b/cpp/cmake/Modules/ConfigureGoogleTest.cmake
deleted file mode 100644
index 9fac40f4649..00000000000
--- a/cpp/cmake/Modules/ConfigureGoogleTest.cmake
+++ /dev/null
@@ -1,49 +0,0 @@
-set(GTEST_ROOT "${CMAKE_BINARY_DIR}/googletest")
-
-set(GTEST_CMAKE_ARGS "")
- #" -Dgtest_build_samples=ON"
- #" -DCMAKE_VERBOSE_MAKEFILE=ON")
-
-configure_file("${CMAKE_SOURCE_DIR}/cmake/Templates/GoogleTest.CMakeLists.txt.cmake"
- "${GTEST_ROOT}/CMakeLists.txt")
-
-file(MAKE_DIRECTORY "${GTEST_ROOT}/build")
-file(MAKE_DIRECTORY "${GTEST_ROOT}/install")
-
-execute_process(COMMAND ${CMAKE_COMMAND} -G ${CMAKE_GENERATOR} .
- RESULT_VARIABLE GTEST_CONFIG
- WORKING_DIRECTORY ${GTEST_ROOT})
-
-if(GTEST_CONFIG)
- message(FATAL_ERROR "Configuring GoogleTest failed: " ${GTEST_CONFIG})
-endif(GTEST_CONFIG)
-
-set(PARALLEL_BUILD -j)
-if($ENV{PARALLEL_LEVEL})
- set(NUM_JOBS $ENV{PARALLEL_LEVEL})
- set(PARALLEL_BUILD "${PARALLEL_BUILD}${NUM_JOBS}")
-endif($ENV{PARALLEL_LEVEL})
-
-if(${NUM_JOBS})
- if(${NUM_JOBS} EQUAL 1)
- message(STATUS "GTEST BUILD: Enabling Sequential CMake build")
- elseif(${NUM_JOBS} GREATER 1)
- message(STATUS "GTEST BUILD: Enabling Parallel CMake build with ${NUM_JOBS} jobs")
- endif(${NUM_JOBS} EQUAL 1)
-else()
- message(STATUS "GTEST BUILD: Enabling Parallel CMake build with all threads")
-endif(${NUM_JOBS})
-
-execute_process(COMMAND ${CMAKE_COMMAND} --build .. -- ${PARALLEL_BUILD}
- RESULT_VARIABLE GTEST_BUILD
- WORKING_DIRECTORY ${GTEST_ROOT}/build)
-
-if(GTEST_BUILD)
- message(FATAL_ERROR "Building GoogleTest failed: " ${GTEST_BUILD})
-endif(GTEST_BUILD)
-
-message(STATUS "GoogleTest installed here: " ${GTEST_ROOT}/install)
-set(GTEST_INCLUDE_DIR "${GTEST_ROOT}/install/include")
-set(GTEST_LIBRARY_DIR "${GTEST_ROOT}/install/lib")
-set(GTEST_FOUND TRUE)
-
diff --git a/cpp/cmake/Modules/FindFAISS.cmake b/cpp/cmake/Modules/FindFAISS.cmake
new file mode 100644
index 00000000000..7c456edfeef
--- /dev/null
+++ b/cpp/cmake/Modules/FindFAISS.cmake
@@ -0,0 +1,98 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# Based on FindPNG.cmake from cmake 3.14.3
+
+#[=======================================================================[.rst:
+FindFAISS
+---------
+
+Locate the FAISS library and its headers.
+
+Find FAISS
+
+Imported targets
+^^^^^^^^^^^^^^^^
+
+This module defines the following :prop_tgt:`IMPORTED` target:
+
+``FAISS::FAISS``
+ The libFAISS library, if found.
+
+Result variables
+^^^^^^^^^^^^^^^^
+
+This module will set the following variables in your project:
+
+``FAISS_INCLUDE_DIRS``
+ where to find faiss/IndexFlat.h, etc.
+``FAISS_LIBRARIES``
+ the libraries to link against to use libFAISS.
+``FAISS_FOUND``
+ If false, do not try to use FAISS.
+``FAISS_VERSION_STRING``
+ the version of the FAISS library found (not currently populated by this module)
+
+#]=======================================================================]
+
+find_path(FAISS_LOCATION faiss/IndexFlat.h
+ HINTS ${FAISS_INSTALL_DIR}
+ PATH_SUFFIXES include include/)
+
+list(APPEND FAISS_NAMES faiss libfaiss)
+set(_FAISS_VERSION_SUFFIXES )
+
+foreach(v IN LISTS _FAISS_VERSION_SUFFIXES)
+ list(APPEND FAISS_NAMES faiss${v} libfaiss${v})
+ list(APPEND FAISS_NAMES faiss.${v} libfaiss.${v})
+endforeach()
+unset(_FAISS_VERSION_SUFFIXES)
+
+find_library(FAISS_LIBRARY_RELEASE NAMES ${FAISS_NAMES}
+ HINTS ${FAISS_INSTALL_DIR}
+ PATH_SUFFIXES lib)
+
+include(${CMAKE_ROOT}/Modules/SelectLibraryConfigurations.cmake)
+select_library_configurations(FAISS)
+mark_as_advanced(FAISS_LIBRARY_RELEASE)
+unset(FAISS_NAMES)
+
+# Set by select_library_configurations(), but we want the one from
+# find_package_handle_standard_args() below.
+unset(FAISS_FOUND)
+
+if (FAISS_LIBRARY AND FAISS_LOCATION)
+ set(FAISS_INCLUDE_DIRS ${FAISS_LOCATION} )
+ set(FAISS_LIBRARY ${FAISS_LIBRARY})
+
+ if(NOT TARGET FAISS::FAISS)
+ add_library(FAISS::FAISS UNKNOWN IMPORTED)
+ set_target_properties(FAISS::FAISS PROPERTIES
+ INTERFACE_INCLUDE_DIRECTORIES "${FAISS_INCLUDE_DIRS}")
+ if(EXISTS "${FAISS_LIBRARY}")
+ set_target_properties(FAISS::FAISS PROPERTIES
+ IMPORTED_LINK_INTERFACE_LANGUAGES "CXX"
+ IMPORTED_LOCATION "${FAISS_LIBRARY}")
+ endif()
+ endif()
+endif ()
+
+
+include(${CMAKE_ROOT}/Modules/FindPackageHandleStandardArgs.cmake)
+find_package_handle_standard_args(FAISS
+ REQUIRED_VARS FAISS_LIBRARY FAISS_LOCATION
+ VERSION_VAR FAISS_VERSION_STRING)
+
+mark_as_advanced(FAISS_LOCATION FAISS_LIBRARY)
diff --git a/cpp/cmake/Templates/Arrow.CMakeLists.txt.cmake b/cpp/cmake/Templates/Arrow.CMakeLists.txt.cmake
deleted file mode 100644
index b1eaf3f0efa..00000000000
--- a/cpp/cmake/Templates/Arrow.CMakeLists.txt.cmake
+++ /dev/null
@@ -1,19 +0,0 @@
-cmake_minimum_required(VERSION 3.12)
-
-include(ExternalProject)
-
-ExternalProject_Add(Arrow
- GIT_REPOSITORY https://github.com/apache/arrow.git
- GIT_TAG apache-arrow-0.12.1
- SOURCE_DIR "${ARROW_ROOT}/arrow"
- SOURCE_SUBDIR "cpp"
- BINARY_DIR "${ARROW_ROOT}/build"
- INSTALL_DIR "${ARROW_ROOT}/install"
- CMAKE_ARGS ${ARROW_CMAKE_ARGS} -DCMAKE_INSTALL_PREFIX=${ARROW_ROOT}/install)
-
-
-
-
-
-
-
diff --git a/cpp/cmake/Templates/GoogleTest.CMakeLists.txt.cmake b/cpp/cmake/Templates/GoogleTest.CMakeLists.txt.cmake
deleted file mode 100644
index 66e1dc85a50..00000000000
--- a/cpp/cmake/Templates/GoogleTest.CMakeLists.txt.cmake
+++ /dev/null
@@ -1,19 +0,0 @@
-cmake_minimum_required(VERSION 3.12)
-
-include(ExternalProject)
-
-ExternalProject_Add(GoogleTest
- GIT_REPOSITORY https://github.com/google/googletest.git
- GIT_TAG release-1.8.0
- SOURCE_DIR "${GTEST_ROOT}/googletest"
- BINARY_DIR "${GTEST_ROOT}/build"
- INSTALL_DIR "${GTEST_ROOT}/install"
- CMAKE_ARGS ${GTEST_CMAKE_ARGS} -DCMAKE_INSTALL_PREFIX=${GTEST_ROOT}/install)
-
-
-
-
-
-
-
-
diff --git a/cpp/cmake/faiss_cuda11.patch b/cpp/cmake/faiss_cuda11.patch
new file mode 100644
index 00000000000..496ca0e7b23
--- /dev/null
+++ b/cpp/cmake/faiss_cuda11.patch
@@ -0,0 +1,40 @@
+diff --git a/configure b/configure
+index ed40dae..f88ed0a 100755
+--- a/configure
++++ b/configure
+@@ -2970,7 +2970,7 @@ ac_link='$CXX -o conftest$ac_exeext $CXXFLAGS $CPPFLAGS $LDFLAGS conftest.$ac_ex
+ ac_compiler_gnu=$ac_cv_cxx_compiler_gnu
+
+
+- ax_cxx_compile_alternatives="11 0x" ax_cxx_compile_cxx11_required=true
++ ax_cxx_compile_alternatives="14 11 0x" ax_cxx_compile_cxx11_required=true
+ ac_ext=cpp
+ ac_cpp='$CXXCPP $CPPFLAGS'
+ ac_compile='$CXX -c $CXXFLAGS $CPPFLAGS conftest.$ac_ext >&5'
+diff --git a/gpu/utils/DeviceDefs.cuh b/gpu/utils/DeviceDefs.cuh
+index 89d3dda..bc0f9b5 100644
+--- a/gpu/utils/DeviceDefs.cuh
++++ b/gpu/utils/DeviceDefs.cuh
+@@ -13,7 +13,7 @@
+ namespace faiss { namespace gpu {
+
+ #ifdef __CUDA_ARCH__
+-#if __CUDA_ARCH__ <= 750
++#if __CUDA_ARCH__ <= 800
+ constexpr int kWarpSize = 32;
+ #else
+ #error Unknown __CUDA_ARCH__; please define parameters for compute capability
+diff --git a/gpu/utils/MatrixMult-inl.cuh b/gpu/utils/MatrixMult-inl.cuh
+index ede225e..4f7eb44 100644
+--- a/gpu/utils/MatrixMult-inl.cuh
++++ b/gpu/utils/MatrixMult-inl.cuh
+@@ -51,6 +51,9 @@ rawGemm(cublasHandle_t handle,
+ auto cBT = GetCudaType::Type;
+
+ // Always accumulate in f32
++# if __CUDACC_VER_MAJOR__ >= 11
++ cublasSetMathMode(handle, CUBLAS_MATH_DISALLOW_REDUCED_PRECISION_REDUCTION);
++# endif
+ return cublasSgemmEx(handle, transa, transb, m, n, k,
+ &fAlpha, A, cAT, lda,
+ B, cBT, ldb,
diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp
index a57e550521e..c666bce23ad 100644
--- a/cpp/include/algorithms.hpp
+++ b/cpp/include/algorithms.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -14,7 +14,7 @@
* limitations under the License.
*/
#pragma once
-
+#include
#include
#include
#include
@@ -22,78 +22,6 @@
namespace cugraph {
-/**
- * @brief Find the PageRank vertex values for a graph.
- *
- * cuGraph computes an approximation of the Pagerank eigenvector using the power method.
- * The number of iterations depends on the properties of the network itself; it increases
- * when the tolerance descreases and/or alpha increases toward the limiting value of 1.
- * The user is free to use default values or to provide inputs for the initial guess,
- * tolerance and maximum number of iterations.
-
- *
- * @throws cugraph::logic_error with a custom message when an error
- occurs.
- *
- * @tparam VT Type of vertex identifiers. Supported value : int (signed,
- 32-bit)
- * @tparam ET Type of edge identifiers. Supported value : int (signed,
- 32-bit)
- * @tparam WT Type of edge weights. Supported value : float or double.
- *
- * @param[in] handle Library handle (RAFT). If a communicator is set in the handle,
- the multi GPU version will be selected.
- * @param[in] graph cuGraph graph descriptor, should contain the connectivity
- information as a transposed adjacency list (CSC). Edge weights are not used for this algorithm.
- * @param[in] alpha The damping factor alpha represents the probability to follow
- an outgoing edge, standard value is 0.85. Thus, 1.0-alpha is the probability to “teleport” to a
- random vertex. Alpha should be greater than 0.0 and strictly lower than 1.0.
- * The initial guess must not be the vector of 0s. Any value other
- than 1 or 0 is treated as an invalid value.
- * @param[in] pagerank Array of size V. Should contain the initial guess if
- has_guess=true. In this case the initial guess cannot be the vector of 0s. Memory is provided and
- owned by the caller.
- * @param[in] personalization_subset_size (optional) Supported on single-GPU, on the roadmap for
- Multi-GPU. The number of vertices for to personalize. Initialized to 0 by default.
- * @param[in] personalization_subset (optional) Supported on single-GPU, on the roadmap for
- Multi-GPU..= Array of size personalization_subset_size containing vertices for running personalized
- pagerank. Initialized to nullptr by default. Memory is provided and owned by the caller.
- * @param[in] personalization_values (optional) Supported on single-GPU, on the roadmap for
- Multi-GPU. Array of size personalization_subset_size containing values associated with
- personalization_subset vertices. Initialized to nullptr by default. Memory is provided and owned by
- the caller.
- * @param[in] tolerance Supported on single-GPU. Set the tolerance the approximation,
- this parameter should be a small magnitude value.
- * The lower the tolerance the better the approximation. If this
- value is 0.0f, cuGraph will use the default value which is 1.0E-5.
- * Setting too small a tolerance can lead to non-convergence due
- to numerical roundoff. Usually values between 0.01 and 0.00001 are acceptable.
- * @param[in] max_iter (optional) The maximum number of iterations before an answer is
- returned. This can be used to limit the execution time and do an early exit before the solver
- reaches the convergence tolerance.
- * If this value is lower or equal to 0 cuGraph will use the
- default value, which is 500.
- * @param[in] has_guess (optional) Supported on single-GPU. This parameter is used to
- notify cuGraph if it should use a user-provided initial guess. False means the user does not have a
- guess, in this case cuGraph will use a uniform vector set to 1/V.
- * If the value is True, cuGraph will read the pagerank parameter
- and use this as an initial guess.
- * @param[out] *pagerank The PageRank : pagerank[i] is the PageRank of vertex i. Memory
- remains provided and owned by the caller.
- *
- */
-template
-void pagerank(raft::handle_t const &handle,
- GraphCSCView const &graph,
- WT *pagerank,
- VT personalization_subset_size = 0,
- VT *personalization_subset = nullptr,
- WT *personalization_values = nullptr,
- double alpha = 0.85,
- double tolerance = 1e-5,
- int64_t max_iter = 500,
- bool has_guess = false);
-
/**
* @brief Compute jaccard similarity coefficient for all vertices
*
@@ -264,6 +192,44 @@ void force_atlas2(GraphCOOView &graph,
bool verbose = false,
internals::GraphBasedDimRedCallback *callback = nullptr);
+/**
+ * @brief Finds an approximate solution to the traveling salesperson problem (TSP).
+ * cuGraph computes an approximation of the TSP problem using hill climbing
+ * optimization.
+ *
+ * The current implementation does not support a weighted graph.
+ *
+ * @throws cugraph::logic_error when an error occurs.
+ * @param[in] handle Library handle (RAFT). If a communicator is set in the
+ * handle, the multi GPU version will be selected.
+ * @param[in] vtx_ptr Device array containing the vertex identifiers used
+ * to initialize the route.
+ * @param[in] x_pos Device array containing starting x-axis positions.
+ * @param[in] y_pos Device array containing starting y-axis positions.
+ * @param[in] nodes Number of cities.
+ * @param[in] restarts Number of starts to try. The more restarts,
+ * the better the solution will be approximated. The number of restarts depends on the problem
+ * size and should be kept low for instances above 2k cities.
+ * @param[in] beam_search Specify if the initial solution should use KNN
+ * for an approximation solution.
+ * @param[in] k Beam width to use in the search.
+ * @param[in] nstart Start from a specific position.
+ * @param[in] verbose Logs configuration and iterative improvement.
+ * @param[out] route Device array containing the returned route.
+ *
+ */
+float traveling_salesperson(raft::handle_t &handle,
+ int const *vtx_ptr,
+ float const *x_pos,
+ float const *y_pos,
+ int nodes,
+ int restarts,
+ bool beam_search,
+ int k,
+ int nstart,
+ bool verbose,
+ int *route);
+
/**
* @brief Compute betweenness centrality for a graph
*
@@ -815,6 +781,7 @@ template
std::unique_ptr> extract_subgraph_vertex(GraphCOOView const &graph,
VT const *vertices,
VT num_vertices);
+} // namespace subgraph
/**
* @brief Wrapper function for Nvgraph balanced cut clustering
@@ -837,7 +804,6 @@ std::unique_ptr> extract_subgraph_vertex(GraphCOOView
@@ -1191,6 +1157,35 @@ void katz_centrality(raft::handle_t const &handle,
bool has_initial_guess = false,
bool normalize = false,
bool do_expensive_check = false);
-
+/**
+ * @brief returns induced EgoNet subgraph(s) of neighbors centered at nodes in source_vertex within
+ * a given radius.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Graph view object; we extract induced egonet subgraphs from @p graph_view.
+ * @param source_vertex Pointer to egonet center vertices (size == @p n_subgraphs).
+ * @param n_subgraphs Number of induced EgoNet subgraphs to extract (i.e. number of elements in @p
+ * source_vertex).
+ * @param radius Include all neighbors of distance <= radius from @p source_vertex.
+ * @return std::tuple, rmm::device_uvector,
+ * rmm::device_uvector, rmm::device_uvector> Quadruplet of edge source vertices,
+ * edge destination vertices, edge weights, and edge offsets for each induced EgoNet subgraph.
+ */
+template
+std::tuple,
+ rmm::device_uvector,
+ rmm::device_uvector,
+ rmm::device_uvector>
+extract_ego(raft::handle_t const &handle,
+ graph_view_t const &graph_view,
+ vertex_t *source_vertex,
+ vertex_t n_subgraphs,
+ vertex_t radius);
} // namespace experimental
} // namespace cugraph
diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh
index bf56b2e6f80..3ac2e2163c6 100644
--- a/cpp/include/experimental/detail/graph_utils.cuh
+++ b/cpp/include/experimental/detail/graph_utils.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -17,6 +17,8 @@
#include
#include
+#include
+#include
#include
#include
@@ -24,8 +26,10 @@
#include
#include
+#include
#include
+#include
#include
namespace cugraph {
@@ -137,6 +141,38 @@ struct degree_from_offsets_t {
__device__ edge_t operator()(vertex_t v) { return offsets[v + 1] - offsets[v]; }
};
+template
+struct compute_gpu_id_from_vertex_t {
+ int comm_size{0};
+
+ __device__ int operator()(vertex_t v) const
+ {
+ cuco::detail::MurmurHash3_32 hash_func{};
+ return hash_func(v) % comm_size;
+ }
+};
+
+template
+struct compute_gpu_id_from_edge_t {
+ bool hypergraph_partitioned{false};
+ int comm_size{0};
+ int row_comm_size{0};
+ int col_comm_size{0};
+
+ __device__ int operator()(vertex_t major, vertex_t minor) const
+ {
+ cuco::detail::MurmurHash3_32 hash_func{};
+ auto major_comm_rank = static_cast(hash_func(major) % comm_size);
+ auto minor_comm_rank = static_cast(hash_func(minor) % comm_size);
+ if (hypergraph_partitioned) {
+ return (minor_comm_rank / col_comm_size) * row_comm_size + (major_comm_rank % row_comm_size);
+ } else {
+ return (major_comm_rank - (major_comm_rank % row_comm_size)) +
+ (minor_comm_rank / col_comm_size);
+ }
+ }
+};
+
} // namespace detail
} // namespace experimental
} // namespace cugraph
diff --git a/cpp/include/experimental/graph.hpp b/cpp/include/experimental/graph.hpp
index 592294c8967..cc21f7c5013 100644
--- a/cpp/include/experimental/graph.hpp
+++ b/cpp/include/experimental/graph.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -62,7 +62,7 @@ class graph_t> const &edge_lists,
+ std::vector> const &edgelists,
partition_t const &partition,
vertex_t number_of_vertices,
edge_t number_of_edges,
@@ -70,7 +70,7 @@ class graph_t view()
+ graph_view_t view() const
{
std::vector offsets(adj_matrix_partition_offsets_.size(), nullptr);
std::vector indices(adj_matrix_partition_indices_.size(), nullptr);
@@ -124,7 +124,7 @@ class graph_t const &edge_list,
+ edgelist_t const &edgelist,
vertex_t number_of_vertices,
graph_properties_t properties,
bool sorted_by_degree,
@@ -132,7 +132,7 @@ class graph_tget_number_of_vertices(); }
- graph_view_t view()
+ graph_view_t view() const
{
return graph_view_t(
*(this->get_handle_ptr()),
diff --git a/cpp/include/experimental/graph_functions.hpp b/cpp/include/experimental/graph_functions.hpp
new file mode 100644
index 00000000000..7b4bb466b97
--- /dev/null
+++ b/cpp/include/experimental/graph_functions.hpp
@@ -0,0 +1,291 @@
+/*
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include
+#include
+#include
+
+#include
+#include
+
+#include
+#include
+
+namespace cugraph {
+namespace experimental {
+
+/**
+ * @brief renumber edgelist (multi-GPU)
+ *
+ * This function assumes that edges are pre-shuffled to their target processes using the
+ * compute_gpu_id_from_edge_t functor.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as
+ * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex
+ * IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t functor to
+ * every (major, minor) pair should return the local GPU ID for this function to work (edges should
+ * be pre-shuffled).
+ * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is
+ * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored).
+ * Vertex IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t
+ * functor to every (major, minor) pair should return the local GPU ID for this function to work
+ * (edges should be pre-shuffled).
+ * @param num_edgelist_edges Number of edges in the edgelist.
+ * @param is_hypergraph_partitioned Flag indicating whether we are assuming hypergraph partitioning
+ * (this flag will be removed in the future).
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return std::tuple, partition_t, vertex_t, edge_t>
+ * Quadruplet of labels (vertex IDs before renumbering) for the entire set of vertices (assigned to
+ * this process in multi-GPU), partition_t object storing graph partitioning information, total
+ * number of vertices, and total number of edges.
+ */
+template
+std::enable_if_t, partition_t, vertex_t, edge_t>>
+renumber_edgelist(raft::handle_t const& handle,
+ vertex_t* edgelist_major_vertices /* [INOUT] */,
+ vertex_t* edgelist_minor_vertices /* [INOUT] */,
+ edge_t num_edgelist_edges,
+ bool is_hypergraph_partitioned,
+ bool do_expensive_check = false);
+
+/**
+ * @brief renumber edgelist (single-GPU)
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as
+ * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex
+ * IDs are updated in-place ([INOUT] parameter).
+ * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is
+ * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored).
+ * Vertex IDs are updated in-place ([INOUT] parameter).
+ * @param num_edgelist_edges Number of edges in the edgelist.
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return rmm::device_uvector Labels (vertex IDs before renumbering) for the entire set
+ * of vertices.
+ */
+template
+std::enable_if_t> renumber_edgelist(
+ raft::handle_t const& handle,
+ vertex_t* edgelist_major_vertices /* [INOUT] */,
+ vertex_t* edgelist_minor_vertices /* [INOUT] */,
+ edge_t num_edgelist_edges,
+ bool do_expensive_check = false);
+
+/**
+ * @brief renumber edgelist (multi-GPU)
+ *
+ * This version takes the vertex set in addition; this allows renumbering to include isolated
+ * vertices. This function assumes that vertices and edges are pre-shuffled to their target
+ * processes using the compute_gpu_id_from_vertex_t & compute_gpu_id_from_edge_t functors,
+ * respectively.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param local_vertices Part of the entire set of vertices in the graph to be renumbered. Applying
+ * the compute_gpu_id_from_vertex_t to every vertex should return the local GPU ID for this function
+ * to work (vertices should be pre-shuffled).
+ * @param num_local_vertices Number of local vertices.
+ * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as
+ * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex
+ * IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t functor to
+ * every (major, minor) pair should return the local GPU ID for this function to work (edges should
+ * be pre-shuffled).
+ * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is
+ * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored).
+ * Vertex IDs are updated in-place ([INOUT] parameter). Applying the compute_gpu_id_from_edge_t
+ * functor to every (major, minor) pair should return the local GPU ID for this function to work
+ * (edges should be pre-shuffled).
+ * @param num_edgelist_edges Number of edges in the edgelist.
+ * @param is_hypergraph_partitioned Flag indicating whether we are assuming hypergraph partitioning
+ * (this flag will be removed in the future).
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return std::tuple, partition_t, vertex_t, edge_t>
+ * Quadruplet of labels (vertex IDs before renumbering) for the entire set of vertices (assigned to
+ * this process in multi-GPU), partition_t object storing graph partitioning information, total
+ * number of vertices, and total number of edges.
+ */
+template
+std::enable_if_t, partition_t, vertex_t, edge_t>>
+renumber_edgelist(raft::handle_t const& handle,
+ vertex_t const* local_vertices,
+ vertex_t num_local_vertices,
+ vertex_t* edgelist_major_vertices /* [INOUT] */,
+ vertex_t* edgelist_minor_vertices /* [INOUT] */,
+ edge_t num_edgelist_edges,
+ bool is_hypergraph_partitioned,
+ bool do_expensive_check = false);
+
+/**
+ * @brief renumber edgelist (single-GPU)
+ *
+ * This version takes the vertex set in addition; this allows renumbering to include isolated
+ * vertices.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param vertices The entire set of vertices in the graph to be renumbered.
+ * @param num_vertices Number of vertices.
+ * @param edgelist_major_vertices Edge source vertex IDs (if the graph adjacency matrix is stored as
+ * is) or edge destination vertex IDs (if the transposed graph adjacency matrix is stored). Vertex
+ * IDs are updated in-place ([INOUT] parameter).
+ * @param edgelist_minor_vertices Edge destination vertex IDs (if the graph adjacency matrix is
+ * stored as is) or edge source vertex IDs (if the transposed graph adjacency matrix is stored).
+ * Vertex IDs are updated in-place ([INOUT] parameter).
+ * @param num_edgelist_edges Number of edges in the edgelist.
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return rmm::device_uvector Labels (vertex IDs before renumbering) for the entire set
+ * of vertices.
+ */
+template
+std::enable_if_t> renumber_edgelist(
+ raft::handle_t const& handle,
+ vertex_t const* vertices,
+ vertex_t num_vertices,
+ vertex_t* edgelist_major_vertices /* [INOUT] */,
+ vertex_t* edgelist_minor_vertices /* [INOUT] */,
+ edge_t num_edgelist_edges,
+ bool do_expensive_check = false);
+
+/**
+ * @brief Compute the coarsened graph.
+ *
+ * Aggregates the vertices with the same label to a new vertex in the output coarsened graph.
+ * Multi-edges in the coarsened graph are collapsed to a single edge with its weight equal to the
+ * sum of multi-edge weights.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type.
+ * @tparam store_transposed Flag indicating whether to store the graph adjacency matrix as is or as
+ * transposed.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Graph view object of the input graph to be coarsened.
+ * @param labels Vertex labels (assigned to this process in multi-GPU) to be used in coarsening.
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return std::tuple>, rmm::device_uvector> Tuple of the coarsened graph and labels mapped to the
+ * vertices (assigned to this process in multi-GPU) in the coarsened graph.
+ */
+template
+std::tuple>,
+ rmm::device_uvector>
+coarsen_graph(
+ raft::handle_t const& handle,
+ graph_view_t const& graph_view,
+ vertex_t const* labels,
+ bool do_expensive_check = false);
+
+/**
+ * @brief Relabel old labels to new labels.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param old_new_label_pairs Pairs of an old label and the corresponding new label (each process
+ * holds only part of the entire old labels and the corresponding new labels; partitioning can be
+ * arbitrary).
+ * @param num_label_pairs Number of (old, new) label pairs.
+ * @param labels Labels to be relabeled. This initially holds old labels. Old labels are updated to
+ * new labels in-place ([INOUT] parameter).
+ * @param num_labels Number of labels to be relabeled.
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @note Returns nothing; @p labels is overwritten in-place with the new labels.
+ */
+template
+void relabel(raft::handle_t const& handle,
+ std::tuple old_new_label_pairs,
+ vertex_t num_label_pairs,
+ vertex_t* labels /* [INOUT] */,
+ vertex_t num_labels,
+ bool do_expensive_check = false);
+
+/**
+ * @brief extract induced subgraph(s).
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights.
+ * @tparam store_transposed Flag indicating whether to store the graph adjacency matrix as is or as
+ * transposed.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Graph view object, we extract induced subgraphs from @p graph_view.
+ * @param subgraph_offsets Pointer to subgraph vertex offsets (size == @p num_subgraphs + 1).
+ * @param subgraph_vertices Pointer to subgraph vertices (size == @p subgraph_offsets[@p
+ * num_subgraphs]). The elements of @p subgraph_vertices for each subgraph should be sorted in
+ * ascending order and unique.
+ * @param num_subgraphs Number of induced subgraphs to extract.
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return std::tuple, rmm::device_uvector,
+ * rmm::device_uvector, rmm::device_uvector> Quadruplet of edge major (destination
+ * if @p store_transposed is true, source otherwise) vertices, edge minor (source if @p
+ * store_transposed is true, destination otherwise) vertices, edge weights, and edge offsets for
+ * each induced subgraph (size == num_subgraphs + 1). The sizes of the edge major & minor vertices
+ * are edge_offsets[num_subgraphs]. The size of the edge weights is either
+ * edge_offsets[num_subgraphs] (if @p graph_view is weighted) or 0 (if @p graph_view is unweighted).
+ */
+template
+std::tuple,
+ rmm::device_uvector,
+ rmm::device_uvector,
+ rmm::device_uvector>
+extract_induced_subgraphs(
+ raft::handle_t const& handle,
+ graph_view_t const& graph_view,
+ size_t const* subgraph_offsets /* size == num_subgraphs + 1 */,
+ vertex_t const* subgraph_vertices /* size == subgraph_offsets[num_subgraphs] */,
+ size_t num_subgraphs,
+ bool do_expensive_check = false);
+
+} // namespace experimental
+} // namespace cugraph
diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp
index ba327047b1d..d2ae1150970 100644
--- a/cpp/include/experimental/graph_view.hpp
+++ b/cpp/include/experimental/graph_view.hpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -332,6 +332,7 @@ class graph_view_t 0; }
+ // FIXME: this should be removed once MNMG Louvain is updated to use graph primitives
partition_t get_partition() const { return partition_; }
vertex_t get_number_of_local_vertices() const
@@ -399,6 +400,11 @@ class graph_view_t adj_matrix_partition_offsets_{};
std::vector adj_matrix_partition_indices_{};
std::vector adj_matrix_partition_weights_{};
+ std::vector adj_matrix_partition_number_of_edges_{};
partition_t partition_{};
@@ -567,6 +574,12 @@ class graph_view_tget_number_of_vertices();
}
+ edge_t get_number_of_local_adj_matrix_partition_edges(size_t adj_matrix_partition_idx) const
+ {
+ assert(adj_matrix_partition_idx == 0);
+ return this->get_number_of_edges();
+ }
+
vertex_t get_local_adj_matrix_partition_row_first(size_t adj_matrix_partition_idx) const
{
assert(adj_matrix_partition_idx == 0);
@@ -629,6 +642,7 @@ class graph_view_t segment_offsets_{}; // segment offsets based on vertex degree, relevant
// only if sorted_by_global_degree is true
};
diff --git a/cpp/include/matrix_partition_device.cuh b/cpp/include/matrix_partition_device.cuh
index 53796530f60..b41119e7be6 100644
--- a/cpp/include/matrix_partition_device.cuh
+++ b/cpp/include/matrix_partition_device.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -30,11 +30,14 @@ class matrix_partition_device_base_t {
public:
matrix_partition_device_base_t(edge_t const* offsets,
vertex_t const* indices,
- weight_t const* weights)
- : offsets_(offsets), indices_(indices), weights_(weights)
+ weight_t const* weights,
+ edge_t number_of_edges)
+ : offsets_(offsets), indices_(indices), weights_(weights), number_of_edges_(number_of_edges)
{
}
+ __host__ __device__ edge_t get_number_of_edges() const { return number_of_edges_; }
+
__device__ thrust::tuple get_local_edges(
vertex_t major_offset) const noexcept
{
@@ -50,11 +53,17 @@ class matrix_partition_device_base_t {
return *(offsets_ + (major_offset + 1)) - *(offsets_ + major_offset);
}
+ __device__ edge_t get_local_offset(vertex_t major_offset) const noexcept
+ {
+ return *(offsets_ + major_offset);
+ }
+
private:
// should be trivially copyable to device
edge_t const* offsets_{nullptr};
vertex_t const* indices_{nullptr};
weight_t const* weights_{nullptr};
+ edge_t number_of_edges_{0};
};
template
@@ -73,7 +82,8 @@ class matrix_partition_device_t(
graph_view.offsets(partition_idx),
graph_view.indices(partition_idx),
- graph_view.weights(partition_idx)),
+ graph_view.weights(partition_idx),
+ graph_view.get_number_of_local_adj_matrix_partition_edges(partition_idx)),
major_first_(GraphViewType::is_adj_matrix_transposed
? graph_view.get_local_adj_matrix_partition_col_first(partition_idx)
: graph_view.get_local_adj_matrix_partition_row_first(partition_idx)),
@@ -93,7 +103,7 @@ class matrix_partition_device_t(
- graph_view.offsets(), graph_view.indices(), graph_view.weights()),
+ graph_view.offsets(),
+ graph_view.indices(),
+ graph_view.weights(),
+ graph_view.get_number_of_edges()),
number_of_vertices_(graph_view.get_number_of_vertices())
{
assert(partition_idx == 0);
diff --git a/cpp/include/patterns/any_of_adj_matrix_row.cuh b/cpp/include/patterns/any_of_adj_matrix_row.cuh
index 199e7c230ef..a367ec2a50c 100644
--- a/cpp/include/patterns/any_of_adj_matrix_row.cuh
+++ b/cpp/include/patterns/any_of_adj_matrix_row.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -16,8 +16,8 @@
#pragma once
#include
-#include
#include
+#include
#include
#include
diff --git a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh b/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh
index 760775c03d4..d4559de06af 100644
--- a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh
+++ b/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,8 +18,10 @@
#include
#include
#include
-#include
+#include
+#include
#include
+#include
#include
#include
@@ -120,10 +122,10 @@ void copy_to_matrix_major(raft::handle_t const& handle,
for (int i = 0; i < row_comm_size; ++i) {
rmm::device_uvector rx_vertices(row_comm_rank == i ? size_t{0} : rx_counts[i],
handle.get_stream());
- auto rx_tmp_buffer =
- allocate_comm_buffer::value_type>(
- rx_counts[i], handle.get_stream());
- auto rx_value_first = get_comm_buffer_begin<
+ auto rx_tmp_buffer = allocate_dataframe_buffer<
+ typename std::iterator_traits::value_type>(rx_counts[i],
+ handle.get_stream());
+ auto rx_value_first = get_dataframe_buffer_begin<
typename std::iterator_traits::value_type>(rx_tmp_buffer);
if (row_comm_rank == i) {
@@ -173,12 +175,6 @@ void copy_to_matrix_major(raft::handle_t const& handle,
map_first,
matrix_major_value_output_first);
}
-
- CUDA_TRY(cudaStreamSynchronize(
- handle.get_stream())); // this is as necessary rx_tmp_buffer will become out-of-scope
- // once control flow exits this block (FIXME: we can reduce stream
- // synchronization if we compute the maximum rx_counts and
- // allocate rx_tmp_buffer outside the loop)
}
}
} else {
@@ -219,7 +215,7 @@ void copy_to_matrix_minor(raft::handle_t const& handle,
// partitioning
auto comm_src_rank = row_comm_rank * col_comm_size + col_comm_rank;
auto comm_dst_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size;
- // FIXME: this branch may no longer necessary with NCCL backend
+ // FIXME: this branch may no longer be necessary with NCCL backend
if (comm_src_rank == comm_rank) {
assert(comm_dst_rank == comm_rank);
thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
@@ -314,12 +310,11 @@ void copy_to_matrix_minor(raft::handle_t const& handle,
vertex_partition_device_t vertex_partition(graph_view);
rmm::device_uvector dst_vertices(rx_count, handle.get_stream());
- auto dst_tmp_buffer =
- allocate_comm_buffer::value_type>(
- rx_count, handle.get_stream());
- auto dst_value_first =
- get_comm_buffer_begin::value_type>(
- dst_tmp_buffer);
+ auto dst_tmp_buffer = allocate_dataframe_buffer<
+ typename std::iterator_traits::value_type>(rx_count,
+ handle.get_stream());
+ auto dst_value_first = get_dataframe_buffer_begin<
+ typename std::iterator_traits::value_type>(dst_tmp_buffer);
if (comm_src_rank == comm_rank) {
thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
vertex_first,
@@ -335,10 +330,10 @@ void copy_to_matrix_minor(raft::handle_t const& handle,
vertex_value_input_first,
dst_value_first);
} else {
- auto src_tmp_buffer =
- allocate_comm_buffer::value_type>(
- tx_count, handle.get_stream());
- auto src_value_first = get_comm_buffer_begin<
+ auto src_tmp_buffer = allocate_dataframe_buffer<
+ typename std::iterator_traits::value_type>(tx_count,
+ handle.get_stream());
+ auto src_value_first = get_dataframe_buffer_begin<
typename std::iterator_traits::value_type>(src_tmp_buffer);
auto map_first =
@@ -369,10 +364,6 @@ void copy_to_matrix_minor(raft::handle_t const& handle,
rx_count,
comm_src_rank,
handle.get_stream());
-
- CUDA_TRY(cudaStreamSynchronize(
- handle.get_stream())); // this is as necessary src_tmp_buffer will become out-of-scope
- // once control flow exits this block
}
// FIXME: now we can clear tx_tmp_buffer
@@ -383,10 +374,10 @@ void copy_to_matrix_minor(raft::handle_t const& handle,
for (int i = 0; i < col_comm_size; ++i) {
rmm::device_uvector rx_vertices(col_comm_rank == i ? size_t{0} : rx_counts[i],
handle.get_stream());
- auto rx_tmp_buffer =
- allocate_comm_buffer::value_type>(
- rx_counts[i], handle.get_stream());
- auto rx_value_first = get_comm_buffer_begin<
+ auto rx_tmp_buffer = allocate_dataframe_buffer<
+ typename std::iterator_traits::value_type>(rx_counts[i],
+ handle.get_stream());
+ auto rx_value_first = get_dataframe_buffer_begin<
typename std::iterator_traits::value_type>(rx_tmp_buffer);
// FIXME: these broadcast operations can be placed between ncclGroupStart() and
@@ -423,17 +414,7 @@ void copy_to_matrix_minor(raft::handle_t const& handle,
map_first,
matrix_minor_value_output_first);
}
-
- CUDA_TRY(cudaStreamSynchronize(
- handle.get_stream())); // this is as necessary rx_tmp_buffer will become out-of-scope
- // once control flow exits this block (FIXME: we can reduce stream
- // synchronization if we compute the maximum rx_counts and
- // allocate rx_tmp_buffer outside the loop)
}
-
- CUDA_TRY(cudaStreamSynchronize(
- handle.get_stream())); // this is as necessary dst_tmp_buffer will become out-of-scope once
- // control flow exits this block
}
} else {
assert(graph_view.get_number_of_local_vertices() ==
diff --git a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh
index f3c36897dd6..3059cf95852 100644
--- a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh
+++ b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -19,7 +19,8 @@
#include
#include
#include
-#include
+#include
+#include
#include
#include
@@ -377,8 +378,8 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle,
? graph_view.get_number_of_local_adj_matrix_partition_rows()
: graph_view.get_number_of_local_adj_matrix_partition_cols()
: vertex_t{0};
- auto minor_tmp_buffer = allocate_comm_buffer(minor_tmp_buffer_size, handle.get_stream());
- auto minor_buffer_first = get_comm_buffer_begin(minor_tmp_buffer);
+ auto minor_tmp_buffer = allocate_dataframe_buffer(minor_tmp_buffer_size, handle.get_stream());
+ auto minor_buffer_first = get_dataframe_buffer_begin(minor_tmp_buffer);
if (in != GraphViewType::is_adj_matrix_transposed) {
auto minor_init = init;
@@ -424,8 +425,9 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle,
: graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i)
: vertex_t{0};
}
- auto major_tmp_buffer = allocate_comm_buffer(major_tmp_buffer_size, handle.get_stream());
- auto major_buffer_first = get_comm_buffer_begin(major_tmp_buffer);
+ auto major_tmp_buffer =
+ allocate_dataframe_buffer(major_tmp_buffer_size, handle.get_stream());
+ auto major_buffer_first = get_dataframe_buffer_begin(major_tmp_buffer);
auto major_init = T{};
if (in == GraphViewType::is_adj_matrix_transposed) {
@@ -523,12 +525,6 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle,
handle.get_stream());
}
}
-
- CUDA_TRY(cudaStreamSynchronize(
- handle.get_stream())); // this is as necessary major_tmp_buffer will become out-of-scope once
- // control flow exits this block (FIXME: we can reduce stream
- // synchronization if we compute the maximum major_tmp_buffer_size and
- // allocate major_tmp_buffer outside the loop)
}
if (GraphViewType::is_multi_gpu && (in != GraphViewType::is_adj_matrix_transposed)) {
@@ -590,10 +586,6 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle,
}
}
}
-
- CUDA_TRY(cudaStreamSynchronize(
- handle.get_stream())); // this is as necessary minor_tmp_buffer will become out-of-scope once
- // control flow exits this block
}
} // namespace detail
@@ -627,7 +619,7 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle,
* weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first +
* j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0,
* get_number_of_local_adj_matrix_partition_cols())) and returns a value to be reduced.
- * @param init Initial value to be added to the reduced @e_op return values for each vertex.
+ * @param init Initial value to be added to the reduced @p e_op return values for each vertex.
* @param vertex_value_output_first Iterator pointing to the vertex property variables for the first
* (inclusive) vertex (assigned to tihs process in multi-GPU). `vertex_value_output_last`
* (exclusive) is deduced as @p vertex_value_output_first + @p
@@ -689,7 +681,7 @@ void copy_v_transform_reduce_in_nbr(raft::handle_t const& handle,
* adj_matrix_col_value_input_first + j) (where i is in [0,
* graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0,
* get_number_of_local_adj_matrix_partition_cols())) and returns a value to be reduced.
- * @param init Initial value to be added to the reduced @e_op return values for each vertex.
+ * @param init Initial value to be added to the reduced @p e_op return values for each vertex.
* @param vertex_value_output_first Iterator pointing to the vertex property variables for the
* first (inclusive) vertex (assigned to tihs process in multi-GPU). `vertex_value_output_last`
* (exclusive) is deduced as @p vertex_value_output_first + @p
diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh
new file mode 100644
index 00000000000..785f8197aff
--- /dev/null
+++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh
@@ -0,0 +1,522 @@
+/*
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+
+#include
+
+namespace cugraph {
+namespace experimental {
+
+namespace detail {
+
+// FIXME: block size requires tuning
+int32_t constexpr copy_v_transform_reduce_key_aggregated_out_nbr_for_all_block_size = 128;
+
+template
+__global__ void for_all_major_for_all_nbr_low_degree(
+ matrix_partition_device_t matrix_partition,
+ typename GraphViewType::vertex_type major_first,
+ typename GraphViewType::vertex_type major_last,
+ VertexIterator adj_matrix_minor_key_first,
+ typename GraphViewType::vertex_type* major_vertices,
+ typename GraphViewType::vertex_type* minor_keys,
+ typename GraphViewType::weight_type* key_aggregated_edge_weights,
+ typename GraphViewType::vertex_type invalid_vertex)
+{
+ using vertex_t = typename GraphViewType::vertex_type;
+ using edge_t = typename GraphViewType::edge_type;
+ using weight_t = typename GraphViewType::weight_type;
+ // Per major in [major_first, major_last): sort its neighbors' keys, then merge equal keys.
+ auto const tid = threadIdx.x + blockIdx.x * blockDim.x;
+ auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first());
+ auto idx = static_cast(tid);
+
+ while (idx < static_cast(major_last - major_first)) {
+ vertex_t const* indices{nullptr};
+ weight_t const* weights{nullptr};
+ edge_t local_degree{};
+ auto major_offset = major_start_offset + idx;
+ thrust::tie(indices, weights, local_degree) =
+ matrix_partition.get_local_edges(static_cast(major_offset));
+ if (local_degree > 0) {
+ auto local_offset = matrix_partition.get_local_offset(major_offset);
+ auto minor_key_first = thrust::make_transform_iterator(
+ indices, [matrix_partition, adj_matrix_minor_key_first] __device__(auto minor) {
+ return *(adj_matrix_minor_key_first +
+ matrix_partition.get_minor_offset_from_minor_nocheck(minor));
+ });
+ thrust::copy(
+ thrust::seq, minor_key_first, minor_key_first + local_degree, minor_keys + local_offset);
+ if (weights == nullptr) {
+ thrust::sort(
+ thrust::seq, minor_keys + local_offset, minor_keys + local_offset + local_degree);
+ } else {
+ thrust::copy(
+ thrust::seq, weights, weights + local_degree, key_aggregated_edge_weights + local_offset);
+ thrust::sort_by_key(thrust::seq,
+ minor_keys + local_offset,
+ minor_keys + local_offset + local_degree,
+ key_aggregated_edge_weights + local_offset);
+ }
+ // in-place reduce_by_key; read the key-sorted weight copy (weights[] is still unsorted)
+ vertex_t key_idx{0};
+ key_aggregated_edge_weights[local_offset + key_idx] =
+ weights != nullptr ? key_aggregated_edge_weights[local_offset] : weight_t{1.0};
+ for (edge_t i = 1; i < local_degree; ++i) {
+ if (minor_keys[local_offset + i] == minor_keys[local_offset + key_idx]) {
+ key_aggregated_edge_weights[local_offset + key_idx] +=
+ weights != nullptr ? key_aggregated_edge_weights[local_offset + i] : weight_t{1.0};
+ } else {
+ ++key_idx;
+ minor_keys[local_offset + key_idx] = minor_keys[local_offset + i];
+ key_aggregated_edge_weights[local_offset + key_idx] =
+ weights != nullptr ? key_aggregated_edge_weights[local_offset + i] : weight_t{1.0};
+ }
+ }
+ thrust::fill(thrust::seq,  // key_idx indexes the last merged slot: key_idx + 1 valid entries
+ major_vertices + local_offset,
+ major_vertices + local_offset + key_idx + 1,
+ matrix_partition.get_major_from_major_offset_nocheck(major_offset));
+ thrust::fill(thrust::seq,  // tag the slots freed by merging so the caller can drop them
+ major_vertices + local_offset + key_idx + 1,
+ major_vertices + local_offset + local_degree,
+ invalid_vertex);
+ }
+
+ idx += gridDim.x * blockDim.x;
+ }
+}
+
+} // namespace detail
+
+/**
+ * @brief Iterate over every vertex's key-aggregated outgoing edges to update vertex properties.
+ *
+ * This function is inspired by thrust::transform_reduce() (iteration over the outgoing edges
+ * part) and thrust::copy() (update vertex properties part, take transform_reduce output as copy
+ * input).
+ * Unlike copy_v_transform_reduce_out_nbr, this function first aggregates outgoing edges by key to
+ * support two level reduction for every vertex.
+ *
+ * @tparam GraphViewType Type of the passed non-owning graph object.
+ * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row
+ * input properties.
+ * @tparam VertexIterator Type of the iterator for graph adjacency matrix column key values for
+ * aggregation (key type should coincide with vertex type).
+ * @tparam ValueIterator Type of the iterator for values in (key, value) pairs.
+ * @tparam KeyAggregatedEdgeOp Type of the quinary key-aggregated edge operator.
+ * @tparam ReduceOp Type of the binary reduction operator.
+ * @tparam T Type of the initial value for reduction over the key-aggregated outgoing edges.
+ * @tparam VertexValueOutputIterator Type of the iterator for vertex output property variables.
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Non-owning graph object.
+ * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input
+ * properties for the first (inclusive) row (assigned to this process in multi-GPU).
+ * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first
+ * + @p graph_view.get_number_of_local_adj_matrix_partition_rows().
+ * @param adj_matrix_col_key_first Iterator pointing to the adjacency matrix column key (for
+ * aggregation) for the first (inclusive) column (assigned to this process in multi-GPU).
+ * `adj_matrix_col_key_last` (exclusive) is deduced as @p adj_matrix_col_key_first + @p
+ * graph_view.get_number_of_local_adj_matrix_partition_cols().
+ * @param map_key_first Iterator pointing to the first (inclusive) key in (key, value) pairs
+ * (assigned to this process in multi-GPU,
+ * `cugraph::experimental::detail::compute_gpu_id_from_vertex_t` is used to map keys to processes).
+ * (Key, value) pairs may be provided by transform_reduce_by_adj_matrix_row_key_e() or
+ * transform_reduce_by_adj_matrix_col_key_e().
+ * @param map_key_last Iterator pointing to the last (exclusive) key in (key, value) pairs (assigned
+ * to this process in multi-GPU).
+ * @param map_value_first Iterator pointing to the first (inclusive) value in (key, value) pairs
+ * (assigned to this process in multi-GPU). `map_value_last` (exclusive) is deduced as @p
+ * map_value_first + thrust::distance(@p map_key_first, @p map_key_last).
+ * @param key_aggregated_e_op Quinary operator takes edge source, key, aggregated edge weight, *(@p
+ * adj_matrix_row_value_input_first + i), and value for the key stored in the input (key, value)
+ * pairs provided by @p map_key_first, @p map_key_last, and @p map_value_first (aggregated over the
+ * entire set of processes in multi-GPU).
+ * @param reduce_op Binary operator that takes two input arguments and reduces them to one value.
+ * @param init Initial value to be added to the reduced @p key_aggregated_e_op return values for
+ * each vertex.
+ * @param vertex_value_output_first Iterator pointing to the vertex property variables for the
+ * first (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_output_last`
+ * (exclusive) is deduced as @p vertex_value_output_first + @p
+ * graph_view.get_number_of_local_vertices().
+ */
+template
+void copy_v_transform_reduce_key_aggregated_out_nbr(
+ raft::handle_t const& handle,
+ GraphViewType const& graph_view,
+ AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first,
+ VertexIterator adj_matrix_col_key_first,
+ VertexIterator map_key_first,
+ VertexIterator map_key_last,
+ ValueIterator map_value_first,
+ KeyAggregatedEdgeOp key_aggregated_e_op,
+ ReduceOp reduce_op,
+ T init,
+ VertexValueOutputIterator vertex_value_output_first)
+{
+ static_assert(!GraphViewType::is_adj_matrix_transposed,
+ "GraphViewType should support the push model.");
+ static_assert(std::is_same::value_type,
+ typename GraphViewType::vertex_type>::value);
+
+ using vertex_t = typename GraphViewType::vertex_type;
+ using edge_t = typename GraphViewType::edge_type;
+ using weight_t = typename GraphViewType::weight_type;
+ using value_t = typename std::iterator_traits::value_type;
+
+ double constexpr load_factor = 0.7;
+
+ // 1. build a cuco::static_map object for the k, v pairs.
+
+ auto kv_map_ptr = std::make_unique>(
+ static_cast(static_cast(thrust::distance(map_key_first, map_key_last)) /
+ load_factor),
+ invalid_vertex_id::value,
+ invalid_vertex_id::value);
+ auto pair_first = thrust::make_transform_iterator(
+ thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)),
+ [] __device__(auto val) {
+ return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val));
+ });
+ kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last));
+
+ if (GraphViewType::is_multi_gpu) {
+ auto& comm = handle.get_comms();
+ auto const comm_size = comm.get_size();
+
+ rmm::device_uvector unique_keys(
+ graph_view.get_number_of_local_adj_matrix_partition_cols(), handle.get_stream());
+ thrust::copy(
+ rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+ adj_matrix_col_key_first,
+ adj_matrix_col_key_first + graph_view.get_number_of_local_adj_matrix_partition_cols(),
+ unique_keys.begin());
+ thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+ unique_keys.begin(),
+ unique_keys.end());
+ auto last = thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+ unique_keys.begin(),
+ unique_keys.end());
+ unique_keys.resize(thrust::distance(unique_keys.begin(), last), handle.get_stream());
+
+ rmm::device_uvector rx_unique_keys(0, handle.get_stream());
+ std::vector rx_value_counts{};
+ std::tie(rx_unique_keys, rx_value_counts) = groupby_gpuid_and_shuffle_values(
+ comm,
+ unique_keys.begin(),
+ unique_keys.end(),
+ [key_func = detail::compute_gpu_id_from_vertex_t{comm_size}] __device__(auto val) {
+ return key_func(val);
+ },
+ handle.get_stream());
+
+ rmm::device_uvector values_for_unique_keys(rx_unique_keys.size(), handle.get_stream());
+
+ CUDA_TRY(cudaStreamSynchronize(
+ handle.get_stream())); // cuco::static_map currently does not take stream
+
+ kv_map_ptr->find(rx_unique_keys.begin(), rx_unique_keys.end(), values_for_unique_keys.begin());
+
+ rmm::device_uvector rx_values_for_unique_keys(0, handle.get_stream());
+
+ std::tie(rx_values_for_unique_keys, std::ignore) =
+ shuffle_values(comm, values_for_unique_keys.begin(), rx_value_counts, handle.get_stream());
+
+ CUDA_TRY(cudaStreamSynchronize(
+ handle.get_stream())); // cuco::static_map currently does not take stream
+
+ kv_map_ptr.reset();
+
+ kv_map_ptr = std::make_unique>(
+ static_cast(static_cast(unique_keys.size()) / load_factor),
+ invalid_vertex_id::value,
+ invalid_vertex_id::value);
+
+ auto pair_first = thrust::make_transform_iterator(
+ thrust::make_zip_iterator(
+ thrust::make_tuple(unique_keys.begin(), rx_values_for_unique_keys.begin())),
+ [] __device__(auto val) {
+ return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val));
+ });
+
+ kv_map_ptr->insert(pair_first, pair_first + unique_keys.size());
+ }
+
+ // 2. aggregate each vertex out-going edges based on keys and transform-reduce.
+
+ auto loop_count = size_t{1};
+ if (GraphViewType::is_multi_gpu) {
+ auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name());
+ auto const row_comm_size = row_comm.get_size();
+ loop_count = graph_view.is_hypergraph_partitioned()
+ ? graph_view.get_number_of_local_adj_matrix_partitions()
+ : static_cast(row_comm_size);
+ }
+
+ rmm::device_uvector major_vertices(0, handle.get_stream());
+ auto e_op_result_buffer = allocate_dataframe_buffer(0, handle.get_stream());
+ for (size_t i = 0; i < loop_count; ++i) {
+ matrix_partition_device_t matrix_partition(
+ graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 0 : i);
+
+ int comm_root_rank = 0;
+ if (GraphViewType::is_multi_gpu) {
+ auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name());
+ auto const row_comm_rank = row_comm.get_rank();
+ auto const row_comm_size = row_comm.get_size();
+ auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name());
+ auto const col_comm_rank = col_comm.get_rank();
+ comm_root_rank = graph_view.is_hypergraph_partitioned() ? i * row_comm_size + row_comm_rank
+ : col_comm_rank * row_comm_size + i;
+ }
+
+ auto num_edges = thrust::transform_reduce(
+ rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+ thrust::make_counting_iterator(graph_view.get_vertex_partition_first(comm_root_rank)),
+ thrust::make_counting_iterator(graph_view.get_vertex_partition_last(comm_root_rank)),
+ [matrix_partition] __device__(auto row) {
+ auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row);
+ return matrix_partition.get_local_degree(row_offset);
+ },
+ edge_t{0},
+ thrust::plus());
+
+ rmm::device_uvector tmp_major_vertices(num_edges, handle.get_stream());
+ rmm::device_uvector tmp_minor_keys(tmp_major_vertices.size(), handle.get_stream());
+ rmm::device_uvector tmp_key_aggregated_edge_weights(tmp_major_vertices.size(),
+ handle.get_stream());
+
+ if (graph_view.get_vertex_partition_size(comm_root_rank) > 0) {
+ raft::grid_1d_thread_t update_grid(
+ graph_view.get_vertex_partition_size(comm_root_rank),
+ detail::copy_v_transform_reduce_key_aggregated_out_nbr_for_all_block_size,
+ handle.get_device_properties().maxGridSize[0]);
+
+ auto constexpr invalid_vertex = invalid_vertex_id::value;
+
+ // FIXME: This is highly inefficient for graphs with high-degree vertices. If we renumber
+ // vertices to ensure that rows within a partition are sorted by their out-degree in
+ // decreasing order, we will apply this kernel only to low out-degree vertices.
+ detail::for_all_major_for_all_nbr_low_degree<<>>(
+ matrix_partition,
+ graph_view.get_vertex_partition_first(comm_root_rank),
+ graph_view.get_vertex_partition_last(comm_root_rank),
+ adj_matrix_col_key_first,
+ tmp_major_vertices.data(),
+ tmp_minor_keys.data(),
+ tmp_key_aggregated_edge_weights.data(),
+ invalid_vertex);
+ }
+
+ auto triplet_first = thrust::make_zip_iterator(thrust::make_tuple(
+ tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin()));
+ auto last =
+ thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+ triplet_first,
+ triplet_first + tmp_major_vertices.size(),
+ [] __device__(auto val) {
+ return thrust::get<0>(val) == invalid_vertex_id::value;
+ });
+ tmp_major_vertices.resize(thrust::distance(triplet_first, last), handle.get_stream());
+ tmp_minor_keys.resize(tmp_major_vertices.size(), handle.get_stream());
+ tmp_key_aggregated_edge_weights.resize(tmp_major_vertices.size(), handle.get_stream());
+
+ if (GraphViewType::is_multi_gpu) {
+ auto& sub_comm = handle.get_subcomm(graph_view.is_hypergraph_partitioned()
+ ? cugraph::partition_2d::key_naming_t().col_name()
+ : cugraph::partition_2d::key_naming_t().row_name());
+ auto const sub_comm_size = sub_comm.get_size();
+
+ triplet_first =
+ thrust::make_zip_iterator(thrust::make_tuple(tmp_major_vertices.begin(),
+ tmp_minor_keys.begin(),
+ tmp_key_aggregated_edge_weights.begin()));
+ rmm::device_uvector rx_major_vertices(0, handle.get_stream());
+ rmm::device_uvector rx_minor_keys(0, handle.get_stream());
+ rmm::device_uvector rx_key_aggregated_edge_weights(0, handle.get_stream());
+ std::forward_as_tuple(
+ std::tie(rx_major_vertices, rx_minor_keys, rx_key_aggregated_edge_weights), std::ignore) =
+ groupby_gpuid_and_shuffle_values(
+ sub_comm,
+ triplet_first,
+ triplet_first + tmp_major_vertices.size(),
+ [key_func = detail::compute_gpu_id_from_vertex_t{sub_comm_size}] __device__(
+ auto val) { return key_func(thrust::get<1>(val)); },
+ handle.get_stream());
+
+ tmp_major_vertices = std::move(rx_major_vertices);
+ tmp_minor_keys = std::move(rx_minor_keys);
+ tmp_key_aggregated_edge_weights = std::move(rx_key_aggregated_edge_weights);
+ }
+
+ auto tmp_e_op_result_buffer =
+ allocate_dataframe_buffer(tmp_major_vertices.size(), handle.get_stream());
+ auto tmp_e_op_result_buffer_first = get_dataframe_buffer_begin(tmp_e_op_result_buffer);
+
+ triplet_first = thrust::make_zip_iterator(thrust::make_tuple(
+ tmp_major_vertices.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_weights.begin()));
+ // the input range is sized by tmp_major_vertices (major_vertices only accumulates the output)
+ thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+ triplet_first,
+ triplet_first + tmp_major_vertices.size(),
+ tmp_e_op_result_buffer_first,
+ [adj_matrix_row_value_input_first,
+ key_aggregated_e_op,
+ matrix_partition,
+ kv_map = kv_map_ptr->get_device_view()] __device__(auto val) {
+ auto major = thrust::get<0>(val);
+ auto key = thrust::get<1>(val);
+ auto w = thrust::get<2>(val);
+ return key_aggregated_e_op(
+ major,
+ key,
+ w,
+ *(adj_matrix_row_value_input_first +
+ matrix_partition.get_major_offset_from_major_nocheck(major)),
+ kv_map.find(key)->second);
+ });
+ tmp_minor_keys.resize(0, handle.get_stream());
+ tmp_key_aggregated_edge_weights.resize(0, handle.get_stream());
+ tmp_minor_keys.shrink_to_fit(handle.get_stream());
+ tmp_key_aggregated_edge_weights.shrink_to_fit(handle.get_stream());
+
+ if (GraphViewType::is_multi_gpu) {
+ auto& sub_comm = handle.get_subcomm(graph_view.is_hypergraph_partitioned()
+ ? cugraph::partition_2d::key_naming_t().col_name()
+ : cugraph::partition_2d::key_naming_t().row_name());
+ auto const sub_comm_rank = sub_comm.get_rank();
+ auto const sub_comm_size = sub_comm.get_size();
+
+ // FIXME: additional optimization is possible if reduce_op is a pure function (and reduce_op
+ // can be mapped to ncclRedOp_t).
+
+ auto rx_sizes =
+ host_scalar_gather(sub_comm, tmp_major_vertices.size(), i, handle.get_stream());
+ std::vector rx_displs(
+ static_cast(sub_comm_rank) == i ? sub_comm_size : int{0}, size_t{0});
+ if (static_cast(sub_comm_rank) == i) {
+ std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1);
+ }
+ rmm::device_uvector rx_major_vertices(
+ static_cast(sub_comm_rank) == i
+ ? std::accumulate(rx_sizes.begin(), rx_sizes.end(), size_t{0})
+ : size_t{0},
+ handle.get_stream());
+ auto rx_tmp_e_op_result_buffer =
+ allocate_dataframe_buffer(rx_major_vertices.size(), handle.get_stream());
+
+ device_gatherv(sub_comm,
+ tmp_major_vertices.data(),
+ rx_major_vertices.data(),
+ tmp_major_vertices.size(),
+ rx_sizes,
+ rx_displs,
+ i,
+ handle.get_stream());
+ device_gatherv(sub_comm,
+ tmp_e_op_result_buffer_first,
+ get_dataframe_buffer_begin(rx_tmp_e_op_result_buffer),
+ tmp_major_vertices.size(),
+ rx_sizes,
+ rx_displs,
+ i,
+ handle.get_stream());
+
+ if (static_cast(sub_comm_rank) == i) {
+ major_vertices = std::move(rx_major_vertices);
+ e_op_result_buffer = std::move(rx_tmp_e_op_result_buffer);
+ }
+ } else {
+ major_vertices = std::move(tmp_major_vertices);
+ e_op_result_buffer = std::move(tmp_e_op_result_buffer);
+ }
+ }
+
+ // vertices that receive no reduced value below keep T{} until init is folded in at the end
+ thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+ vertex_value_output_first,
+ vertex_value_output_first + graph_view.get_number_of_local_vertices(),
+ T{});
+ // bring the e_op results of each major vertex together
+ thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+ major_vertices.begin(),
+ major_vertices.end(),
+ get_dataframe_buffer_begin(e_op_result_buffer));
+
+ auto num_uniques = thrust::count_if(
+ rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+ thrust::make_counting_iterator(size_t{0}),
+ thrust::make_counting_iterator(major_vertices.size()),
+ [major_vertices = major_vertices.data()] __device__(auto i) {
+ return ((i == 0) || (major_vertices[i] != major_vertices[i - 1])) ? true : false;
+ });
+ rmm::device_uvector unique_major_vertices(num_uniques, handle.get_stream());
+
+ // major_vertices is sorted, so the first element of each run of equal values is a unique major
+ thrust::unique_copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+ major_vertices.begin(),
+ major_vertices.end(),
+ unique_major_vertices.begin());
+ // the j-th reduced value belongs to the j-th unique major; the key predicate is passed
+ // explicitly so that reduce_op lands in the binary_op slot instead of the binary_pred slot
+ thrust::reduce_by_key(
+ rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+ major_vertices.begin(),
+ major_vertices.end(),
+ get_dataframe_buffer_begin(e_op_result_buffer),
+ thrust::make_discard_iterator(),
+ thrust::make_permutation_iterator(
+ vertex_value_output_first,
+ thrust::make_transform_iterator(
+ unique_major_vertices.begin(),
+ [vertex_partition = vertex_partition_device_t(graph_view)] __device__(
+ auto v) { return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); })),
+ thrust::equal_to<vertex_t>{},
+ reduce_op);
+
+ // fold init into every vertex's reduced value
+ thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+ vertex_value_output_first,
+ vertex_value_output_first + graph_view.get_number_of_local_vertices(),
+ vertex_value_output_first,
+ [reduce_op, init] __device__(auto val) { return reduce_op(val, init); });
+}
+
+} // namespace experimental
+} // namespace cugraph
diff --git a/cpp/include/patterns/count_if_e.cuh b/cpp/include/patterns/count_if_e.cuh
index 4f0f0a7a43e..63b31f9c44e 100644
--- a/cpp/include/patterns/count_if_e.cuh
+++ b/cpp/include/patterns/count_if_e.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -18,8 +18,8 @@
#include
#include
#include
-#include
#include
+#include
#include
#include
diff --git a/cpp/include/patterns/count_if_v.cuh b/cpp/include/patterns/count_if_v.cuh
index c90b259cdde..6b28cd7ae12 100644
--- a/cpp/include/patterns/count_if_v.cuh
+++ b/cpp/include/patterns/count_if_v.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -16,8 +16,8 @@
#pragma once
#include
-#include
#include
+#include
#include
#include
diff --git a/cpp/include/patterns/reduce_op.cuh b/cpp/include/patterns/reduce_op.cuh
index e9011914292..d92d3352d08 100644
--- a/cpp/include/patterns/reduce_op.cuh
+++ b/cpp/include/patterns/reduce_op.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -29,6 +29,7 @@ struct any {
__host__ __device__ T operator()(T const& lhs, T const& rhs) const { return lhs; }
};
+// FIXME: thrust::minimum can replace this.
// reducing N elements (operator < should be defined between any two elements), the minimum element
// should be selected.
template
diff --git a/cpp/include/patterns/reduce_v.cuh b/cpp/include/patterns/reduce_v.cuh
index 12224dc55f4..b232d37b78d 100644
--- a/cpp/include/patterns/reduce_v.cuh
+++ b/cpp/include/patterns/reduce_v.cuh
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
@@ -16,8 +16,8 @@
#pragma once
#include
-#include
#include
+#include
#include
diff --git a/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh
new file mode 100644
index 00000000000..70b6dc92752
--- /dev/null
+++ b/cpp/include/patterns/transform_reduce_by_adj_matrix_row_col_key_e.cuh
@@ -0,0 +1,453 @@
+/*
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include
+#include
+#include
+#include