diff --git a/CHANGELOG.md b/CHANGELOG.md
index e58f3b9aa07..0011b99fbf3 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,10 +1,6 @@
 # cuGraph 0.19.0 (Date TBD)
 
-## New Features
-
-## Improvements
-
-## Bug Fixes
+Please see https://github.com/rapidsai/cugraph/releases/tag/v0.19.0a for the latest changes to this development branch.
 
 # cuGraph 0.18.0 (24 Feb 2021)
 
diff --git a/README.md b/README.md
index 62059e9c7b6..77377fe2bbc 100644
--- a/README.md
+++ b/README.md
@@ -86,7 +86,6 @@ As of Release 0.18 - including 0.18 nightly
 |              | Renumbering                            | Single-GPU   | multiple columns, any data type  |
 |              | Symmetrize                             | Multi-GPU    |                     |
 | Other        |                                        |              |                     |
-|              | Hungarian Algorithm                    | Single-GPU   |                     |
 |              | Minimum Spanning Tree                  | Single-GPU   |                     |
 |              | Maximum Spanning Tree                  | Single-GPU   |                     |
 |  |  |
diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh
index 0fef7b62f8d..7242b4a11f5 100755
--- a/ci/gpu/build.sh
+++ b/ci/gpu/build.sh
@@ -90,8 +90,25 @@ conda list --show-channel-urls
 ################################################################################
 
 if [[ -z "$PROJECT_FLASH" || "$PROJECT_FLASH" == "0" ]]; then
-  gpuci_logger "Build from source"
-  $WORKSPACE/build.sh -v clean libcugraph cugraph
+    gpuci_logger "Build from source"
+    $WORKSPACE/build.sh -v clean libcugraph cugraph
+else
+    export LIBCUGRAPH_BUILD_DIR="$WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build"
+
+    # Faiss patch
+    echo "Update libcugraph.so"
+    cd $LIBCUGRAPH_BUILD_DIR
+    chrpath -d libcugraph.so
+    patchelf --replace-needed `patchelf --print-needed libcugraph.so | grep faiss` libfaiss.so libcugraph.so
+
+    CONDA_FILE=`find $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ -name "libcugraph*.tar.bz2"`
+    CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension
+    CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install
+    echo "Installing $CONDA_FILE"
+    conda install -c $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ "$CONDA_FILE"
+
+    echo "Build cugraph..."
+    $WORKSPACE/build.sh cugraph
 fi
 
 ################################################################################
diff --git a/ci/gpu/notebook_list.py b/ci/gpu/notebook_list.py
index bb54913ac8d..8748c434006 100644
--- a/ci/gpu/notebook_list.py
+++ b/ci/gpu/notebook_list.py
@@ -24,7 +24,9 @@
 pascal = False
 
 device = cuda.get_current_device()
-cc = getattr(device, 'COMPUTE_CAPABILITY')
+# check for the attribute using both pre and post numba 0.53 names
+cc = getattr(device, 'COMPUTE_CAPABILITY', None) or \
+     getattr(device, 'compute_capability')
 if (cc[0] < 7):
     pascal = True
 
diff --git a/ci/test.sh b/ci/test.sh
index b0134e97246..58cbb950f73 100755
--- a/ci/test.sh
+++ b/ci/test.sh
@@ -61,30 +61,6 @@ else
     cd $WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build
 fi
 
-# FIXME: if possible, any install and build steps should be moved outside this
-# script since a failing install/build step is treated as a failing test command
-# and will not stop the script. This script is also only expected to run tests
-# in a preconfigured environment, and install/build steps are unexpected side
-# effects.
-if [[ "$PROJECT_FLASH" == "1" ]]; then
-    export LIBCUGRAPH_BUILD_DIR="$WORKSPACE/ci/artifacts/cugraph/cpu/conda_work/cpp/build"
-
-    # Faiss patch
-    echo "Update libcugraph.so"
-    cd $LIBCUGRAPH_BUILD_DIR
-    chrpath -d libcugraph.so
-    patchelf --replace-needed `patchelf --print-needed libcugraph.so | grep faiss` libfaiss.so libcugraph.so
-
-    CONDA_FILE=`find $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ -name "libcugraph*.tar.bz2"`
-    CONDA_FILE=`basename "$CONDA_FILE" .tar.bz2` #get filename without extension
-    CONDA_FILE=${CONDA_FILE//-/=} #convert to conda install
-    echo "Installing $CONDA_FILE"
-    conda install -c $WORKSPACE/ci/artifacts/cugraph/cpu/conda-bld/ "$CONDA_FILE"
-
-    echo "Build cugraph..."
-    $WORKSPACE/build.sh cugraph
-fi
-
 # Do not abort the script on error from this point on. This allows all tests to
 # run regardless of pass/fail, but relies on the ERR trap above to manage the
 # EXITCODE for the script.
diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml
index 255366b0a82..f26c3dd45d9 100644
--- a/conda/environments/cugraph_dev_cuda10.1.yml
+++ b/conda/environments/cugraph_dev_cuda10.1.yml
@@ -14,7 +14,7 @@ dependencies:
 - distributed>=2.12.0
 - dask-cuda=0.19*
 - dask-cudf=0.19*
-- nccl>=2.7
+- nccl>=2.8.4
 - ucx-py=0.19*
 - ucx-proc=*=gpu
 - scipy
@@ -29,7 +29,7 @@ dependencies:
 - boost
 - cython>=0.29,<0.30
 - pytest
-- libfaiss=1.6.3
+- libfaiss=1.7.0
 - faiss-proc=*=cuda
 - scikit-learn>=0.23.1
 - colorcet
diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml
index e64d7c77b7d..2848cc49dc7 100644
--- a/conda/environments/cugraph_dev_cuda10.2.yml
+++ b/conda/environments/cugraph_dev_cuda10.2.yml
@@ -14,7 +14,7 @@ dependencies:
 - distributed>=2.12.0
 - dask-cuda=0.19*
 - dask-cudf=0.19*
-- nccl>=2.7
+- nccl>=2.8.4
 - ucx-py=0.19*
 - ucx-proc=*=gpu
 - scipy
@@ -29,7 +29,7 @@ dependencies:
 - boost
 - cython>=0.29,<0.30
 - pytest
-- libfaiss=1.6.3
+- libfaiss=1.7.0
 - faiss-proc=*=cuda
 - scikit-learn>=0.23.1
 - colorcet
diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml
index 1f05e4762ef..82e8b409d13 100644
--- a/conda/environments/cugraph_dev_cuda11.0.yml
+++ b/conda/environments/cugraph_dev_cuda11.0.yml
@@ -14,7 +14,7 @@ dependencies:
 - distributed>=2.12.0
 - dask-cuda=0.19*
 - dask-cudf=0.19*
-- nccl>=2.7
+- nccl>=2.8.4
 - ucx-py=0.19*
 - ucx-proc=*=gpu
 - scipy
@@ -29,7 +29,7 @@ dependencies:
 - boost
 - cython>=0.29,<0.30
 - pytest
-- libfaiss=1.6.3
+- libfaiss=1.7.0
 - faiss-proc=*=cuda
 - scikit-learn>=0.23.1
 - colorcet
diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml
index 90f5bed942a..e714b61d774 100644
--- a/conda/recipes/cugraph/meta.yaml
+++ b/conda/recipes/cugraph/meta.yaml
@@ -1,4 +1,4 @@
-# Copyright (c) 2018, NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
 
 # Usage:
 #   conda build -c nvidia -c rapidsai -c conda-forge -c defaults .
@@ -37,7 +37,7 @@ requirements:
     - dask-cuda {{ minor_version }}
     - dask>=2.12.0
     - distributed>=2.12.0
-    - nccl>=2.7
+    - nccl>=2.8.4
     - ucx-py {{ minor_version }}
     - ucx-proc=*=gpu
 
diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml
index 8f7495eab3c..2602b2d8608 100644
--- a/conda/recipes/libcugraph/meta.yaml
+++ b/conda/recipes/libcugraph/meta.yaml
@@ -32,30 +32,21 @@ build:
 requirements:
   build:
     - cmake>=3.12.4
-    - libcudf={{ minor_version }}
     - cudatoolkit {{ cuda_version }}.*
+    - librmm {{ minor_version }}.*
     - boost-cpp>=1.66
-    - libcypher-parser
-    - nccl>=2.7
-    - ucx-py {{ minor_version }}
+    - nccl>=2.8.4
     - ucx-proc=*=gpu
     - gtest
-    - faiss-proc=*=cuda
-    - libfaiss=1.6.3
     - gmock
+    - faiss-proc=*=cuda
+    - conda-forge::libfaiss=1.7.0
   run:
-    - libcudf={{ minor_version }}
     - {{ pin_compatible('cudatoolkit', max_pin='x.x') }}
-    - nccl>=2.7
-    - ucx-py {{ minor_version }}
+    - nccl>=2.8.4
     - ucx-proc=*=gpu
     - faiss-proc=*=cuda
-    - libfaiss=1.6.3
-
-#test:
-#  commands:
-#    - test -f $PREFIX/include/cugraph.h
-
+    - conda-forge::libfaiss=1.7.0
 
 about:
   home: http://rapids.ai/
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index d211fe9ed5a..26a8f98e265 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -113,7 +113,6 @@ set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda --expt-relaxed-
 set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas --disable-warnings")
 set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable")
 
-
 # Option to enable line info in CUDA device compilation to allow introspection when profiling /
 # memchecking
 option(CMAKE_CUDA_LINEINFO "Enable the -lineinfo option for nvcc (useful for cuda-memcheck / profiler" OFF)
@@ -298,7 +297,8 @@ else(DEFINED ENV{RAFT_PATH})
   FetchContent_Declare(
     raft
     GIT_REPOSITORY    https://github.com/rapidsai/raft.git
-    GIT_TAG           a3461b201ea1c9f61571f1927274f739e775d2d2
+    GIT_TAG           6455e05b3889db2b495cf3189b33c2b07bfbebf2
+
     SOURCE_SUBDIR     raft
   )
 
@@ -317,9 +317,9 @@ endif(DEFINED ENV{RAFT_PATH})
 
 # https://cmake.org/cmake/help/v3.0/module/ExternalProject.html
 
-# FIXME: gunrock is the only external package still using ExternalProject
-# instead of FetchContent. Consider migrating to FetchContent soon (this may
-# require updates to the gunrock cmake files to support this).
+# FIXME: gunrock is still using ExternalProject instead of
+# FetchContent. Consider migrating to FetchContent soon (this may require
+# updates to the gunrock cmake files to support this).
 
 include(ExternalProject)
 
@@ -360,31 +360,32 @@ if(BUILD_STATIC_FAISS)
     "Path to FAISS source directory")
   ExternalProject_Add(faiss
     GIT_REPOSITORY    https://github.com/facebookresearch/faiss.git
-    GIT_TAG           a5b850dec6f1cd6c88ab467bfd5e87b0cac2e41d
+    GIT_TAG           7c2d2388a492d65fdda934c7e74ae87acaeed066
     CONFIGURE_COMMAND LIBS=-pthread
                       CPPFLAGS=-w
                       LDFLAGS=-L${CMAKE_INSTALL_PREFIX}/lib
-                              ${CMAKE_CURRENT_BINARY_DIR}/faiss/src/faiss/configure
-	                      --prefix=${CMAKE_CURRENT_BINARY_DIR}/faiss
-	                      --with-blas=${BLAS_LIBRARIES}
-	                      --with-cuda=${CUDA_TOOLKIT_ROOT_DIR}
-	                      --with-cuda-arch=${FAISS_GPU_ARCHS}
-	                      -v
+                        cmake -B build .
+                        -DCMAKE_BUILD_TYPE=Release
+                        -DBUILD_TESTING=OFF
+                        -DFAISS_ENABLE_PYTHON=OFF
+                        -DBUILD_SHARED_LIBS=OFF
+                        -DFAISS_ENABLE_GPU=ON
+                        -DCUDAToolkit_ROOT=${CUDA_TOOLKIT_ROOT_DIR}
+                        -DCUDA_ARCHITECTURES=${FAISS_GPU_ARCHS}
+                        -DBLAS_LIBRARIES=${BLAS_LIBRARIES}
     PREFIX            ${FAISS_DIR}
-    BUILD_COMMAND     make -j${PARALLEL_LEVEL} VERBOSE=1
-    BUILD_BYPRODUCTS  ${FAISS_DIR}/lib/libfaiss.a
+    BUILD_COMMAND     make -C build -j${PARALLEL_LEVEL} VERBOSE=1
+    BUILD_BYPRODUCTS  ${FAISS_DIR}/src/faiss/build/faiss/libfaiss.a
     BUILD_ALWAYS      1
-    INSTALL_COMMAND   make -s install > /dev/null
+    INSTALL_COMMAND   ""
     UPDATE_COMMAND    ""
-    BUILD_IN_SOURCE   1
-    PATCH_COMMAND     patch -p1 -N < ${CMAKE_CURRENT_SOURCE_DIR}/cmake/faiss_cuda11.patch || true)
+    BUILD_IN_SOURCE   1)
 
   ExternalProject_Get_Property(faiss install_dir)
   add_library(FAISS::FAISS STATIC IMPORTED)
-  add_dependencies(FAISS::FAISS faiss)
   set_property(TARGET FAISS::FAISS PROPERTY
-    IMPORTED_LOCATION ${FAISS_DIR}/lib/libfaiss.a)
-  set(FAISS_INCLUDE_DIRS "${FAISS_DIR}/src")
+    IMPORTED_LOCATION ${FAISS_DIR}/src/faiss/build/faiss/libfaiss.a)
+  set(FAISS_INCLUDE_DIRS "${FAISS_DIR}/src/faiss")
 else()
   set(FAISS_INSTALL_DIR ENV{FAISS_ROOT})
   find_package(FAISS REQUIRED)
@@ -420,6 +421,7 @@ add_library(cugraph SHARED
     src/components/connectivity.cu
     src/centrality/katz_centrality.cu
     src/centrality/betweenness_centrality.cu
+    src/experimental/generate_rmat_edgelist.cu
     src/experimental/graph.cu
     src/experimental/graph_view.cu
     src/experimental/coarsen_graph.cu
@@ -445,6 +447,10 @@ target_link_directories(cugraph
 #
 add_dependencies(cugraph gunrock_ext)
 
+# Enable the per-thread default stream; see https://docs.nvidia.com/cuda/cuda-runtime-api/stream-sync-behavior.html
+# The per-thread default stream does not synchronize with other streams
+target_compile_definitions(cugraph PUBLIC CUDA_API_PER_THREAD_DEFAULT_STREAM)
+
 ###################################################################################################
 # - include paths ---------------------------------------------------------------------------------
 target_include_directories(cugraph
diff --git a/cpp/docs/DEVELOPER_GUIDE.md b/cpp/docs/DEVELOPER_GUIDE.md
new file mode 100644
index 00000000000..ba24d68aca5
--- /dev/null
+++ b/cpp/docs/DEVELOPER_GUIDE.md
@@ -0,0 +1,277 @@
+# cuGraph C++ Developer Guide
+
+This document serves as a guide for contributors to cuGraph C++ code. Developers should also refer 
+to these additional files for further documentation of cuGraph best practices.
+
+* [Documentation Guide](TODO) for guidelines on documenting cuGraph code.
+* [Testing Guide](TODO) for guidelines on writing unit tests.
+* [Benchmarking Guide](TODO) for guidelines on writing unit benchmarks.
+
+# Overview
+
+cuGraph includes a C++ library that provides GPU-accelerated graph algorithms for processing 
+sparse graphs.
+
+## Lexicon
+
+This section defines terminology used within cuGraph.
+
+### COO
+
+COOrdinate format is one of the standard formats for representing graph data.  In COO format the
+graph is represented as an array of source vertex ids, an array of destination vertex ids, and an
+optional array of edge weights.  Edge i is identified by source_vertex_id[i], destination_vertex_id[i]
+and weight[i].
+
+### MORE
+
+# Directory Structure and File Naming
+
+External/public cuGraph APIs are grouped based on functionality into an appropriately titled 
+header file  in `cugraph/cpp/include/`. For example, `cugraph/cpp/include/graph.hpp` 
+contains the definition of the (legacy) graph objects. Note the  `.hpp` 
+file extension used to indicate a C++ header file.
+
+Header files should use the `#pragma once` include guard. 
+
+## File extensions
+
+- `.hpp` : C++ header files
+- `.cpp` : C++ source files
+- `.cu`  : CUDA C++ source files
+- `.cuh` : Headers containing CUDA device code
+
+Header files and source files should use `.hpp` and `.cpp` extensions unless they must
+be compiled by nvcc.  `.cu` and `.cuh` files are more expensive to compile, so we want
+to minimize the use of these files to only when necessary.  A good indicator of the need
+to use a `.cu` or `.cuh` file is the inclusion of `__device__` and other
+symbols that are only recognized by `nvcc`. Another indicator is Thrust
+algorithm APIs with a device execution policy (always `rmm::exec_policy` in cuGraph).
+
+## Code and Documentation Style and Formatting
+
+cuGraph code uses [snake_case](https://en.wikipedia.org/wiki/Snake_case) for all names except in a 
+few cases: unit tests and test case names may use Pascal case, aka 
+[UpperCamelCase](https://en.wikipedia.org/wiki/Camel_case). We do not use
+[Hungarian notation](https://en.wikipedia.org/wiki/Hungarian_notation), except for the following examples:
+ * device data variables should be prefaced by d_ if it makes the intent clearer
+ * host data variables should be prefaced by h_ if it makes the intent clearer
+ * template parameters defining a type should be suffixed with _t
+ * private member variables are typically suffixed with an underscore
+
+```c++
+template <typename graph_t>
+void algorithm_function(graph_t const &g)
+{
+  ...
+}
+
+template <typename vertex_t>
+class utility_class 
+{
+  ...
+ private:
+  vertex_t num_vertices_{};
+};
+```
+
+C++ formatting is enforced using `clang-format`. You should configure `clang-format` on your 
+machine to use the `cugraph/cpp/.clang-format` configuration file, and run `clang-format` on all 
+changed code before committing it. The easiest way to do this is to configure your editor to 
+"format on save".
+
+Aspects of code style not discussed in this document and not automatically enforceable are typically
+caught during code review, or not enforced.
+
+### C++ Guidelines
+
+In general, we recommend following 
+[C++ Core Guidelines](https://isocpp.github.io/CppCoreGuidelines/CppCoreGuidelines). We also 
+recommend watching Sean Parent's [C++ Seasoning talk](https://www.youtube.com/watch?v=W2tWOdzgXHA), 
+and we try to follow his rules: "No raw loops. No raw pointers. No raw synchronization primitives." 
+
+ * Prefer algorithms from STL and Thrust to raw loops.
+ * Prefer cugraph and RMM to raw pointers and raw memory allocation.
+
+Documentation is discussed in the [Documentation Guide](TODO).
+
+### Includes
+
+The following guidelines apply to organizing `#include` lines.
+
+ * Group includes by library (e.g. cuGraph, RMM, Thrust, STL). `clang-format` will respect the 
+   groupings and sort the individual includes within a group lexicographically.
+ * Separate groups by a blank line.
+ * Order the groups from "nearest" to "farthest". In other words, local includes, then includes 
+   from other RAPIDS libraries, then includes from related libraries, like `<thrust/...>`, then 
+   includes from dependencies installed with cuGraph, and then standard headers (for example `<string>`, 
+   `<iostream>`).
+ * Use <> instead of "" unless the header is in the same directory as the source file.
+ * Tools like `clangd` often auto-insert includes when they can, but they usually get the grouping
+   and brackets wrong.
+ * Always check that every include is actually needed by the file in which it appears.
+   Try to avoid excessive including especially in header files. Double check this when you remove 
+   code.
+
+# cuGraph Data Structures
+
+Application data in cuGraph is contained in graph objects, but there are a variety of other
+data structures you will use when developing cuGraph code.
+
+## Views and Ownership
+
+Resource ownership is an essential concept in cuGraph. In short, an "owning" object owns a 
+resource (such as device memory). It acquires that resource during construction and releases the 
+resource in destruction ([RAII](https://en.cppreference.com/w/cpp/language/raii)). A "non-owning"
+object does not own resources. Any class in cuGraph with the `*_view` suffix is non-owning.
+
+## `rmm::device_memory_resource`<a name="memory_resource"></a>
+
+cuGraph allocates all device memory via RMM memory resources (MR). See the 
+[RMM documentation](https://github.com/rapidsai/rmm/blob/main/README.md) for details.
+
+## Streams
+
+CUDA streams are not yet exposed in external cuGraph APIs.
+
+We are currently investigating the best technique for exposing this.
+
+### Memory Management
+
+cuGraph code generally eschews raw pointers and direct memory allocation. Use RMM classes built to
+use `device_memory_resource`(*)s for device memory allocation with automated lifetime management.
+
+#### `rmm::device_buffer`
+Allocates a specified number of bytes of untyped, uninitialized device memory using a 
+`device_memory_resource`. If no resource is explicitly provided, uses 
+`rmm::mr::get_current_device_resource()`. 
+
+`rmm::device_buffer` is copyable and movable. A copy performs a deep copy of the `device_buffer`'s 
+device memory, whereas a move moves ownership of the device memory from one `device_buffer` to 
+another.
+
+```c++
+// Allocates at least 100 bytes of uninitialized device memory 
+// using the specified resource and stream
+rmm::device_buffer buff(100, stream, mr); 
+void * raw_data = buff.data(); // Raw pointer to underlying device memory
+
+rmm::device_buffer copy(buff); // Deep copies `buff` into `copy`
+rmm::device_buffer moved_to(std::move(buff)); // Moves contents of `buff` into `moved_to`
+
+custom_memory_resource *mr...;
+rmm::device_buffer custom_buff(100, mr); // Allocates 100 bytes from the custom_memory_resource
+```
+
+#### `rmm::device_uvector<T>`
+
+Similar to a `rmm::device_vector`, allocates a contiguous set of elements in device memory but with key 
+differences:
+- As an optimization, elements are uninitialized and no synchronization occurs at construction.
+This limits the types `T` to trivially copyable types.
+- All operations are stream ordered (i.e., they accept a `cuda_stream_view` specifying the stream 
+on which the operation is performed).
+
+## Namespaces
+
+### External
+All public cuGraph APIs should be placed in the `cugraph` namespace. Example:
+```c++
+namespace cugraph{
+   void public_function(...);
+} // namespace cugraph
+```
+
+### Internal
+
+Many functions are not meant for public use, so place them in either the `detail` or an *anonymous* 
+namespace, depending on the situation.
+
+#### `detail` namespace
+
+Functions or objects that will be used across *multiple* translation units (i.e., source files), 
+should be exposed in an internal header file and placed in the `detail` namespace. Example:
+
+```c++
+// some_utilities.hpp
+namespace cugraph{
+namespace detail{
+void reusable_helper_function(...);
+} // namespace detail
+} // namespace cugraph
+```
+
+#### Anonymous namespace
+
+Functions or objects that will only be used in a *single* translation unit should be defined in an 
+*anonymous* namespace in the source file where they are used. Example:
+
+```c++
+// some_file.cpp
+namespace{
+void isolated_helper_function(...);
+} // anonymous namespace
+```
+
+[**Anonymous namespaces should *never* be used in a header file.**](https://wiki.sei.cmu.edu/confluence/display/cplusplus/DCL59-CPP.+Do+not+define+an+unnamed+namespace+in+a+header+file) 
+
+# Error Handling
+
+cuGraph follows conventions (and provides utilities) enforcing compile-time and run-time 
+conditions and detecting and handling CUDA errors. Communication of errors is always via C++ 
+exceptions.
+
+## Runtime Conditions
+
+Use the `CUGRAPH_EXPECTS` macro to enforce runtime conditions necessary for correct execution.
+
+Example usage:
+```c++
+CUGRAPH_EXPECTS(lhs.type() == rhs.type(), "Column type mismatch");
+```
+
+The first argument is the conditional expression expected to resolve to  `true`  under normal 
+conditions. If the conditional evaluates to  `false`, then an error has occurred and an instance of  `cugraph::logic_error` is thrown. The second argument to  `CUGRAPH_EXPECTS` is a short description of the 
+error that has occurred and is used for the exception's `what()` message. 
+
+There are times where a particular code path, if reached, should indicate an error no matter what. 
+For example, often the `default` case of a `switch` statement represents an invalid alternative. 
+Use the `CUGRAPH_FAIL` macro for such errors. This is effectively the same as calling 
+`CUGRAPH_EXPECTS(false, reason)`.
+
+Example:
+```c++
+CUGRAPH_FAIL("This code path should not be reached.");
+```
+
+### CUDA Error Checking
+
+Use the `CUDA_TRY` macro to check for the successful completion of CUDA runtime API functions. This 
+macro throws a `cugraph::cuda_error` exception if the CUDA API return value is not `cudaSuccess`. The 
+thrown exception includes a description of the CUDA error code in its `what()` message.
+
+Example:
+
+```c++
+CUDA_TRY( cudaMemcpy(&dst, &src, num_bytes, cudaMemcpyDefault) );
+```
+
+## Compile-Time Conditions
+
+Use `static_assert` to enforce compile-time conditions. For example,
+
+```c++
+template <typename T>
+void trivial_types_only(T t){
+   static_assert(std::is_trivial<T>::value, "This function requires a trivial type.");
+...
+}
+```
+
+# Data Types
+
+TBD
+
+# Type Dispatcher
+
+TBD
diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp
index c666bce23ad..c3a4f3ec985 100644
--- a/cpp/include/algorithms.hpp
+++ b/cpp/include/algorithms.hpp
@@ -1100,9 +1100,9 @@ void sssp(raft::handle_t const &handle,
 template <typename vertex_t, typename edge_t, typename weight_t, typename result_t, bool multi_gpu>
 void pagerank(raft::handle_t const &handle,
               graph_view_t<vertex_t, edge_t, weight_t, true, multi_gpu> const &graph_view,
-              weight_t *adj_matrix_row_out_weight_sums,
-              vertex_t *personalization_vertices,
-              result_t *personalization_values,
+              weight_t const *adj_matrix_row_out_weight_sums,
+              vertex_t const *personalization_vertices,
+              result_t const *personalization_values,
               vertex_t personalization_vector_size,
               result_t *pageranks,
               result_t alpha,
@@ -1148,7 +1148,7 @@ void pagerank(raft::handle_t const &handle,
 template <typename vertex_t, typename edge_t, typename weight_t, typename result_t, bool multi_gpu>
 void katz_centrality(raft::handle_t const &handle,
                      graph_view_t<vertex_t, edge_t, weight_t, true, multi_gpu> const &graph_view,
-                     result_t *betas,
+                     result_t const *betas,
                      result_t *katz_centralities,
                      result_t alpha,
                      result_t beta,
@@ -1167,7 +1167,7 @@ void katz_centrality(raft::handle_t const &handle,
  * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
  * or multi-GPU (true).
  * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
- * handles to various CUDA libraries) to run graph algorithms.
+ * handles to various CUDA libraries) to run graph algorithms. Must have at least one worker stream.
  * @param graph_view Graph view object of, we extract induced egonet subgraphs from @p graph_view.
  * @param source_vertex Pointer to egonet center vertices (size == @p n_subgraphs).
  * @param n_subgraphs Number of induced EgoNet subgraphs to extract (ie. number of elements in @p
diff --git a/cpp/include/compute_partition.cuh b/cpp/include/compute_partition.cuh
index c81a6237b31..5c03b0971f2 100644
--- a/cpp/include/compute_partition.cuh
+++ b/cpp/include/compute_partition.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -39,27 +39,32 @@ class compute_partition_t {
   using graph_view_t = graph_view_type;
   using vertex_t     = typename graph_view_type::vertex_type;
 
-  compute_partition_t(graph_view_t const &graph_view)
+  compute_partition_t(raft::handle_t const &handle, graph_view_t const &graph_view)
+    : vertex_partition_offsets_v_(0, handle.get_stream())
   {
-    init<graph_view_t::is_multi_gpu>(graph_view);
+    init<graph_view_t::is_multi_gpu>(handle, graph_view);
   }
 
  private:
   template <bool is_multi_gpu, typename std::enable_if_t<!is_multi_gpu> * = nullptr>
-  void init(graph_view_t const &graph_view)
+  void init(raft::handle_t const &handle, graph_view_t const &graph_view)
   {
   }
 
   template <bool is_multi_gpu, typename std::enable_if_t<is_multi_gpu> * = nullptr>
-  void init(graph_view_t const &graph_view)
+  void init(raft::handle_t const &handle, graph_view_t const &graph_view)
   {
     auto partition = graph_view.get_partition();
     row_size_      = partition.get_row_size();
     col_size_      = partition.get_col_size();
     size_          = row_size_ * col_size_;
 
-    vertex_partition_offsets_v_.resize(size_ + 1);
-    vertex_partition_offsets_v_ = partition.get_vertex_partition_offsets();
+    vertex_partition_offsets_v_.resize(size_ + 1, handle.get_stream());
+    auto vertex_partition_offsets = partition.get_vertex_partition_offsets();
+    raft::update_device(vertex_partition_offsets_v_.data(),
+                        vertex_partition_offsets.data(),
+                        vertex_partition_offsets.size(),
+                        handle.get_stream());
   }
 
  public:
@@ -166,7 +171,7 @@ class compute_partition_t {
    */
   vertex_device_view_t vertex_device_view() const
   {
-    return vertex_device_view_t(vertex_partition_offsets_v_.data().get(), size_);
+    return vertex_device_view_t(vertex_partition_offsets_v_.data(), size_);
   }
 
   /**
@@ -176,12 +181,11 @@ class compute_partition_t {
    */
   edge_device_view_t edge_device_view() const
   {
-    return edge_device_view_t(
-      vertex_partition_offsets_v_.data().get(), row_size_, col_size_, size_);
+    return edge_device_view_t(vertex_partition_offsets_v_.data(), row_size_, col_size_, size_);
   }
 
  private:
-  rmm::device_vector<vertex_t> vertex_partition_offsets_v_{};
+  rmm::device_uvector<vertex_t> vertex_partition_offsets_v_;
   int row_size_{1};
   int col_size_{1};
   int size_{1};
diff --git a/cpp/include/experimental/graph.hpp b/cpp/include/experimental/graph.hpp
index cc21f7c5013..6a10256e6f4 100644
--- a/cpp/include/experimental/graph.hpp
+++ b/cpp/include/experimental/graph.hpp
@@ -61,6 +61,8 @@ class graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enab
   static constexpr bool is_adj_matrix_transposed = store_transposed;
   static constexpr bool is_multi_gpu             = multi_gpu;
 
+  graph_t(raft::handle_t const &handle) : detail::graph_base_t<vertex_t, edge_t, weight_t>() {}
+
   graph_t(raft::handle_t const &handle,
           std::vector<edgelist_t<vertex_t, edge_t, weight_t>> const &edgelists,
           partition_t<vertex_t> const &partition,
@@ -123,6 +125,12 @@ class graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enab
   static constexpr bool is_adj_matrix_transposed = store_transposed;
   static constexpr bool is_multi_gpu             = multi_gpu;
 
+  graph_t(raft::handle_t const &handle)
+    : detail::graph_base_t<vertex_t, edge_t, weight_t>(),
+      offsets_(0, handle.get_stream()),
+      indices_(0, handle.get_stream()),
+      weights_(0, handle.get_stream()){};
+
   graph_t(raft::handle_t const &handle,
           edgelist_t<vertex_t, edge_t, weight_t> const &edgelist,
           vertex_t number_of_vertices,
diff --git a/cpp/include/experimental/graph_generator.hpp b/cpp/include/experimental/graph_generator.hpp
new file mode 100644
index 00000000000..b8495ed7581
--- /dev/null
+++ b/cpp/include/experimental/graph_generator.hpp
@@ -0,0 +1,84 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <raft/handle.hpp>
+#include <rmm/device_uvector.hpp>
+
+#include <cstdint>
+#include <tuple>
+
+namespace cugraph {
+namespace experimental {
+
+/**
+ * @brief generate an edge list for an R-mat graph.
+ *
+ * This function allows multi-edges and self-loops similar to the Graph 500 reference
+ * implementation.
+ *
+ * @p scramble_vertex_ids needs to be set to `true` to generate a graph conforming to the Graph 500
+ * specification (note that scrambling does not affect cuGraph's graph construction performance, so
+ * this is generally unnecessary). If `edge_factor` is given (e.g. Graph 500), set @p num_edges to
+ * (size_t{1} << @p scale) * `edge_factor`. To generate an undirected graph, set @p b == @p c and @p
+ * clip_and_flip = true. All the resulting edges will be placed in the lower triangular part
+ * (including the diagonal) of the graph adjacency matrix.
+ *
+ * For multi-GPU generation with `P` GPUs, @p seed should be set to different values in different
+ * GPUs to avoid every GPU generating the same set of edges. @p num_edges should be adjusted as
+ * well; e.g. assuming `edge_factor` is given, set @p num_edges = (size_t{1} << @p scale) *
+ * `edge_factor` / `P` + (rank < (((size_t{1} << @p scale) * `edge_factor`) % P) ? 1 : 0).
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param scale Scale factor to set the number of vertices in the graph. Vertex IDs have values in
+ * [0, V), where V = 1 << @p scale.
+ * @param num_edges Number of edges to generate.
+ * @param a a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (visit
+ * https://graph500.org for additional details). a, b, c, d should be non-negative and a + b + c
+ * should be no larger than 1.0.
+ * @param b a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (visit
+ * https://graph500.org for additional details). a, b, c, d should be non-negative and a + b + c
+ * should be no larger than 1.0.
+ * @param c a, b, c, d (= 1.0 - (a + b + c)) in the R-mat graph generator (visit
+ * https://graph500.org for additional details). a, b, c, d should be non-negative and a + b + c
+ * should be no larger than 1.0.
+ * @param seed Seed value for the random number generator.
+ * @param clip_and_flip Flag controlling whether to generate edges only in the lower triangular part
+ * (including the diagonal) of the graph adjacency matrix (if set to `true`) or not (if set to
+ * `false`).
+ * @param scramble_vertex_ids Flag controlling whether to scramble vertex ID bits (if set to `true`)
+ * or not (if set to `false`); scrambling vertex ID bits breaks correlation between vertex ID
+ * values and vertex degrees. The scramble code here follows the algorithm in the Graph 500
+ * reference implementation version 3.0.0.
+ * @return std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> A tuple of
+ * rmm::device_uvector objects for edge source vertex IDs and edge destination vertex IDs.
+ */
+template <typename vertex_t>
+std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> generate_rmat_edgelist(
+  raft::handle_t const& handle,
+  size_t scale,
+  size_t edge_factor       = 16,
+  double a                 = 0.57,
+  double b                 = 0.19,
+  double c                 = 0.19,
+  uint64_t seed            = 0,
+  bool clip_and_flip       = false,
+  bool scramble_vertex_ids = false);
+
+}  // namespace experimental
+}  // namespace cugraph
diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp
index 7598841fc1a..5d3d09bb087 100644
--- a/cpp/include/experimental/graph_view.hpp
+++ b/cpp/include/experimental/graph_view.hpp
@@ -82,6 +82,8 @@ namespace experimental {
 template <typename vertex_t>
 class partition_t {
  public:
+  partition_t() = default;
+
   partition_t(std::vector<vertex_t> const& vertex_partition_offsets,
               bool hypergraph_partitioned,
               int row_comm_size,
@@ -247,6 +249,8 @@ size_t constexpr num_segments_per_vertex_partition{3};
 template <typename vertex_t, typename edge_t, typename weight_t>
 class graph_base_t {
  public:
+  graph_base_t() = default;
+
   graph_base_t(raft::handle_t const& handle,
                vertex_t number_of_vertices,
                edge_t number_of_edges,
diff --git a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh
index 8490df1d17d..11cf2cb1137 100644
--- a/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh
+++ b/cpp/include/patterns/copy_v_transform_reduce_key_aggregated_out_nbr.cuh
@@ -88,16 +88,17 @@ __global__ void for_all_major_for_all_nbr_low_degree(
       // in-place reduce_by_key
       vertex_t key_idx{0};
       key_aggregated_edge_weights[local_offset + key_idx] =
-        weights != nullptr ? weights[0] : weight_t{1.0};
+        weights != nullptr ? key_aggregated_edge_weights[local_offset] : weight_t{1.0};
+
       for (edge_t i = 1; i < local_degree; ++i) {
         if (minor_keys[local_offset + i] == minor_keys[local_offset + key_idx]) {
           key_aggregated_edge_weights[local_offset + key_idx] +=
-            weights != nullptr ? weights[i] : weight_t{1.0};
+            weights != nullptr ? key_aggregated_edge_weights[local_offset + i] : weight_t{1.0};
         } else {
           ++key_idx;
           minor_keys[local_offset + key_idx] = minor_keys[local_offset + i];
           key_aggregated_edge_weights[local_offset + key_idx] =
-            weights != nullptr ? weights[i] : weight_t{1.0};
+            weights != nullptr ? key_aggregated_edge_weights[local_offset + i] : weight_t{1.0};
         }
       }
       thrust::fill(thrust::seq,
@@ -170,6 +171,7 @@ __global__ void for_all_major_for_all_nbr_low_degree(
 template <typename GraphViewType,
           typename AdjMatrixRowValueInputIterator,
           typename VertexIterator,
+          typename VertexIterator2,
           typename ValueIterator,
           typename KeyAggregatedEdgeOp,
           typename ReduceOp,
@@ -180,8 +182,8 @@ void copy_v_transform_reduce_key_aggregated_out_nbr(
   GraphViewType const& graph_view,
   AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first,
   VertexIterator adj_matrix_col_key_first,
-  VertexIterator map_key_first,
-  VertexIterator map_key_last,
+  VertexIterator2 map_key_first,
+  VertexIterator2 map_key_last,
   ValueIterator map_value_first,
   KeyAggregatedEdgeOp key_aggregated_e_op,
   ReduceOp reduce_op,
@@ -215,68 +217,6 @@ void copy_v_transform_reduce_key_aggregated_out_nbr(
     });
   kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last));
 
-  if (GraphViewType::is_multi_gpu) {
-    auto& comm           = handle.get_comms();
-    auto const comm_size = comm.get_size();
-
-    rmm::device_uvector<vertex_t> unique_keys(
-      graph_view.get_number_of_local_adj_matrix_partition_cols(), handle.get_stream());
-    thrust::copy(
-      rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
-      adj_matrix_col_key_first,
-      adj_matrix_col_key_first + graph_view.get_number_of_local_adj_matrix_partition_cols(),
-      unique_keys.begin());
-    thrust::sort(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
-                 unique_keys.begin(),
-                 unique_keys.end());
-    auto last = thrust::unique(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
-                               unique_keys.begin(),
-                               unique_keys.end());
-    unique_keys.resize(thrust::distance(unique_keys.begin(), last), handle.get_stream());
-
-    rmm::device_uvector<vertex_t> rx_unique_keys(0, handle.get_stream());
-    std::vector<size_t> rx_value_counts{};
-    std::tie(rx_unique_keys, rx_value_counts) = groupby_gpuid_and_shuffle_values(
-      comm,
-      unique_keys.begin(),
-      unique_keys.end(),
-      [key_func = detail::compute_gpu_id_from_vertex_t<vertex_t>{comm_size}] __device__(auto val) {
-        return key_func(val);
-      },
-      handle.get_stream());
-
-    rmm::device_uvector<value_t> values_for_unique_keys(rx_unique_keys.size(), handle.get_stream());
-
-    CUDA_TRY(cudaStreamSynchronize(
-      handle.get_stream()));  // cuco::static_map currently does not take stream
-
-    kv_map_ptr->find(rx_unique_keys.begin(), rx_unique_keys.end(), values_for_unique_keys.begin());
-
-    rmm::device_uvector<value_t> rx_values_for_unique_keys(0, handle.get_stream());
-
-    std::tie(rx_values_for_unique_keys, std::ignore) =
-      shuffle_values(comm, values_for_unique_keys.begin(), rx_value_counts, handle.get_stream());
-
-    CUDA_TRY(cudaStreamSynchronize(
-      handle.get_stream()));  // cuco::static_map currently does not take stream
-
-    kv_map_ptr.reset();
-
-    kv_map_ptr = std::make_unique<cuco::static_map<vertex_t, value_t>>(
-      static_cast<size_t>(static_cast<double>(unique_keys.size()) / load_factor),
-      invalid_vertex_id<vertex_t>::value,
-      invalid_vertex_id<vertex_t>::value);
-
-    auto pair_first = thrust::make_transform_iterator(
-      thrust::make_zip_iterator(
-        thrust::make_tuple(unique_keys.begin(), rx_values_for_unique_keys.begin())),
-      [] __device__(auto val) {
-        return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val));
-      });
-
-    kv_map_ptr->insert(pair_first, pair_first + unique_keys.size());
-  }
-
   // 2. aggregate each vertex out-going edges based on keys and transform-reduce.
 
   auto loop_count = size_t{1};
@@ -382,9 +322,31 @@ void copy_v_transform_reduce_key_aggregated_out_nbr(
             auto val) { return key_func(thrust::get<1>(val)); },
           handle.get_stream());
 
-      tmp_major_vertices              = std::move(rx_major_vertices);
-      tmp_minor_keys                  = std::move(rx_minor_keys);
-      tmp_key_aggregated_edge_weights = std::move(rx_key_aggregated_edge_weights);
+      auto pair_first = thrust::make_zip_iterator(
+        thrust::make_tuple(rx_major_vertices.begin(), rx_minor_keys.begin()));
+      thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+                          pair_first,
+                          pair_first + rx_major_vertices.size(),
+                          rx_key_aggregated_edge_weights.begin());
+      tmp_major_vertices.resize(rx_major_vertices.size(), handle.get_stream());
+      tmp_minor_keys.resize(tmp_major_vertices.size(), handle.get_stream());
+      tmp_key_aggregated_edge_weights.resize(tmp_major_vertices.size(), handle.get_stream());
+      auto pair_it =
+        thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+                              pair_first,
+                              pair_first + rx_major_vertices.size(),
+                              rx_key_aggregated_edge_weights.begin(),
+                              thrust::make_zip_iterator(thrust::make_tuple(
+                                tmp_major_vertices.begin(), tmp_minor_keys.begin())),
+                              tmp_key_aggregated_edge_weights.begin());
+      tmp_major_vertices.resize(
+        thrust::distance(tmp_key_aggregated_edge_weights.begin(), thrust::get<1>(pair_it)),
+        handle.get_stream());
+      tmp_minor_keys.resize(tmp_major_vertices.size(), handle.get_stream());
+      tmp_key_aggregated_edge_weights.resize(tmp_major_vertices.size(), handle.get_stream());
+      tmp_major_vertices.shrink_to_fit(handle.get_stream());
+      tmp_minor_keys.shrink_to_fit(handle.get_stream());
+      tmp_key_aggregated_edge_weights.shrink_to_fit(handle.get_stream());
     }
 
     auto tmp_e_op_result_buffer =
diff --git a/cpp/include/patterns/count_if_e.cuh b/cpp/include/patterns/count_if_e.cuh
index 63b31f9c44e..99bfc80f643 100644
--- a/cpp/include/patterns/count_if_e.cuh
+++ b/cpp/include/patterns/count_if_e.cuh
@@ -201,7 +201,7 @@ typename GraphViewType::edge_type count_if_e(
                                          detail::count_if_e_for_all_block_size,
                                          handle.get_device_properties().maxGridSize[0]);
 
-      rmm::device_vector<edge_t> block_counts(update_grid.num_blocks);
+      rmm::device_uvector<edge_t> block_counts(update_grid.num_blocks, handle.get_stream());
 
       detail::for_all_major_for_all_nbr_low_degree<<<update_grid.num_blocks,
                                                      update_grid.block_size,
@@ -210,7 +210,7 @@ typename GraphViewType::edge_type count_if_e(
         matrix_partition,
         adj_matrix_row_value_input_first + row_value_input_offset,
         adj_matrix_col_value_input_first + col_value_input_offset,
-        block_counts.data().get(),
+        block_counts.data(),
         e_op);
 
       // FIXME: we have several options to implement this. With cooperative group support
diff --git a/cpp/include/patterns/transform_reduce_e.cuh b/cpp/include/patterns/transform_reduce_e.cuh
index 946c15a16a0..1f59777bc35 100644
--- a/cpp/include/patterns/transform_reduce_e.cuh
+++ b/cpp/include/patterns/transform_reduce_e.cuh
@@ -206,7 +206,8 @@ T transform_reduce_e(raft::handle_t const& handle,
                                          detail::transform_reduce_e_for_all_block_size,
                                          handle.get_device_properties().maxGridSize[0]);
 
-      rmm::device_vector<T> block_results(update_grid.num_blocks);
+      auto block_result_buffer =
+        allocate_dataframe_buffer<T>(update_grid.num_blocks, handle.get_stream());
 
       detail::for_all_major_for_all_nbr_low_degree<<<update_grid.num_blocks,
                                                      update_grid.block_size,
@@ -215,7 +216,7 @@ T transform_reduce_e(raft::handle_t const& handle,
         matrix_partition,
         adj_matrix_row_value_input_first + row_value_input_offset,
         adj_matrix_col_value_input_first + col_value_input_offset,
-        block_results.data(),
+        get_dataframe_buffer_begin<T>(block_result_buffer),
         e_op);
 
       // FIXME: we have several options to implement this. With cooperative group support
@@ -225,10 +226,10 @@ T transform_reduce_e(raft::handle_t const& handle,
       // synchronization point in varying timings and the number of SMs is not very big)
       auto partial_result =
         thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
-                       block_results.begin(),
-                       block_results.end(),
+                       get_dataframe_buffer_begin<T>(block_result_buffer),
+                       get_dataframe_buffer_begin<T>(block_result_buffer) + update_grid.num_blocks,
                        T(),
-                       [] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); });
+                       [] __device__(T lhs, T rhs) { return plus_edge_op_result(lhs, rhs); });
 
       result = plus_edge_op_result(result, partial_result);
     }
diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh
index 4c76322fa79..4efd32bcac7 100644
--- a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh
+++ b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh
@@ -21,6 +21,7 @@
 #include <partition_manager.hpp>
 #include <patterns/edge_op_utils.cuh>
 #include <patterns/reduce_op.cuh>
+#include <utilities/dataframe_buffer.cuh>
 #include <utilities/device_comm.cuh>
 #include <utilities/error.hpp>
 #include <utilities/host_scalar_comm.cuh>
@@ -157,13 +158,14 @@ size_t reduce_buffer_elements(raft::handle_t const& handle,
     // FIXME: if GraphViewType::is_multi_gpu is true, this should be executed on the GPU holding the
     // vertex unless reduce_op is a pure function.
     rmm::device_uvector<key_t> keys(num_buffer_elements, handle.get_stream());
-    rmm::device_vector<payload_t> values(num_buffer_elements);
+    auto value_buffer =
+      allocate_dataframe_buffer<payload_t>(num_buffer_elements, handle.get_stream());
     auto it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
                                     buffer_key_output_first,
                                     buffer_key_output_first + num_buffer_elements,
                                     buffer_payload_output_first,
                                     keys.begin(),
-                                    values.begin(),
+                                    get_dataframe_buffer_begin<payload_t>(value_buffer),
                                     thrust::equal_to<key_t>(),
                                     reduce_op);
     auto num_reduced_buffer_elements =
@@ -173,13 +175,9 @@ size_t reduce_buffer_elements(raft::handle_t const& handle,
                  keys.begin() + num_reduced_buffer_elements,
                  buffer_key_output_first);
     thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
-                 values.begin(),
-                 values.begin() + num_reduced_buffer_elements,
+                 get_dataframe_buffer_begin<payload_t>(value_buffer),
+                 get_dataframe_buffer_begin<payload_t>(value_buffer) + num_reduced_buffer_elements,
                  buffer_payload_output_first);
-    // FIXME: this is unecessary if we use a tuple of rmm::device_uvector objects for values
-    CUDA_TRY(
-      cudaStreamSynchronize(handle.get_stream()));  // this is necessary as values will become
-                                                    // out-of-scope once this function returns
     return num_reduced_buffer_elements;
   }
 }
@@ -673,15 +671,19 @@ void update_frontier_v_push_if_out_nbr(
         num_buffer_elements,
         vertex_value_input_first,
         vertex_value_output_first,
-        std::get<0>(bucket_and_bucket_size_device_ptrs).get(),
-        std::get<1>(bucket_and_bucket_size_device_ptrs).get(),
+        std::get<0>(bucket_and_bucket_size_device_ptrs),
+        std::get<1>(bucket_and_bucket_size_device_ptrs),
         VertexFrontierType::kInvalidBucketIdx,
         invalid_vertex,
         v_op);
 
     auto bucket_sizes_device_ptr = std::get<1>(bucket_and_bucket_size_device_ptrs);
-    thrust::host_vector<size_t> bucket_sizes(
-      bucket_sizes_device_ptr, bucket_sizes_device_ptr + VertexFrontierType::kNumBuckets);
+    std::vector<size_t> bucket_sizes(VertexFrontierType::kNumBuckets);
+    raft::update_host(bucket_sizes.data(),
+                      bucket_sizes_device_ptr,
+                      VertexFrontierType::kNumBuckets,
+                      handle.get_stream());
+    CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));
     for (size_t i = 0; i < VertexFrontierType::kNumBuckets; ++i) {
       vertex_frontier.get_bucket(i).set_size(bucket_sizes[i]);
     }
diff --git a/cpp/include/patterns/vertex_frontier.cuh b/cpp/include/patterns/vertex_frontier.cuh
index 2126a27ee5a..c11142d3cf7 100644
--- a/cpp/include/patterns/vertex_frontier.cuh
+++ b/cpp/include/patterns/vertex_frontier.cuh
@@ -147,13 +147,17 @@ template <typename vertex_t, bool is_multi_gpu = false>
 class Bucket {
  public:
   Bucket(raft::handle_t const& handle, size_t capacity)
-    : handle_ptr_(&handle), elements_(capacity, invalid_vertex_id<vertex_t>::value)
+    : handle_ptr_(&handle), elements_(capacity, handle.get_stream())
   {
+    thrust::fill(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()),
+                 elements_.begin(),
+                 elements_.end(),
+                 invalid_vertex_id<vertex_t>::value);
   }
 
   void insert(vertex_t v)
   {
-    elements_[size_] = v;
+    raft::update_device(elements_.data() + size_, &v, 1, handle_ptr_->get_stream());
     ++size_;
   }
 
@@ -177,9 +181,9 @@ class Bucket {
 
   size_t capacity() const { return elements_.size(); }
 
-  auto const data() const { return elements_.data().get(); }
+  auto const data() const { return elements_.data(); }
 
-  auto data() { return elements_.data().get(); }
+  auto data() { return elements_.data(); }
 
   auto const begin() const { return elements_.begin(); }
 
@@ -191,7 +195,7 @@ class Bucket {
 
  private:
   raft::handle_t const* handle_ptr_{nullptr};
-  rmm::device_vector<vertex_t> elements_{};
+  rmm::device_uvector<vertex_t> elements_;
   size_t size_{0};
 };
 
@@ -206,13 +210,21 @@ class VertexFrontier {
 
   VertexFrontier(raft::handle_t const& handle, std::vector<size_t> bucket_capacities)
     : handle_ptr_(&handle),
-      tmp_bucket_ptrs_(num_buckets, nullptr),
-      tmp_bucket_sizes_(num_buckets, 0),
+      tmp_bucket_ptrs_(num_buckets, handle.get_stream()),
+      tmp_bucket_sizes_(num_buckets, handle.get_stream()),
       buffer_ptrs_(kReduceInputTupleSize + 1 /* to store destination column number */, nullptr),
       buffer_idx_(0, handle_ptr_->get_stream())
   {
     CUGRAPH_EXPECTS(bucket_capacities.size() == num_buckets,
                     "invalid input argument bucket_capacities (size mismatch)");
+    thrust::fill(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()),
+                 tmp_bucket_ptrs_.begin(),
+                 tmp_bucket_ptrs_.end(),
+                 static_cast<vertex_t*>(nullptr));
+    thrust::fill(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()),
+                 tmp_bucket_sizes_.begin(),
+                 tmp_bucket_sizes_.end(),
+                 size_t{0});
     for (size_t i = 0; i < num_buckets; ++i) {
       buckets_.emplace_back(handle, bucket_capacities[i]);
     }
@@ -251,8 +263,8 @@ class VertexFrontier {
            0,
            handle_ptr_->get_stream()>>>(this_bucket.begin(),
                                         this_bucket.end(),
-                                        std::get<0>(bucket_and_bucket_size_device_ptrs).get(),
-                                        std::get<1>(bucket_and_bucket_size_device_ptrs).get(),
+                                        std::get<0>(bucket_and_bucket_size_device_ptrs),
+                                        std::get<1>(bucket_and_bucket_size_device_ptrs),
                                         bucket_idx,
                                         kInvalidBucketIdx,
                                         invalid_vertex,
@@ -269,8 +281,10 @@ class VertexFrontier {
                         [] __device__(auto value) { return value == invalid_vertex; });
 
     auto bucket_sizes_device_ptr = std::get<1>(bucket_and_bucket_size_device_ptrs);
-    thrust::host_vector<size_t> bucket_sizes(bucket_sizes_device_ptr,
-                                             bucket_sizes_device_ptr + kNumBuckets);
+    std::vector<size_t> bucket_sizes(kNumBuckets);
+    raft::update_host(
+      bucket_sizes.data(), bucket_sizes_device_ptr, kNumBuckets, handle_ptr_->get_stream());
+    CUDA_TRY(cudaStreamSynchronize(handle_ptr_->get_stream()));
     for (size_t i = 0; i < kNumBuckets; ++i) {
       if (i != bucket_idx) { get_bucket(i).set_size(bucket_sizes[i]); }
     }
@@ -283,14 +297,17 @@ class VertexFrontier {
 
   auto get_bucket_and_bucket_size_device_pointers()
   {
-    thrust::host_vector<vertex_t*> tmp_ptrs(buckets_.size(), nullptr);
-    thrust::host_vector<size_t> tmp_sizes(buckets_.size(), 0);
+    std::vector<vertex_t*> tmp_ptrs(buckets_.size(), nullptr);
+    std::vector<size_t> tmp_sizes(buckets_.size(), 0);
     for (size_t i = 0; i < buckets_.size(); ++i) {
       tmp_ptrs[i]  = get_bucket(i).data();
       tmp_sizes[i] = get_bucket(i).size();
     }
-    tmp_bucket_ptrs_  = tmp_ptrs;
-    tmp_bucket_sizes_ = tmp_sizes;
+    raft::update_device(
+      tmp_bucket_ptrs_.data(), tmp_ptrs.data(), tmp_ptrs.size(), handle_ptr_->get_stream());
+    raft::update_device(
+      tmp_bucket_sizes_.data(), tmp_sizes.data(), tmp_sizes.size(), handle_ptr_->get_stream());
+    CUDA_TRY(cudaStreamSynchronize(handle_ptr_->get_stream()));
     return std::make_tuple(tmp_bucket_ptrs_.data(), tmp_bucket_sizes_.data());
   }
 
@@ -345,8 +362,8 @@ class VertexFrontier {
 
   raft::handle_t const* handle_ptr_{nullptr};
   std::vector<Bucket<vertex_t, is_multi_gpu>> buckets_{};
-  rmm::device_vector<vertex_t*> tmp_bucket_ptrs_{};
-  rmm::device_vector<size_t> tmp_bucket_sizes_{};
+  rmm::device_uvector<vertex_t*> tmp_bucket_ptrs_;
+  rmm::device_uvector<size_t> tmp_bucket_sizes_;
 
   std::array<size_t, kReduceInputTupleSize> tuple_element_sizes_ =
     compute_thrust_tuple_element_sizes<ReduceInputTupleType>()();
diff --git a/cpp/include/utilities/collect_comm.cuh b/cpp/include/utilities/collect_comm.cuh
new file mode 100644
index 00000000000..5ca58ebeb17
--- /dev/null
+++ b/cpp/include/utilities/collect_comm.cuh
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <experimental/graph.hpp>
+#include <utilities/dataframe_buffer.cuh>
+#include <utilities/shuffle_comm.cuh>
+
+#include <raft/handle.hpp>
+#include <rmm/device_uvector.hpp>
+#include <rmm/exec_policy.hpp>
+
+#include <thrust/distance.h>
+#include <cuco/static_map.cuh>
+
+#include <iterator>
+#include <memory>
+#include <vector>
+
+namespace cugraph {
+namespace experimental {
+
+// for each key in [map_key_first, map_key_last), key_to_gpu_id_op(key) should coincide with
+// comm.get_rank()
+template <typename VertexIterator0,
+          typename VertexIterator1,
+          typename ValueIterator,
+          typename KeyToGPUIdOp>
+decltype(allocate_dataframe_buffer<typename std::iterator_traits<ValueIterator>::value_type>(
+  0, cudaStream_t{nullptr}))
+collect_values_for_keys(raft::comms::comms_t const &comm,
+                        VertexIterator0 map_key_first,
+                        VertexIterator0 map_key_last,
+                        ValueIterator map_value_first,
+                        VertexIterator1 collect_key_first,
+                        VertexIterator1 collect_key_last,
+                        KeyToGPUIdOp key_to_gpu_id_op,
+                        cudaStream_t stream)
+{
+  using vertex_t = typename std::iterator_traits<VertexIterator0>::value_type;
+  static_assert(
+    std::is_same<typename std::iterator_traits<VertexIterator1>::value_type, vertex_t>::value);
+  using value_t = typename std::iterator_traits<ValueIterator>::value_type;
+
+  double constexpr load_factor = 0.7;
+
+  // FIXME: we may compare the performance & memory footprint of this hash based approach vs binary
+  // search based approach
+
+  // 1. build a cuco::static_map object for the map k, v pairs.
+
+  auto kv_map_ptr = std::make_unique<cuco::static_map<vertex_t, value_t>>(
+    static_cast<size_t>(static_cast<double>(thrust::distance(map_key_first, map_key_last)) /
+                        load_factor),
+    invalid_vertex_id<vertex_t>::value,
+    invalid_vertex_id<vertex_t>::value);
+  {
+    auto pair_first = thrust::make_transform_iterator(
+      thrust::make_zip_iterator(thrust::make_tuple(map_key_first, map_value_first)),
+      [] __device__(auto val) {
+        return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val));
+      });
+    kv_map_ptr->insert(pair_first, pair_first + thrust::distance(map_key_first, map_key_last));
+  }
+
+  // 2. collect values for the unique keys in [collect_key_first, collect_key_last)
+
+  rmm::device_uvector<vertex_t> unique_keys(thrust::distance(collect_key_first, collect_key_last),
+                                            stream);
+  thrust::copy(
+    rmm::exec_policy(stream)->on(stream), collect_key_first, collect_key_last, unique_keys.begin());
+  // FIXME: sort and unique are unnecessary if the keys in [collect_key_first, collect_key_last) are
+  // already unique; if this cost becomes a performance bottleneck, we may add
+  // collect_values_for_unique_keys in the future
+  thrust::sort(rmm::exec_policy(stream)->on(stream), unique_keys.begin(), unique_keys.end());
+  unique_keys.resize(
+    thrust::distance(
+      unique_keys.begin(),
+      thrust::unique(rmm::exec_policy(stream)->on(stream), unique_keys.begin(), unique_keys.end())),
+    stream);
+
+  rmm::device_uvector<value_t> values_for_unique_keys(0, stream);
+  {
+    rmm::device_uvector<vertex_t> rx_unique_keys(0, stream);
+    std::vector<size_t> rx_value_counts{};
+    std::tie(rx_unique_keys, rx_value_counts) = groupby_gpuid_and_shuffle_values(
+      comm,
+      unique_keys.begin(),
+      unique_keys.end(),
+      [key_to_gpu_id_op] __device__(auto val) { return key_to_gpu_id_op(val); },
+      stream);
+
+    rmm::device_uvector<value_t> values_for_rx_unique_keys(rx_unique_keys.size(), stream);
+
+    CUDA_TRY(cudaStreamSynchronize(stream));  // cuco::static_map currently does not take stream
+
+    kv_map_ptr->find(
+      rx_unique_keys.begin(), rx_unique_keys.end(), values_for_rx_unique_keys.begin());
+
+    rmm::device_uvector<value_t> rx_values_for_unique_keys(0, stream);
+    std::tie(rx_values_for_unique_keys, std::ignore) =
+      shuffle_values(comm, values_for_rx_unique_keys.begin(), rx_value_counts, stream);
+
+    values_for_unique_keys = std::move(rx_values_for_unique_keys);
+  }
+
+  // 3. re-build a cuco::static_map object for the k, v pairs in unique_keys,
+  // values_for_unique_keys.
+
+  CUDA_TRY(cudaStreamSynchronize(stream));  // cuco::static_map currently does not take stream
+
+  kv_map_ptr.reset();
+
+  kv_map_ptr = std::make_unique<cuco::static_map<vertex_t, value_t>>(
+    static_cast<size_t>(static_cast<double>(unique_keys.size()) / load_factor),
+    invalid_vertex_id<vertex_t>::value,
+    invalid_vertex_id<vertex_t>::value);
+  {
+    auto pair_first = thrust::make_transform_iterator(
+      thrust::make_zip_iterator(
+        thrust::make_tuple(unique_keys.begin(), values_for_unique_keys.begin())),
+      [] __device__(auto val) {
+        return thrust::make_pair(thrust::get<0>(val), thrust::get<1>(val));
+      });
+
+    kv_map_ptr->insert(pair_first, pair_first + unique_keys.size());
+  }
+
+  // 4. find values for [collect_key_first, collect_key_last)
+
+  auto value_buffer = allocate_dataframe_buffer<value_t>(
+    thrust::distance(collect_key_first, collect_key_last), stream);
+  kv_map_ptr->find(
+    collect_key_first, collect_key_last, get_dataframe_buffer_begin<value_t>(value_buffer));
+
+  return value_buffer;
+}
+
+}  // namespace experimental
+}  // namespace cugraph
diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp
index e94190897b8..98e850abbf0 100644
--- a/cpp/include/utilities/cython.hpp
+++ b/cpp/include/utilities/cython.hpp
@@ -190,10 +190,7 @@ struct major_minor_weights_t {
 //
 template <typename vertex_t, typename edge_t>
 struct renum_quad_t {
-  explicit renum_quad_t(raft::handle_t const& handle)
-    : dv_(0, handle.get_stream()), part_(std::vector<vertex_t>(), false, 0, 0, 0, 0)
-  {
-  }
+  explicit renum_quad_t(raft::handle_t const& handle) : dv_(0, handle.get_stream()), part_() {}
 
   rmm::device_uvector<vertex_t>& get_dv(void) { return dv_; }
 
@@ -298,8 +295,8 @@ struct renum_quad_t {
  private:
   rmm::device_uvector<vertex_t> dv_;
   cugraph::experimental::partition_t<vertex_t> part_;
-  vertex_t nv_;
-  edge_t ne_;
+  vertex_t nv_{0};
+  edge_t ne_{0};
 };
 // FIXME: finish description for vertex_partition_offsets
 //
diff --git a/cpp/include/utilities/shuffle_comm.cuh b/cpp/include/utilities/shuffle_comm.cuh
index da86f76b11d..8c363c9a346 100644
--- a/cpp/include/utilities/shuffle_comm.cuh
+++ b/cpp/include/utilities/shuffle_comm.cuh
@@ -228,6 +228,15 @@ auto shuffle_values(raft::comms::comms_t const &comm,
     rx_src_ranks,
     stream);
 
+  if (rx_counts.size() < static_cast<size_t>(comm_size)) {
+    std::vector<size_t> tmp_rx_counts(comm_size, size_t{0});
+    for (size_t i = 0; i < rx_src_ranks.size(); ++i) {
+      assert(rx_src_ranks[i] < comm_size);
+      tmp_rx_counts[rx_src_ranks[i]] = rx_counts[i];
+    }
+    rx_counts = std::move(tmp_rx_counts);
+  }
+
   return std::make_tuple(std::move(rx_value_buffer), rx_counts);
 }
 
@@ -271,6 +280,16 @@ auto groupby_gpuid_and_shuffle_values(raft::comms::comms_t const &comm,
     rx_src_ranks,
     stream);
 
+  // expand rx_counts to one entry per rank (ranks absent from rx_src_ranks keep a count of 0)
+  if (rx_counts.size() < static_cast<size_t>(comm_size)) {
+    std::vector<size_t> tmp_rx_counts(comm_size, size_t{0});
+    for (size_t i = 0; i < rx_src_ranks.size(); ++i) {
+      assert(rx_src_ranks[i] < comm_size);
+      tmp_rx_counts[rx_src_ranks[i]] = rx_counts[i];
+    }
+    rx_counts = std::move(tmp_rx_counts);
+  }
+
   return std::make_tuple(std::move(rx_value_buffer), rx_counts);
 }
 
 
@@ -282,6 +299,8 @@ auto groupby_gpuid_and_shuffle_kv_pairs(raft::comms::comms_t const &comm,
                                         KeyToGPUIdOp key_to_gpu_id_op,
                                         cudaStream_t stream)
 {
+  auto const comm_size = comm.get_size();
+
   auto d_tx_value_counts = detail::sort_and_count(
     comm, tx_key_first, tx_key_last, tx_value_first, key_to_gpu_id_op, stream);
 
@@ -328,6 +347,15 @@ auto groupby_gpuid_and_shuffle_kv_pairs(raft::comms::comms_t const &comm,
     rx_src_ranks,
     stream);
 
+  if (rx_counts.size() < static_cast<size_t>(comm_size)) {
+    std::vector<size_t> tmp_rx_counts(comm_size, size_t{0});
+    for (size_t i = 0; i < rx_src_ranks.size(); ++i) {
+      assert(rx_src_ranks[i] < comm_size);
+      tmp_rx_counts[rx_src_ranks[i]] = rx_counts[i];
+    }
+    rx_counts = std::move(tmp_rx_counts);
+  }
+
   return std::make_tuple(std::move(rx_keys), std::move(rx_value_buffer), rx_counts);
 }
 
diff --git a/cpp/include/utilities/thrust_tuple_utils.cuh b/cpp/include/utilities/thrust_tuple_utils.cuh
index 0ad71ba5e05..01843a583eb 100644
--- a/cpp/include/utilities/thrust_tuple_utils.cuh
+++ b/cpp/include/utilities/thrust_tuple_utils.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,6 +15,7 @@
  */
 #pragma once
 
+#include <raft/cudart_utils.h>
 #include <raft/device_atomics.cuh>
 
 #include <thrust/iterator/discard_iterator.h>
diff --git a/cpp/src/community/egonet.cu b/cpp/src/community/egonet.cu
index fa788aa307b..067d27f9a92 100644
--- a/cpp/src/community/egonet.cu
+++ b/cpp/src/community/egonet.cu
@@ -22,6 +22,9 @@
 #include <utility>
 
 #include <rmm/thrust_rmm_allocator.h>
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/exec_policy.hpp>
+
 #include <thrust/transform.h>
 #include <ctime>
 
@@ -34,6 +37,8 @@
 #include <experimental/graph_functions.hpp>
 #include <experimental/graph_view.hpp>
 
+#include <utilities/high_res_timer.hpp>
+
 namespace {
 
 /*
@@ -61,58 +66,111 @@ extract(
   vertex_t n_subgraphs,
   vertex_t radius)
 {
-  auto v           = csr_view.get_number_of_vertices();
-  auto e           = csr_view.get_number_of_edges();
-  auto stream      = handle.get_stream();
-  float avg_degree = e / v;
+  auto v                = csr_view.get_number_of_vertices();
+  auto e                = csr_view.get_number_of_edges();
+  auto user_stream_view = handle.get_stream_view();
   rmm::device_vector<size_t> neighbors_offsets(n_subgraphs + 1);
   rmm::device_vector<vertex_t> neighbors;
 
-  // It is the right thing to accept device memory for source_vertex
-  // FIXME consider adding a device API to BFS (ie. accept source on the device)
   std::vector<vertex_t> h_source_vertex(n_subgraphs);
-  raft::update_host(&h_source_vertex[0], source_vertex, n_subgraphs, stream);
+  std::vector<size_t> h_neighbors_offsets(n_subgraphs + 1);
+
+  raft::update_host(&h_source_vertex[0], source_vertex, n_subgraphs, user_stream_view.value());
+
+  // One buffer per subgraph (one push_back per loop iteration below); allocated on worker streams
+  std::vector<rmm::device_uvector<vertex_t>> reached{};
+  reached.reserve(n_subgraphs);
 
-  // reserve some reasonable memory, but could grow larger than that
-  neighbors.reserve(v + avg_degree * n_subgraphs * radius);
-  neighbors_offsets[0] = 0;
-  // each source should be done concurently in the future
+  // h_source_vertex[i] is used by other streams in the for loop
+  user_stream_view.synchronize();
+#ifdef TIMING
+  HighResTimer hr_timer;
+  hr_timer.start("ego_neighbors");
+#endif
   for (vertex_t i = 0; i < n_subgraphs; i++) {
+    // get light handle from worker pool
+    raft::handle_t light_handle(handle, i);
+    auto worker_stream_view = light_handle.get_stream_view();
+
+    // Allocations and operations are attached to the worker stream
+    rmm::device_uvector<vertex_t> local_reach(v, worker_stream_view);
+    reached.push_back(std::move(local_reach));
+
     // BFS with cutoff
-    rmm::device_vector<vertex_t> reached(v);
-    rmm::device_vector<vertex_t> predecessors(v);  // not used
+    // consider adding a device API to BFS (ie. accept source on the device)
+    rmm::device_uvector<vertex_t> predecessors(v, worker_stream_view);  // not used
     bool direction_optimizing = false;
-    cugraph::experimental::bfs<vertex_t, edge_t, weight_t, false>(handle,
+    // start every entry at the sentinel that the transform/remove steps below treat as
+    // "not reached"; only the full [begin, end) range is written, which is valid for any v
+    thrust::fill(rmm::exec_policy(worker_stream_view),
+                 reached[i].begin(),
+                 reached[i].end(),
+                 std::numeric_limits<vertex_t>::max());
+
+    cugraph::experimental::bfs<vertex_t, edge_t, weight_t, false>(light_handle,
                                                                   csr_view,
-                                                                  reached.data().get(),
-                                                                  predecessors.data().get(),
+                                                                  reached[i].data(),
+                                                                  predecessors.data(),
                                                                   h_source_vertex[i],
                                                                   direction_optimizing,
                                                                   radius);
 
     // identify reached vertex ids from distance array
-    thrust::transform(rmm::exec_policy(stream)->on(stream),
+    thrust::transform(rmm::exec_policy(worker_stream_view),
                       thrust::make_counting_iterator(vertex_t{0}),
                       thrust::make_counting_iterator(v),
-                      reached.begin(),
-                      reached.begin(),
+                      reached[i].begin(),
+                      reached[i].begin(),
                       [sentinel = std::numeric_limits<vertex_t>::max()] __device__(
                         auto id, auto val) { return val < sentinel ? id : sentinel; });
 
     // removes unreached data
-    auto reached_end = thrust::remove(rmm::exec_policy(stream)->on(stream),
-                                      reached.begin(),
-                                      reached.end(),
+    auto reached_end = thrust::remove(rmm::exec_policy(worker_stream_view),
+                                      reached[i].begin(),
+                                      reached[i].end(),
                                       std::numeric_limits<vertex_t>::max());
+    // release temp storage
+    reached[i].resize(thrust::distance(reached[i].begin(), reached_end), worker_stream_view);
+    reached[i].shrink_to_fit(worker_stream_view);
+  }
 
-    // update extraction input
-    size_t n_reached         = thrust::distance(reached.begin(), reached_end);
-    neighbors_offsets[i + 1] = neighbors_offsets[i] + n_reached;
-    if (neighbors_offsets[i + 1] > neighbors.capacity())
-      neighbors.reserve(neighbors_offsets[i + 1] * 2);
-    neighbors.insert(neighbors.end(), reached.begin(), reached_end);
+  // wait for every worker stream to finish identifying its neighbors before concatenating
+  handle.wait_on_internal_streams();
+
+  // Construct the neighbor offsets (just a scan over the per-subgraph neighborhood sizes)
+  h_neighbors_offsets[0] = 0;
+  for (vertex_t i = 0; i < n_subgraphs; i++) {
+    h_neighbors_offsets[i + 1] = h_neighbors_offsets[i] + reached[i].size();
+  }
+  raft::update_device(neighbors_offsets.data().get(),
+                      &h_neighbors_offsets[0],
+                      n_subgraphs + 1,
+                      user_stream_view.value());
+  neighbors.resize(h_neighbors_offsets[n_subgraphs]);
+  user_stream_view.synchronize();
+
+  // Construct the neighbors list concurrently
+  for (vertex_t i = 0; i < n_subgraphs; i++) {
+    raft::handle_t light_handle(handle, i);
+    auto worker_stream_view = light_handle.get_stream_view();
+    thrust::copy(rmm::exec_policy(worker_stream_view),
+                 reached[i].begin(),
+                 reached[i].end(),
+                 neighbors.begin() + h_neighbors_offsets[i]);
+
+    // reached info is not needed anymore
+    reached[i].resize(0, worker_stream_view);
+    reached[i].shrink_to_fit(worker_stream_view);
   }
 
+  // wait for every worker stream before proceeding to grouped extraction
+  handle.wait_on_internal_streams();
+
+#ifdef TIMING
+  hr_timer.stop();
+  hr_timer.display(std::cout);
+#endif
+
   // extract
   return cugraph::experimental::extract_induced_subgraphs(
     handle, csr_view, neighbors_offsets.data().get(), neighbors.data().get(), n_subgraphs);
@@ -207,4 +265,4 @@ extract_ego(raft::handle_t const &,
             int64_t,
             int64_t);
 }  // namespace experimental
-}  // namespace cugraph
+}  // namespace cugraph
\ No newline at end of file
diff --git a/cpp/src/experimental/generate_rmat_edgelist.cu b/cpp/src/experimental/generate_rmat_edgelist.cu
new file mode 100644
index 00000000000..0a6d666432f
--- /dev/null
+++ b/cpp/src/experimental/generate_rmat_edgelist.cu
@@ -0,0 +1,149 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <experimental/scramble.cuh>
+
+#include <experimental/graph_generator.hpp>
+#include <utilities/error.hpp>
+
+#include <rmm/thrust_rmm_allocator.h>
+#include <raft/handle.hpp>
+#include <raft/random/rng.cuh>
+#include <rmm/device_uvector.hpp>
+
+#include <thrust/iterator/zip_iterator.h>
+#include <thrust/tuple.h>
+
+#include <tuple>
+
+namespace cugraph {
+namespace experimental {
+
+// Generate an R-mat edge list of num_edges (src, dst) pairs; before the optional scrambling,
+// every vertex ID is built from `scale` random bits. a, b and c are the per-bit probabilities
+// of (src bit, dst bit) being (0, 0), (0, 1) and (1, 0); the remainder goes to (1, 1).
+template <typename vertex_t>
+std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> generate_rmat_edgelist(
+  raft::handle_t const& handle,
+  size_t scale,
+  size_t num_edges,
+  double a,
+  double b,
+  double c,
+  uint64_t seed,
+  bool clip_and_flip,
+  bool scramble_vertex_ids)
+{
+  CUGRAPH_EXPECTS(size_t{1} << scale <= std::numeric_limits<vertex_t>::max(),
+                  "Invalid input argument: scale too large for vertex_t.");
+  CUGRAPH_EXPECTS((a >= 0.0) && (b >= 0.0) && (c >= 0.0) && (a + b + c <= 1.0),
+                  "Invalid input argument: a, b, c should be non-negative and a + b + c should not "
+                  "be larger than 1.0.");
+
+  raft::random::Rng rng(seed + 10);
+  // generate in batches to bound the size of `rands` (1024 edges per SM is a tuning parameter)
+  auto max_edges_to_generate_per_iteration =
+    static_cast<size_t>(handle.get_device_properties().multiProcessorCount) * 1024;
+  rmm::device_uvector<float> rands(
+    std::min(num_edges, max_edges_to_generate_per_iteration) * 2 * scale, handle.get_stream());
+
+  rmm::device_uvector<vertex_t> srcs(num_edges, handle.get_stream());
+  rmm::device_uvector<vertex_t> dsts(num_edges, handle.get_stream());
+
+  size_t num_edges_generated{0};
+  while (num_edges_generated < num_edges) {
+    auto num_edges_to_generate =
+      std::min(num_edges - num_edges_generated, max_edges_to_generate_per_iteration);
+    auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(srcs.begin(), dsts.begin())) +
+                      num_edges_generated;
+    rng.uniform<float, size_t>(
+      rands.data(), num_edges_to_generate * 2 * scale, 0.0f, 1.0f, handle.get_stream());
+    thrust::transform(
+      rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+      thrust::make_counting_iterator(size_t{0}),
+      thrust::make_counting_iterator(num_edges_to_generate),
+      pair_first,
+      // if a + b == 0.0, a_norm is irrelevant, if (1.0 - (a+b)) == 0.0, c_norm is irrelevant
+      [scale,
+       clip_and_flip,
+       rands    = rands.data(),
+       a_plus_b = a + b,
+       a_norm   = (a + b) > 0.0 ? a / (a + b) : 0.0,
+       c_norm   = (1.0 - (a + b)) > 0.0 ? c / (1.0 - (a + b)) : 0.0] __device__(auto i) {
+        vertex_t src{0};
+        vertex_t dst{0};
+        // signed index: descend from the top bit through bit 0 (no iterations when scale == 0)
+        for (int bit = static_cast<int>(scale) - 1; bit >= 0; --bit) {
+          auto r0          = rands[i * 2 * scale + 2 * bit];
+          auto r1          = rands[i * 2 * scale + 2 * bit + 1];
+          auto src_bit_set = r0 > a_plus_b;
+          auto dst_bit_set = r1 > (src_bit_set ? c_norm : a_norm);
+          if (clip_and_flip && (src == dst) && !src_bit_set && dst_bit_set) {
+            src_bit_set = true;
+            dst_bit_set = false;
+          }
+          src += src_bit_set ? static_cast<vertex_t>(vertex_t{1} << bit) : 0;
+          dst += dst_bit_set ? static_cast<vertex_t>(vertex_t{1} << bit) : 0;
+        }
+        return thrust::make_tuple(src, dst);
+      });
+    num_edges_generated += num_edges_to_generate;
+  }
+
+  if (scramble_vertex_ids) {
+    rands.resize(0, handle.get_stream());
+    rands.shrink_to_fit(handle.get_stream());
+
+    auto pair_first = thrust::make_zip_iterator(thrust::make_tuple(srcs.begin(), dsts.begin()));
+    thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+                      pair_first,
+                      pair_first + srcs.size(),
+                      pair_first,
+                      [scale] __device__(auto pair) {
+                        return thrust::make_tuple(detail::scramble(thrust::get<0>(pair), scale),
+                                                  detail::scramble(thrust::get<1>(pair), scale));
+                      });
+  }
+
+  return std::make_tuple(std::move(srcs), std::move(dsts));
+}
+
+// explicit instantiation
+
+template std::tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>>
+generate_rmat_edgelist<int32_t>(raft::handle_t const& handle,
+                                size_t scale,
+                                size_t num_edges,
+                                double a,
+                                double b,
+                                double c,
+                                uint64_t seed,
+                                bool clip_and_flip,
+                                bool scramble_vertex_ids);
+
+template std::tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>>
+generate_rmat_edgelist<int64_t>(raft::handle_t const& handle,
+                                size_t scale,
+                                size_t num_edges,
+                                double a,
+                                double b,
+                                double c,
+                                uint64_t seed,
+                                bool clip_and_flip,
+                                bool scramble_vertex_ids);
+
+}  // namespace experimental
+}  // namespace cugraph
diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu
index 498bb4eaefe..5abe141dafd 100644
--- a/cpp/src/experimental/graph.cu
+++ b/cpp/src/experimental/graph.cu
@@ -304,9 +304,15 @@ graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enable_if_
 
     rmm::device_uvector<vertex_t> segment_offsets(detail::num_segments_per_vertex_partition + 1,
                                                   default_stream);
-    segment_offsets.set_element_async(0, 0, default_stream);
+
+    // named locals are needed because device_uvector::set_element_async deletes its && overload
+    // Note that we must sync `default_stream` before these locals go out of scope to
+    // avoid use after free. (The syncs are at the end of this function)
+    auto zero_vertex  = vertex_t{0};
+    auto vertex_count = static_cast<vertex_t>(degrees.size());
+    segment_offsets.set_element_async(0, zero_vertex, default_stream);
     segment_offsets.set_element_async(
-      detail::num_segments_per_vertex_partition, degrees.size(), default_stream);
+      detail::num_segments_per_vertex_partition, vertex_count, default_stream);
 
     thrust::upper_bound(rmm::exec_policy(default_stream)->on(default_stream),
                         degrees.begin(),
@@ -454,9 +460,16 @@ graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu, std::enable_if_
 
     rmm::device_uvector<vertex_t> segment_offsets(detail::num_segments_per_vertex_partition + 1,
                                                   default_stream);
-    segment_offsets.set_element_async(0, 0, default_stream);
+
+    // named locals are needed because device_uvector::set_element_async deletes its && overload
+    // Note that we must sync `default_stream` before these locals go out of scope to
+    // avoid use after free. (The syncs are at the end of this function)
+    auto zero_vertex  = vertex_t{0};
+    auto vertex_count = static_cast<vertex_t>(this->get_number_of_vertices());
+    segment_offsets.set_element_async(0, zero_vertex, default_stream);
+
     segment_offsets.set_element_async(
-      detail::num_segments_per_vertex_partition, this->get_number_of_vertices(), default_stream);
+      detail::num_segments_per_vertex_partition, vertex_count, default_stream);
 
     thrust::upper_bound(rmm::exec_policy(default_stream)->on(default_stream),
                         degree_first,
diff --git a/cpp/src/experimental/induced_subgraph.cu b/cpp/src/experimental/induced_subgraph.cu
index a88adf76ef4..5cda36ad7e2 100644
--- a/cpp/src/experimental/induced_subgraph.cu
+++ b/cpp/src/experimental/induced_subgraph.cu
@@ -32,6 +32,8 @@
 
 #include <tuple>
 
+#include <utilities/high_res_timer.hpp>
+
 namespace cugraph {
 namespace experimental {
 
@@ -52,6 +54,10 @@ extract_induced_subgraphs(
   size_t num_subgraphs,
   bool do_expensive_check)
 {
+#ifdef TIMING
+  HighResTimer hr_timer;
+  hr_timer.start("extract_induced_subgraphs");
+#endif
   // FIXME: this code is inefficient for the vertices with their local degrees much larger than the
   // number of vertices in the subgraphs (in this case, searching that the subgraph vertices are
   // included in the local neighbors is more efficient than searching the local neighbors are
@@ -244,7 +250,10 @@ extract_induced_subgraphs(
                    subgraph_offsets + (num_subgraphs + 1),
                    subgraph_vertex_output_offsets.begin(),
                    subgraph_edge_offsets.begin());
-
+#ifdef TIMING
+    hr_timer.stop();
+    hr_timer.display(std::cout);
+#endif
     return std::make_tuple(std::move(edge_majors),
                            std::move(edge_minors),
                            std::move(edge_weights),
diff --git a/cpp/src/experimental/katz_centrality.cu b/cpp/src/experimental/katz_centrality.cu
index 1ab824f1c91..7ffef5053af 100644
--- a/cpp/src/experimental/katz_centrality.cu
+++ b/cpp/src/experimental/katz_centrality.cu
@@ -38,7 +38,7 @@ namespace detail {
 template <typename GraphViewType, typename result_t>
 void katz_centrality(raft::handle_t const &handle,
                      GraphViewType const &pull_graph_view,
-                     result_t *betas,
+                     result_t const *betas,
                      result_t *katz_centralities,
                      result_t alpha,
                      result_t beta,  // relevant only if betas == nullptr
@@ -173,7 +173,7 @@ void katz_centrality(raft::handle_t const &handle,
 template <typename vertex_t, typename edge_t, typename weight_t, typename result_t, bool multi_gpu>
 void katz_centrality(raft::handle_t const &handle,
                      graph_view_t<vertex_t, edge_t, weight_t, true, multi_gpu> const &graph_view,
-                     result_t *betas,
+                     result_t const *betas,
                      result_t *katz_centralities,
                      result_t alpha,
                      result_t beta,  // relevant only if beta == nullptr
@@ -200,7 +200,7 @@ void katz_centrality(raft::handle_t const &handle,
 
 template void katz_centrality(raft::handle_t const &handle,
                               graph_view_t<int32_t, int32_t, float, true, true> const &graph_view,
-                              float *betas,
+                              float const *betas,
                               float *katz_centralities,
                               float alpha,
                               float beta,
@@ -212,7 +212,7 @@ template void katz_centrality(raft::handle_t const &handle,
 
 template void katz_centrality(raft::handle_t const &handle,
                               graph_view_t<int32_t, int32_t, double, true, true> const &graph_view,
-                              double *betas,
+                              double const *betas,
                               double *katz_centralities,
                               double alpha,
                               double beta,
@@ -224,7 +224,7 @@ template void katz_centrality(raft::handle_t const &handle,
 
 template void katz_centrality(raft::handle_t const &handle,
                               graph_view_t<int32_t, int64_t, float, true, true> const &graph_view,
-                              float *betas,
+                              float const *betas,
                               float *katz_centralities,
                               float alpha,
                               float beta,
@@ -236,7 +236,7 @@ template void katz_centrality(raft::handle_t const &handle,
 
 template void katz_centrality(raft::handle_t const &handle,
                               graph_view_t<int32_t, int64_t, double, true, true> const &graph_view,
-                              double *betas,
+                              double const *betas,
                               double *katz_centralities,
                               double alpha,
                               double beta,
@@ -248,7 +248,7 @@ template void katz_centrality(raft::handle_t const &handle,
 
 template void katz_centrality(raft::handle_t const &handle,
                               graph_view_t<int64_t, int64_t, float, true, true> const &graph_view,
-                              float *betas,
+                              float const *betas,
                               float *katz_centralities,
                               float alpha,
                               float beta,
@@ -260,7 +260,7 @@ template void katz_centrality(raft::handle_t const &handle,
 
 template void katz_centrality(raft::handle_t const &handle,
                               graph_view_t<int64_t, int64_t, double, true, true> const &graph_view,
-                              double *betas,
+                              double const *betas,
                               double *katz_centralities,
                               double alpha,
                               double beta,
@@ -272,7 +272,7 @@ template void katz_centrality(raft::handle_t const &handle,
 
 template void katz_centrality(raft::handle_t const &handle,
                               graph_view_t<int32_t, int32_t, float, true, false> const &graph_view,
-                              float *betas,
+                              float const *betas,
                               float *katz_centralities,
                               float alpha,
                               float beta,
@@ -284,7 +284,7 @@ template void katz_centrality(raft::handle_t const &handle,
 
 template void katz_centrality(raft::handle_t const &handle,
                               graph_view_t<int32_t, int32_t, double, true, false> const &graph_view,
-                              double *betas,
+                              double const *betas,
                               double *katz_centralities,
                               double alpha,
                               double beta,
@@ -296,7 +296,7 @@ template void katz_centrality(raft::handle_t const &handle,
 
 template void katz_centrality(raft::handle_t const &handle,
                               graph_view_t<int32_t, int64_t, float, true, false> const &graph_view,
-                              float *betas,
+                              float const *betas,
                               float *katz_centralities,
                               float alpha,
                               float beta,
@@ -308,7 +308,7 @@ template void katz_centrality(raft::handle_t const &handle,
 
 template void katz_centrality(raft::handle_t const &handle,
                               graph_view_t<int32_t, int64_t, double, true, false> const &graph_view,
-                              double *betas,
+                              double const *betas,
                               double *katz_centralities,
                               double alpha,
                               double beta,
@@ -320,7 +320,7 @@ template void katz_centrality(raft::handle_t const &handle,
 
 template void katz_centrality(raft::handle_t const &handle,
                               graph_view_t<int64_t, int64_t, float, true, false> const &graph_view,
-                              float *betas,
+                              float const *betas,
                               float *katz_centralities,
                               float alpha,
                               float beta,
@@ -332,7 +332,7 @@ template void katz_centrality(raft::handle_t const &handle,
 
 template void katz_centrality(raft::handle_t const &handle,
                               graph_view_t<int64_t, int64_t, double, true, false> const &graph_view,
-                              double *betas,
+                              double const *betas,
                               double *katz_centralities,
                               double alpha,
                               double beta,
diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh
index f162cd17a61..fe8310a62ca 100644
--- a/cpp/src/experimental/louvain.cuh
+++ b/cpp/src/experimental/louvain.cuh
@@ -405,7 +405,7 @@ class Louvain {
       handle_(handle),
       dendrogram_(std::make_unique<Dendrogram<vertex_t>>()),
       current_graph_view_(graph_view),
-      compute_partition_(graph_view),
+      compute_partition_(handle, graph_view),
       local_num_vertices_(graph_view.get_number_of_local_vertices()),
       local_num_rows_(graph_view.get_number_of_local_adj_matrix_partition_rows()),
       local_num_cols_(graph_view.get_number_of_local_adj_matrix_partition_cols()),
diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu
index c498d2864b4..e5874acb04f 100644
--- a/cpp/src/experimental/pagerank.cu
+++ b/cpp/src/experimental/pagerank.cu
@@ -44,9 +44,9 @@ namespace detail {
 template <typename GraphViewType, typename result_t>
 void pagerank(raft::handle_t const& handle,
               GraphViewType const& pull_graph_view,
-              typename GraphViewType::weight_type* precomputed_vertex_out_weight_sums,
-              typename GraphViewType::vertex_type* personalization_vertices,
-              result_t* personalization_values,
+              typename GraphViewType::weight_type const* precomputed_vertex_out_weight_sums,
+              typename GraphViewType::vertex_type const* personalization_vertices,
+              result_t const* personalization_values,
               typename GraphViewType::vertex_type personalization_vector_size,
               result_t* pageranks,
               result_t alpha,
@@ -279,9 +279,9 @@ void pagerank(raft::handle_t const& handle,
 template <typename vertex_t, typename edge_t, typename weight_t, typename result_t, bool multi_gpu>
 void pagerank(raft::handle_t const& handle,
               graph_view_t<vertex_t, edge_t, weight_t, true, multi_gpu> const& graph_view,
-              weight_t* precomputed_vertex_out_weight_sums,
-              vertex_t* personalization_vertices,
-              result_t* personalization_values,
+              weight_t const* precomputed_vertex_out_weight_sums,
+              vertex_t const* personalization_vertices,
+              result_t const* personalization_values,
               vertex_t personalization_vector_size,
               result_t* pageranks,
               result_t alpha,
@@ -308,9 +308,9 @@ void pagerank(raft::handle_t const& handle,
 
 template void pagerank(raft::handle_t const& handle,
                        graph_view_t<int32_t, int32_t, float, true, true> const& graph_view,
-                       float* precomputed_vertex_out_weight_sums,
-                       int32_t* personalization_vertices,
-                       float* personalization_values,
+                       float const* precomputed_vertex_out_weight_sums,
+                       int32_t const* personalization_vertices,
+                       float const* personalization_values,
                        int32_t personalization_vector_size,
                        float* pageranks,
                        float alpha,
@@ -321,9 +321,9 @@ template void pagerank(raft::handle_t const& handle,
 
 template void pagerank(raft::handle_t const& handle,
                        graph_view_t<int32_t, int32_t, double, true, true> const& graph_view,
-                       double* precomputed_vertex_out_weight_sums,
-                       int32_t* personalization_vertices,
-                       double* personalization_values,
+                       double const* precomputed_vertex_out_weight_sums,
+                       int32_t const* personalization_vertices,
+                       double const* personalization_values,
                        int32_t personalization_vector_size,
                        double* pageranks,
                        double alpha,
@@ -334,9 +334,9 @@ template void pagerank(raft::handle_t const& handle,
 
 template void pagerank(raft::handle_t const& handle,
                        graph_view_t<int32_t, int64_t, float, true, true> const& graph_view,
-                       float* precomputed_vertex_out_weight_sums,
-                       int32_t* personalization_vertices,
-                       float* personalization_values,
+                       float const* precomputed_vertex_out_weight_sums,
+                       int32_t const* personalization_vertices,
+                       float const* personalization_values,
                        int32_t personalization_vector_size,
                        float* pageranks,
                        float alpha,
@@ -347,9 +347,9 @@ template void pagerank(raft::handle_t const& handle,
 
 template void pagerank(raft::handle_t const& handle,
                        graph_view_t<int32_t, int64_t, double, true, true> const& graph_view,
-                       double* precomputed_vertex_out_weight_sums,
-                       int32_t* personalization_vertices,
-                       double* personalization_values,
+                       double const* precomputed_vertex_out_weight_sums,
+                       int32_t const* personalization_vertices,
+                       double const* personalization_values,
                        int32_t personalization_vector_size,
                        double* pageranks,
                        double alpha,
@@ -360,9 +360,9 @@ template void pagerank(raft::handle_t const& handle,
 
 template void pagerank(raft::handle_t const& handle,
                        graph_view_t<int64_t, int64_t, float, true, true> const& graph_view,
-                       float* precomputed_vertex_out_weight_sums,
-                       int64_t* personalization_vertices,
-                       float* personalization_values,
+                       float const* precomputed_vertex_out_weight_sums,
+                       int64_t const* personalization_vertices,
+                       float const* personalization_values,
                        int64_t personalization_vector_size,
                        float* pageranks,
                        float alpha,
@@ -373,9 +373,9 @@ template void pagerank(raft::handle_t const& handle,
 
 template void pagerank(raft::handle_t const& handle,
                        graph_view_t<int64_t, int64_t, double, true, true> const& graph_view,
-                       double* precomputed_vertex_out_weight_sums,
-                       int64_t* personalization_vertices,
-                       double* personalization_values,
+                       double const* precomputed_vertex_out_weight_sums,
+                       int64_t const* personalization_vertices,
+                       double const* personalization_values,
                        int64_t personalization_vector_size,
                        double* pageranks,
                        double alpha,
@@ -386,9 +386,9 @@ template void pagerank(raft::handle_t const& handle,
 
 template void pagerank(raft::handle_t const& handle,
                        graph_view_t<int32_t, int32_t, float, true, false> const& graph_view,
-                       float* precomputed_vertex_out_weight_sums,
-                       int32_t* personalization_vertices,
-                       float* personalization_values,
+                       float const* precomputed_vertex_out_weight_sums,
+                       int32_t const* personalization_vertices,
+                       float const* personalization_values,
                        int32_t personalization_vector_size,
                        float* pageranks,
                        float alpha,
@@ -399,9 +399,9 @@ template void pagerank(raft::handle_t const& handle,
 
 template void pagerank(raft::handle_t const& handle,
                        graph_view_t<int32_t, int32_t, double, true, false> const& graph_view,
-                       double* precomputed_vertex_out_weight_sums,
-                       int32_t* personalization_vertices,
-                       double* personalization_values,
+                       double const* precomputed_vertex_out_weight_sums,
+                       int32_t const* personalization_vertices,
+                       double const* personalization_values,
                        int32_t personalization_vector_size,
                        double* pageranks,
                        double alpha,
@@ -412,9 +412,9 @@ template void pagerank(raft::handle_t const& handle,
 
 template void pagerank(raft::handle_t const& handle,
                        graph_view_t<int32_t, int64_t, float, true, false> const& graph_view,
-                       float* precomputed_vertex_out_weight_sums,
-                       int32_t* personalization_vertices,
-                       float* personalization_values,
+                       float const* precomputed_vertex_out_weight_sums,
+                       int32_t const* personalization_vertices,
+                       float const* personalization_values,
                        int32_t personalization_vector_size,
                        float* pageranks,
                        float alpha,
@@ -425,9 +425,9 @@ template void pagerank(raft::handle_t const& handle,
 
 template void pagerank(raft::handle_t const& handle,
                        graph_view_t<int32_t, int64_t, double, true, false> const& graph_view,
-                       double* precomputed_vertex_out_weight_sums,
-                       int32_t* personalization_vertices,
-                       double* personalization_values,
+                       double const* precomputed_vertex_out_weight_sums,
+                       int32_t const* personalization_vertices,
+                       double const* personalization_values,
                        int32_t personalization_vector_size,
                        double* pageranks,
                        double alpha,
@@ -438,9 +438,9 @@ template void pagerank(raft::handle_t const& handle,
 
 template void pagerank(raft::handle_t const& handle,
                        graph_view_t<int64_t, int64_t, float, true, false> const& graph_view,
-                       float* precomputed_vertex_out_weight_sums,
-                       int64_t* personalization_vertices,
-                       float* personalization_values,
+                       float const* precomputed_vertex_out_weight_sums,
+                       int64_t const* personalization_vertices,
+                       float const* personalization_values,
                        int64_t personalization_vector_size,
                        float* pageranks,
                        float alpha,
@@ -451,9 +451,9 @@ template void pagerank(raft::handle_t const& handle,
 
 template void pagerank(raft::handle_t const& handle,
                        graph_view_t<int64_t, int64_t, double, true, false> const& graph_view,
-                       double* precomputed_vertex_out_weight_sums,
-                       int64_t* personalization_vertices,
-                       double* personalization_values,
+                       double const* precomputed_vertex_out_weight_sums,
+                       int64_t const* personalization_vertices,
+                       double const* personalization_values,
                        int64_t personalization_vector_size,
                        double* pageranks,
                        double alpha,
diff --git a/cpp/src/experimental/renumber_edgelist.cu b/cpp/src/experimental/renumber_edgelist.cu
index b093a9adb22..a8847167b87 100644
--- a/cpp/src/experimental/renumber_edgelist.cu
+++ b/cpp/src/experimental/renumber_edgelist.cu
@@ -547,11 +547,10 @@ renumber_edgelist(raft::handle_t const& handle,
   return std::make_tuple(
     std::move(renumber_map_labels), partition, number_of_vertices, number_of_edges);
 #else
-  return std::make_tuple(
-    rmm::device_uvector<vertex_t>(0, handle.get_stream()),
-    partition_t<vertex_t>(std::vector<vertex_t>(), false, int{0}, int{0}, int{0}, int{0}),
-    vertex_t{0},
-    edge_t{0});
+  return std::make_tuple(rmm::device_uvector<vertex_t>(0, handle.get_stream()),
+                         partition_t<vertex_t>{},
+                         vertex_t{0},
+                         edge_t{0});
 #endif
 }
 
diff --git a/cpp/src/experimental/scramble.cuh b/cpp/src/experimental/scramble.cuh
new file mode 100644
index 00000000000..875bb5feff0
--- /dev/null
+++ b/cpp/src/experimental/scramble.cuh
@@ -0,0 +1,107 @@
+/* Copyright (C) 2009-2010 The Trustees of Indiana University.             */
+/*                                                                         */
+/* Use, modification and distribution is subject to the Boost Software     */
+/* License, Version 1.0. (See accompanying file LICENSE_1_0.txt or copy at */
+/* http://www.boost.org/LICENSE_1_0.txt)                                   */
+/*                                                                         */
+/*  Authors: Jeremiah Willcock                                             */
+/*           Andrew Lumsdaine                                              */
+
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cassert>
+#include <cstddef>
+#include <cstdint>
+#include <type_traits>
+
+namespace cugraph {
+namespace experimental {
+namespace detail {
+
+/* Reverse the bit order of an 8-byte integer value. */
+template <typename uvertex_t>
+__device__ std::enable_if_t<sizeof(uvertex_t) == 8, uvertex_t> bitreversal(uvertex_t value)
+{
+  return __brevll(value);
+}
+
+/* Reverse the bit order of a 4-byte integer value. */
+template <typename uvertex_t>
+__device__ std::enable_if_t<sizeof(uvertex_t) == 4, uvertex_t> bitreversal(uvertex_t value)
+{
+  return __brev(value);
+}
+
+/* Reverse the bit order of a 2-byte integer value: reverse it as a 32-bit
+ * word, then keep the upper half, which holds the reversed 16 bits. */
+template <typename uvertex_t>
+__device__ std::enable_if_t<sizeof(uvertex_t) == 2, uvertex_t> bitreversal(uvertex_t value)
+{
+  return static_cast<uvertex_t>(__brev(value) >> 16);
+}
+
+/* Apply a permutation to scramble vertex numbers; a randomly generated
+ * permutation is not used because applying it at scale is too expensive.
+ *
+ * value: non-negative vertex number to scramble.
+ * lgN:   number of low-order bits kept in the result; the returned value is
+ *        in [0, 2^lgN). Must not exceed the number of value bits of vertex_t.
+ */
+template <typename vertex_t>
+__device__ vertex_t scramble(vertex_t value, size_t lgN)
+{
+  constexpr size_t number_of_bits = sizeof(vertex_t) * 8;
+
+  static_assert((number_of_bits == 64) || (number_of_bits == 32) || (number_of_bits == 16),
+                "scramble() supports 16-, 32-, and 64-bit vertex types only.");
+  assert((std::is_unsigned<vertex_t>::value && lgN <= number_of_bits) ||
+         (!std::is_unsigned<vertex_t>::value && lgN < number_of_bits));
+  assert(value >= 0);
+
+  // With lgN == 0 the only possible result is 0; returning early also avoids
+  // shifting by the full bit width below, which is undefined behavior.
+  if (lgN == 0) { return vertex_t{0}; }
+
+  using uvertex_t = typename std::make_unsigned<vertex_t>::type;
+  // 16-bit operands would be promoted to (signed) int, whose multiplication can
+  // overflow; do the multiplications in an unsigned type of at least int width.
+  using arith_t =
+    std::conditional_t<(sizeof(uvertex_t) < sizeof(unsigned int)), unsigned int, uvertex_t>;
+
+  constexpr auto scramble_value0 = static_cast<uvertex_t>(
+    sizeof(vertex_t) == 8 ? 606610977102444280 : (sizeof(vertex_t) == 4 ? 282475248 : 0));
+  // The 64-bit constant exceeds the signed 64-bit range and needs an unsigned suffix.
+  constexpr auto scramble_value1 = static_cast<uvertex_t>(
+    sizeof(vertex_t) == 8 ? 11680327234415193037ULL : (sizeof(vertex_t) == 4 ? 2617694917 : 8620));
+
+  auto v = static_cast<uvertex_t>(value);
+  v += scramble_value0 + scramble_value1;
+  v = static_cast<uvertex_t>(
+    static_cast<arith_t>(v) *
+    static_cast<arith_t>(scramble_value0 | static_cast<uvertex_t>(0x4519840211493211)));
+  v = bitreversal(v) >> (number_of_bits - lgN);
+  v = static_cast<uvertex_t>(
+    static_cast<arith_t>(v) *
+    static_cast<arith_t>(scramble_value1 | static_cast<uvertex_t>(0x3050852102C843A5)));
+  v = bitreversal(v) >> (number_of_bits - lgN);
+  return static_cast<vertex_t>(v);
+}
+
+}  // namespace detail
+}  // namespace experimental
+}  // namespace cugraph
diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu
index e95a001cb91..5382b4856f3 100644
--- a/cpp/src/utilities/cython.cu
+++ b/cpp/src/utilities/cython.cu
@@ -762,28 +762,49 @@ std::unique_ptr<major_minor_weights_t<vertex_t, weight_t>> call_shuffle(
 
   auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name());
 
-  auto zip_edge = thrust::make_zip_iterator(
-    thrust::make_tuple(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights));
-
   std::unique_ptr<major_minor_weights_t<vertex_t, weight_t>> ptr_ret =
     std::make_unique<major_minor_weights_t<vertex_t, weight_t>>(handle);
 
-  std::forward_as_tuple(
-    std::tie(ptr_ret->get_major(), ptr_ret->get_minor(), ptr_ret->get_weights()),
-    std::ignore) =
-    cugraph::experimental::groupby_gpuid_and_shuffle_values(
-      comm,  // handle.get_comms(),
-      zip_edge,
-      zip_edge + num_edgelist_edges,
-      [key_func =
-         cugraph::experimental::detail::compute_gpu_id_from_edge_t<vertex_t>{
-           is_hypergraph_partitioned,
-           comm.get_size(),
-           row_comm.get_size(),
-           col_comm.get_size()}] __device__(auto val) {
-        return key_func(thrust::get<0>(val), thrust::get<1>(val));
-      },
-      handle.get_stream());
+  if (edgelist_weights != nullptr) {
+    auto zip_edge = thrust::make_zip_iterator(
+      thrust::make_tuple(edgelist_major_vertices, edgelist_minor_vertices, edgelist_weights));
+
+    std::forward_as_tuple(
+      std::tie(ptr_ret->get_major(), ptr_ret->get_minor(), ptr_ret->get_weights()),
+      std::ignore) =
+      cugraph::experimental::groupby_gpuid_and_shuffle_values(
+        comm,  // handle.get_comms(),
+        zip_edge,
+        zip_edge + num_edgelist_edges,
+        [key_func =
+           cugraph::experimental::detail::compute_gpu_id_from_edge_t<vertex_t>{
+             is_hypergraph_partitioned,
+             comm.get_size(),
+             row_comm.get_size(),
+             col_comm.get_size()}] __device__(auto val) {
+          return key_func(thrust::get<0>(val), thrust::get<1>(val));
+        },
+        handle.get_stream());
+  } else {
+    auto zip_edge = thrust::make_zip_iterator(
+      thrust::make_tuple(edgelist_major_vertices, edgelist_minor_vertices));
+
+    std::forward_as_tuple(std::tie(ptr_ret->get_major(), ptr_ret->get_minor()),
+                          std::ignore) =
+      cugraph::experimental::groupby_gpuid_and_shuffle_values(
+        comm,  // handle.get_comms(),
+        zip_edge,
+        zip_edge + num_edgelist_edges,
+        [key_func =
+           cugraph::experimental::detail::compute_gpu_id_from_edge_t<vertex_t>{
+             is_hypergraph_partitioned,
+             comm.get_size(),
+             row_comm.get_size(),
+             col_comm.get_size()}] __device__(auto val) {
+          return key_func(thrust::get<0>(val), thrust::get<1>(val));
+        },
+        handle.get_stream());
+  }
 
   return ptr_ret;  // RVO-ed
 }
diff --git a/cpp/src/utilities/high_res_timer.hpp b/cpp/src/utilities/high_res_timer.hpp
index f2d6bc6e13f..a731c5edc9d 100644
--- a/cpp/src/utilities/high_res_timer.hpp
+++ b/cpp/src/utilities/high_res_timer.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -20,6 +20,8 @@
 #include <map>
 #include <string>
 
+//#define TIMING
+
 class HighResTimer {
  public:
   HighResTimer() : timers() {}
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index 68b277871b1..5571cf5f124 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -16,6 +16,46 @@
 #
 #=============================================================================
 
+###################################################################################################
+# - common test utils -----------------------------------------------------------------------------
+
+add_library(cugraphtestutil STATIC
+            "${CMAKE_CURRENT_SOURCE_DIR}/utilities/generate_graph_from_edgelist.cu"
+            "${CMAKE_CURRENT_SOURCE_DIR}/utilities/matrix_market_file_utilities.cu"
+            "${CMAKE_CURRENT_SOURCE_DIR}/utilities/rmat_utilities.cu"
+            "${CMAKE_CURRENT_SOURCE_DIR}/utilities/misc_utilities.cpp"
+            "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c")
+
+set_property(TARGET cugraphtestutil PROPERTY POSITION_INDEPENDENT_CODE ON)
+
+target_include_directories(cugraphtestutil
+    PRIVATE
+    "${CUB_INCLUDE_DIR}"
+    "${THRUST_INCLUDE_DIR}"
+    "${CUCO_INCLUDE_DIR}"
+    "${LIBCUDACXX_INCLUDE_DIR}"
+    "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}"
+    "${RMM_INCLUDE}"
+    "${NCCL_INCLUDE_DIRS}"
+    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio"
+    "${CMAKE_CURRENT_SOURCE_DIR}/../include"
+    "${CMAKE_CURRENT_SOURCE_DIR}"
+    "${RAFT_DIR}/cpp/include"
+)
+
+target_link_libraries(cugraphtestutil cugraph)
+
+# CUDA_ARCHITECTURES=OFF implies cmake will not pass arch flags to the
+# compiler. CUDA_ARCHITECTURES must be set to a non-empty value to prevent
+# cmake warnings about policy CMP0104. With this setting, arch flags must be
+# manually set! ("evaluate_gpu_archs(GPU_ARCHS)" is the current mechanism
+# used in cpp/CMakeLists.txt for setting arch options).
+# Run "cmake --help-policy CMP0104" for policy details.
+# NOTE: the CUDA_ARCHITECTURES=OFF setting may be removed after migrating to
+# the FindCUDAToolkit module available in CMake 3.17+.
+set_target_properties(cugraphtestutil PROPERTIES
+        CUDA_ARCHITECTURES OFF)
+
 ###################################################################################################
 # - compiler function -----------------------------------------------------------------------------
 
@@ -31,8 +71,6 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC)
 	"${LIBCUDACXX_INCLUDE_DIR}"
         "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}"
         "${RMM_INCLUDE}"
-        "${CUDF_INCLUDE}"
-        "${CUDF_INCLUDE}/libcudf/libcudacxx"
         "${NCCL_INCLUDE_DIRS}"
         "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio"
         "${CMAKE_CURRENT_SOURCE_DIR}/../include"
@@ -49,10 +87,10 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC)
 
     target_link_libraries(${CMAKE_TEST_NAME}
         PRIVATE
+        cugraphtestutil
         cugraph
         GTest::GTest
         GTest::Main
-        ${CUDF_LIBRARY}
         ${NCCL_LIBRARIES}
         cudart
         cuda
@@ -140,16 +178,10 @@ endif(RAPIDS_DATASET_ROOT_DIR)
 ### test sources ##################################################################################
 ###################################################################################################
 
-# FIXME: consider adding a "add_library(cugraph_testing SHARED ...) instead of
-# adding the same test utility sources to each test target. There may need to be
-# an additional cugraph_mg_testing lib due to the optional inclusion of MPI.
-
 ###################################################################################################
 # - katz centrality tests -------------------------------------------------------------------------
 
 set(KATZ_TEST_SRC
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/centrality/katz_centrality_test.cu")
 
   ConfigureTest(KATZ_TEST "${KATZ_TEST_SRC}")
@@ -158,15 +190,11 @@ set(KATZ_TEST_SRC
 # - betweenness centrality tests ------------------------------------------------------------------
 
 set(BETWEENNESS_TEST_SRC
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/centrality/betweenness_centrality_test.cu")
 
   ConfigureTest(BETWEENNESS_TEST "${BETWEENNESS_TEST_SRC}")
 
 set(EDGE_BETWEENNESS_TEST_SRC
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/centrality/edge_betweenness_centrality_test.cu")
 
   ConfigureTest(EDGE_BETWEENNESS_TEST "${EDGE_BETWEENNESS_TEST_SRC}")
@@ -175,8 +203,6 @@ set(EDGE_BETWEENNESS_TEST_SRC
 # - SSSP tests ------------------------------------------------------------------------------------
 
 set(SSSP_TEST_SRCS
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/traversal/sssp_test.cu")
 
 ConfigureTest(SSSP_TEST "${SSSP_TEST_SRCS}")
@@ -185,8 +211,6 @@ ConfigureTest(SSSP_TEST "${SSSP_TEST_SRCS}")
 # - BFS tests -------------------------------------------------------------------------------------
 
 set(BFS_TEST_SRCS
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/traversal/bfs_test.cu")
 
 ConfigureTest(BFS_TEST "${BFS_TEST_SRCS}")
@@ -195,8 +219,6 @@ ConfigureTest(BFS_TEST "${BFS_TEST_SRCS}")
 # - LOUVAIN tests ---------------------------------------------------------------------------------
 
 set(LOUVAIN_TEST_SRC
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/community/louvain_test.cpp")
 
 ConfigureTest(LOUVAIN_TEST "${LOUVAIN_TEST_SRC}")
@@ -205,8 +227,6 @@ ConfigureTest(LOUVAIN_TEST "${LOUVAIN_TEST_SRC}")
 # - LEIDEN tests ---------------------------------------------------------------------------------
 
 set(LEIDEN_TEST_SRC
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/community/leiden_test.cpp")
 
 ConfigureTest(LEIDEN_TEST "${LEIDEN_TEST_SRC}")
@@ -215,8 +235,6 @@ ConfigureTest(LEIDEN_TEST "${LEIDEN_TEST_SRC}")
 # - ECG tests ---------------------------------------------------------------------------------
 
 set(ECG_TEST_SRC
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/community/ecg_test.cpp")
 
 ConfigureTest(ECG_TEST "${ECG_TEST_SRC}")
@@ -225,8 +243,6 @@ ConfigureTest(ECG_TEST "${ECG_TEST_SRC}")
 # - Balanced cut clustering tests -----------------------------------------------------------------
 
 set(BALANCED_TEST_SRC
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/community/balanced_edge_test.cpp")
 
 ConfigureTest(BALANCED_TEST "${BALANCED_TEST_SRC}")
@@ -235,8 +251,6 @@ ConfigureTest(BALANCED_TEST "${BALANCED_TEST_SRC}")
 # - TRIANGLE tests --------------------------------------------------------------------------------
 
 set(TRIANGLE_TEST_SRC
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/community/triangle_test.cu")
 
 ConfigureTest(TRIANGLE_TEST "${TRIANGLE_TEST_SRC}")
@@ -245,8 +259,6 @@ ConfigureTest(TRIANGLE_TEST "${TRIANGLE_TEST_SRC}")
 # - EGO tests --------------------------------------------------------------------------------
 
 set(EGO_TEST_SRC
-    "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/community/egonet_test.cu")
 
 ConfigureTest(EGO_TEST "${EGO_TEST_SRC}" "")
@@ -254,8 +266,6 @@ ConfigureTest(EGO_TEST "${EGO_TEST_SRC}" "")
 # - RENUMBERING tests -----------------------------------------------------------------------------
 
 set(RENUMBERING_TEST_SRC
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/renumber/renumber_test.cu")
 
 ConfigureTest(RENUMBERING_TEST "${RENUMBERING_TEST_SRC}")
@@ -264,8 +274,6 @@ ConfigureTest(RENUMBERING_TEST "${RENUMBERING_TEST_SRC}")
 # - FORCE ATLAS 2  tests --------------------------------------------------------------------------
 
 set(FA2_TEST_SRC
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/layout/force_atlas2_test.cu")
 
 ConfigureTest(FA2_TEST "${FA2_TEST_SRC}")
@@ -274,8 +282,6 @@ ConfigureTest(FA2_TEST "${FA2_TEST_SRC}")
 # - TSP  tests --------------------------------------------------------------------------
 
 set(TSP_TEST_SRC
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/traversal/tsp_test.cu")
 
   ConfigureTest(TSP_TEST "${TSP_TEST_SRC}" "")
@@ -284,8 +290,6 @@ set(TSP_TEST_SRC
 # - CONNECTED COMPONENTS  tests -------------------------------------------------------------------
 
 set(CONNECT_TEST_SRC
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/components/con_comp_test.cu")
 
 ConfigureTest(CONNECT_TEST "${CONNECT_TEST_SRC}")
@@ -294,8 +298,6 @@ ConfigureTest(CONNECT_TEST "${CONNECT_TEST_SRC}")
 # - STRONGLY CONNECTED COMPONENTS  tests ----------------------------------------------------------
 
 set(SCC_TEST_SRC
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/components/scc_test.cu")
 
 ConfigureTest(SCC_TEST "${SCC_TEST_SRC}")
@@ -304,8 +306,6 @@ ConfigureTest(SCC_TEST "${SCC_TEST_SRC}")
 #-Hungarian (Linear Assignment Problem)  tests ---------------------------------------------------------------------
 
 set(HUNGARIAN_TEST_SRC
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/linear_assignment/hungarian_test.cu")
 
 ConfigureTest(HUNGARIAN_TEST "${HUNGARIAN_TEST_SRC}")
@@ -314,19 +314,23 @@ ConfigureTest(HUNGARIAN_TEST "${HUNGARIAN_TEST_SRC}")
 # - MST tests ----------------------------------------------------------------------------
 
 set(MST_TEST_SRC
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/tree/mst_test.cu")
 
 ConfigureTest(MST_TEST "${MST_TEST_SRC}")
 
 
+###################################################################################################
+# - Experimental R-mat graph generation tests -----------------------------------------------------
+
+set(EXPERIMENTAL_GENERATE_RMAT_TEST_SRCS
+    "${CMAKE_CURRENT_SOURCE_DIR}/experimental/generate_rmat_test.cpp")
+
+ConfigureTest(EXPERIMENTAL_GENERATE_RMAT_TEST "${EXPERIMENTAL_GENERATE_RMAT_TEST_SRCS}" "")
+
 ###################################################################################################
 # - Experimental Graph tests ----------------------------------------------------------------------
 
 set(EXPERIMENTAL_GRAPH_TEST_SRCS
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/experimental/graph_test.cpp")
 
 ConfigureTest(EXPERIMENTAL_GRAPH_TEST "${EXPERIMENTAL_GRAPH_TEST_SRCS}")
@@ -335,8 +339,6 @@ ConfigureTest(EXPERIMENTAL_GRAPH_TEST "${EXPERIMENTAL_GRAPH_TEST_SRCS}")
 # - Experimental weight-sum tests -----------------------------------------------------------------
 
 set(EXPERIMENTAL_WEIGHT_SUM_TEST_SRCS
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/experimental/weight_sum_test.cpp")
 
 ConfigureTest(EXPERIMENTAL_WEIGHT_SUM_TEST "${EXPERIMENTAL_WEIGHT_SUM_TEST_SRCS}")
@@ -345,8 +347,6 @@ ConfigureTest(EXPERIMENTAL_WEIGHT_SUM_TEST "${EXPERIMENTAL_WEIGHT_SUM_TEST_SRCS}
 # - Experimental degree tests ---------------------------------------------------------------------
 
 set(EXPERIMENTAL_DEGREE_TEST_SRCS
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/experimental/degree_test.cpp")
 
 ConfigureTest(EXPERIMENTAL_DEGREE_TEST "${EXPERIMENTAL_DEGREE_TEST_SRCS}")
@@ -355,8 +355,6 @@ ConfigureTest(EXPERIMENTAL_DEGREE_TEST "${EXPERIMENTAL_DEGREE_TEST_SRCS}")
 # - Experimental coarsening tests -----------------------------------------------------------------
 
 set(EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/experimental/coarsen_graph_test.cpp")
 
 ConfigureTest(EXPERIMENTAL_COARSEN_GRAPH_TEST "${EXPERIMENTAL_COARSEN_GRAPH_TEST_SRCS}")
@@ -365,8 +363,6 @@ ConfigureTest(EXPERIMENTAL_COARSEN_GRAPH_TEST "${EXPERIMENTAL_COARSEN_GRAPH_TEST
 # - Experimental induced subgraph tests -----------------------------------------------------------
 
 set(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/experimental/induced_subgraph_test.cpp")
 
 ConfigureTest(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST "${EXPERIMENTAL_INDUCED_SUBGRAPH_TEST_SRCS}")
@@ -375,8 +371,6 @@ ConfigureTest(EXPERIMENTAL_INDUCED_SUBGRAPH_TEST "${EXPERIMENTAL_INDUCED_SUBGRAP
 # - Experimental BFS tests ------------------------------------------------------------------------
 
 set(EXPERIMENTAL_BFS_TEST_SRCS
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/experimental/bfs_test.cpp")
 
 ConfigureTest(EXPERIMENTAL_BFS_TEST "${EXPERIMENTAL_BFS_TEST_SRCS}")
@@ -385,8 +379,6 @@ ConfigureTest(EXPERIMENTAL_BFS_TEST "${EXPERIMENTAL_BFS_TEST_SRCS}")
 # - Experimental SSSP tests -----------------------------------------------------------------------
 
 set(EXPERIMENTAL_SSSP_TEST_SRCS
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/experimental/sssp_test.cpp")
 
 ConfigureTest(EXPERIMENTAL_SSSP_TEST "${EXPERIMENTAL_SSSP_TEST_SRCS}")
@@ -395,8 +387,6 @@ ConfigureTest(EXPERIMENTAL_SSSP_TEST "${EXPERIMENTAL_SSSP_TEST_SRCS}")
 # - Experimental PAGERANK tests -------------------------------------------------------------------
 
 set(EXPERIMENTAL_PAGERANK_TEST_SRCS
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/experimental/pagerank_test.cpp")
 
 ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}")
@@ -405,8 +395,6 @@ ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}")
 # - Experimental LOUVAIN tests -------------------------------------------------------------------
 
 set(EXPERIMENTAL_LOUVAIN_TEST_SRCS
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/experimental/louvain_test.cu")
 
 ConfigureTest(EXPERIMENTAL_LOUVAIN_TEST "${EXPERIMENTAL_LOUVAIN_TEST_SRCS}")
@@ -415,8 +403,6 @@ ConfigureTest(EXPERIMENTAL_LOUVAIN_TEST "${EXPERIMENTAL_LOUVAIN_TEST_SRCS}")
 # - Experimental KATZ_CENTRALITY tests ------------------------------------------------------------
 
 set(EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS
-    "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-    "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
     "${CMAKE_CURRENT_SOURCE_DIR}/experimental/katz_centrality_test.cpp")
 
 ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS}")
@@ -424,16 +410,14 @@ ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_
 
 ###################################################################################################
 # - MG tests --------------------------------------------------------------------------------------
+
 if(BUILD_CUGRAPH_MG_TESTS)
     if(MPI_CXX_FOUND)
         ###########################################################################################
         # - MG PAGERANK tests ---------------------------------------------------------------------
 
         set(MG_PAGERANK_TEST_SRCS
-            "${CMAKE_CURRENT_SOURCE_DIR}/../../thirdparty/mmio/mmio.c"
-            "${CMAKE_CURRENT_SOURCE_DIR}/utilities/test_utilities.cpp"
-            "${CMAKE_CURRENT_SOURCE_DIR}/utilities/mg_test_utilities.cu"
-            "${CMAKE_CURRENT_SOURCE_DIR}/pagerank/pagerank_mg_test.cpp")
+            "${CMAKE_CURRENT_SOURCE_DIR}/pagerank/mg_pagerank_test.cpp")
 
         ConfigureTest(MG_PAGERANK_TEST "${MG_PAGERANK_TEST_SRCS}")
         target_link_libraries(MG_PAGERANK_TEST PRIVATE MPI::MPI_C MPI::MPI_CXX)
diff --git a/cpp/tests/community/egonet_test.cu b/cpp/tests/community/egonet_test.cu
index ec031228998..a9224b42bc1 100644
--- a/cpp/tests/community/egonet_test.cu
+++ b/cpp/tests/community/egonet_test.cu
@@ -15,6 +15,7 @@
  */
 
 #include <utilities/base_fixture.hpp>
+#include <utilities/high_res_timer.hpp>
 #include <utilities/test_utilities.hpp>
 
 #include <algorithms.hpp>
@@ -35,6 +36,8 @@
 #include <tuple>
 #include <vector>
 
+#include <cuda_profiler_api.h>
+
 typedef struct InducedEgo_Usecase_t {
   std::string graph_file_full_path{};
   std::vector<int32_t> ego_sources{};
@@ -67,11 +70,14 @@ class Tests_InducedEgo : public ::testing::TestWithParam<InducedEgo_Usecase> {
   template <typename vertex_t, typename edge_t, typename weight_t, bool store_transposed>
   void run_current_test(InducedEgo_Usecase const& configuration)
   {
-    raft::handle_t handle{};
+    int n_streams = std::min(configuration.ego_sources.size(), static_cast<std::size_t>(128));
+    raft::handle_t handle(n_streams);
 
-    auto graph = cugraph::test::
-      read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, store_transposed>(
-        handle, configuration.graph_file_full_path, configuration.test_weighted);
+    cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, false> graph(
+      handle);
+    std::tie(graph, std::ignore) = cugraph::test::
+      read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, store_transposed, false>(
+        handle, configuration.graph_file_full_path, configuration.test_weighted, false);
     auto graph_view = graph.view();
 
     rmm::device_uvector<vertex_t> d_ego_sources(configuration.ego_sources.size(),
@@ -86,14 +92,18 @@ class Tests_InducedEgo : public ::testing::TestWithParam<InducedEgo_Usecase> {
     rmm::device_uvector<vertex_t> d_ego_edgelist_dst(0, handle.get_stream());
     rmm::device_uvector<weight_t> d_ego_edgelist_weights(0, handle.get_stream());
     rmm::device_uvector<size_t> d_ego_edge_offsets(0, handle.get_stream());
-
+    HighResTimer hr_timer;
+    hr_timer.start("egonet");
+    cudaProfilerStart();
     std::tie(d_ego_edgelist_src, d_ego_edgelist_dst, d_ego_edgelist_weights, d_ego_edge_offsets) =
       cugraph::experimental::extract_ego(handle,
                                          graph_view,
                                          d_ego_sources.data(),
                                          static_cast<vertex_t>(configuration.ego_sources.size()),
                                          configuration.radius);
-
+    cudaProfilerStop();
+    hr_timer.stop();
+    hr_timer.display(std::cout);
     std::vector<size_t> h_cugraph_ego_edge_offsets(d_ego_edge_offsets.size());
     std::vector<vertex_t> h_cugraph_ego_edgelist_src(d_ego_edgelist_src.size());
     std::vector<vertex_t> h_cugraph_ego_edgelist_dst(d_ego_edgelist_dst.size());
@@ -116,13 +126,11 @@ class Tests_InducedEgo : public ::testing::TestWithParam<InducedEgo_Usecase> {
     ASSERT_TRUE(h_cugraph_ego_edge_offsets[configuration.ego_sources.size()] ==
                 d_ego_edgelist_src.size());
     for (size_t i = 0; i < configuration.ego_sources.size(); i++)
-      ASSERT_TRUE(h_cugraph_ego_edge_offsets[i] < h_cugraph_ego_edge_offsets[i + 1]);
+      ASSERT_TRUE(h_cugraph_ego_edge_offsets[i] <= h_cugraph_ego_edge_offsets[i + 1]);
     auto n_vertices = graph_view.get_number_of_vertices();
     for (size_t i = 0; i < d_ego_edgelist_src.size(); i++) {
-      ASSERT_TRUE(h_cugraph_ego_edgelist_src[i] >= 0);
-      ASSERT_TRUE(h_cugraph_ego_edgelist_src[i] < n_vertices);
-      ASSERT_TRUE(h_cugraph_ego_edgelist_dst[i] >= 0);
-      ASSERT_TRUE(h_cugraph_ego_edgelist_dst[i] < n_vertices);
+      ASSERT_TRUE(cugraph::test::is_valid_vertex(n_vertices, h_cugraph_ego_edgelist_src[i]));
+      ASSERT_TRUE(cugraph::test::is_valid_vertex(n_vertices, h_cugraph_ego_edgelist_dst[i]));
     }
 
     /*
@@ -168,6 +176,156 @@ INSTANTIATE_TEST_CASE_P(
     InducedEgo_Usecase("test/datasets/karate.mtx", std::vector<int32_t>{1}, 3, false),
     InducedEgo_Usecase("test/datasets/karate.mtx", std::vector<int32_t>{10, 0, 5}, 2, false),
     InducedEgo_Usecase("test/datasets/karate.mtx", std::vector<int32_t>{9, 3, 10}, 2, false),
-    InducedEgo_Usecase("test/datasets/karate.mtx", std::vector<int32_t>{5, 12, 13}, 2, true)));
+    InducedEgo_Usecase(
+      "test/datasets/karate.mtx", std::vector<int32_t>{5, 9, 3, 10, 12, 13}, 2, true)));
 
+// For perf analysis
+/*
+INSTANTIATE_TEST_CASE_P(
+simple_test,
+Tests_InducedEgo,
+::testing::Values(
+InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector<int32_t>{0}, 1, false),
+InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector<int32_t>{0}, 2, false),
+InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector<int32_t>{0}, 3, false),
+InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector<int32_t>{0}, 4, false),
+InducedEgo_Usecase("test/datasets/soc-LiveJournal1.mtx", std::vector<int32_t>{0}, 5, false),
+InducedEgo_Usecase(
+"test/datasets/soc-LiveJournal1.mtx", std::vector<int32_t>{363617}, 2, false),
+InducedEgo_Usecase(
+"test/datasets/soc-LiveJournal1.mtx",
+std::vector<int32_t>{
+  363617, 722214, 2337449, 2510183, 2513389, 225853, 2035807, 3836330, 1865496, 28755},
+   2,
+   false),
+ InducedEgo_Usecase(
+   "test/datasets/soc-LiveJournal1.mtx",
+   std::vector<int32_t>{
+     363617,  722214,  2337449, 2510183, 2513389, 225853,  2035807, 3836330, 1865496, 28755,
+     2536834, 3070144, 3888415, 3131712, 2382526, 1040771, 2631543, 4607218, 4465829,
+     3341686, 2772973, 2611175, 4526129, 2624421, 1220593, 2593137, 3270705, 1503899,
+     1213033, 4840102, 4529036, 3421116, 4264831, 4089751, 4272322, 3486998, 2830318,
+     320953,  2388331, 520808, 3023094, 1600294, 3631119, 1716614, 4829213, 1175844, 960680,
+     847662,  3277365, 3957318, 3455123, 2454259, 670953,  4465677, 1027332, 2560721, 89061,
+     1163406, 3109528, 3221856, 4714426, 2382774, 37828,   4433616, 3283229, 591911,
+     4200188, 442522,  872207,  2437601, 741003,  266241,  914618,  3626195, 2021080,
+     4679624, 777476,  2527796, 1114017, 640142, 49259,   4069879, 3869098, 1105040,
+     4707804, 3208582, 3325885, 1450601, 4072548, 2037062, 2029646, 4575891, 1488598, 79105,
+     4827273, 3795434, 4647518, 4733397, 3980718, 1184627},
+   2,
+   false),
+ InducedEgo_Usecase(
+   "test/datasets/soc-LiveJournal1.mtx",
+   std::vector<int32_t>{
+     363617,  722214,  2337449, 2510183, 2513389, 225853,  2035807, 3836330, 1865496, 28755,
+     2536834, 3070144, 3888415, 3131712, 2382526, 1040771, 2631543, 4607218, 4465829,
+     3341686, 2772973, 2611175, 4526129, 2624421, 1220593, 2593137, 3270705, 1503899,
+     1213033, 4840102, 4529036, 3421116, 4264831, 4089751, 4272322, 3486998, 2830318,
+     320953,  2388331, 520808, 3023094, 1600294, 3631119, 1716614, 4829213, 1175844, 960680,
+     847662,  3277365, 3957318, 3455123, 2454259, 670953,  4465677, 1027332, 2560721, 89061,
+     1163406, 3109528, 3221856, 4714426, 2382774, 37828,   4433616, 3283229, 591911,
+     4200188, 442522,  872207,  2437601, 741003,  266241,  914618,  3626195, 2021080,
+     4679624, 777476,  2527796, 1114017, 640142, 49259,   4069879, 3869098, 1105040,
+     4707804, 3208582, 3325885, 1450601, 4072548, 2037062, 2029646, 4575891, 1488598, 79105,
+     4827273, 3795434, 4647518, 4733397, 3980718, 1184627, 984983,  3114832, 1967741,
+     1599818, 144593,  2698770, 2889449, 2495550, 1053813, 1193622, 686026,  3989015,
+     2040719, 4693428, 3190376, 2926728, 3399030, 1664419, 662429,  4526841, 2186957,
+     3752558, 2440046, 2930226, 3633006, 4058166, 3137060, 3499296, 2126343, 148971,
+     2199672, 275811,  2813976, 2274536, 1189239, 1335942, 2465624, 2596042, 829684, 193400,
+     2682845, 3691697, 4022437, 4051170, 4195175, 2876420, 3984220, 2174475, 326134,
+     2606530, 2493046, 4706121, 1498980, 4576225, 1271339, 44832,   1875673, 4664940,
+     134931,  736397, 4333554, 2751031, 2163610, 2879676, 3174153, 3317403, 2052464,
+     1881883, 4757859, 3596257, 2358088, 2578758, 447504,  590720,  1717038, 1869795,
+     1133885, 3027521, 840312,  2818881, 3654321, 2730947, 353585,  1134903, 2223378,
+     1508824, 3662521, 1363776, 2712071, 288441, 1204581, 3502242, 4645567, 2767267,
+     1514366, 3956099, 1422145, 1216608, 2253360, 189132, 4238225, 1345783, 451571, 1599442,
+     3237284, 4711405, 929446,  1857675, 150759,  1277633, 761210,  138628,  1026833,
+     2599544, 2464737, 989203,  3399615, 2144292, 216142,  637312, 2044964, 716256, 1660632,
+     1762919, 4784357, 2213415, 2764769, 291806,  609772,  3264819, 1870953, 1516385,
+     235647,  1045474, 2664957, 819095,  1824119, 4045271, 4448109, 1676788, 4285177,
+     1580502, 3546548, 2771971, 3927086, 1339779, 3156204, 1730998, 1172522, 2433024,
+     4533449, 479930,  2010695, 672994,  3542039, 3176455, 26352,   2137735, 866910,
+     4410835, 2623982, 3603159, 2555625, 2765653, 267865,  2015523, 1009052, 4713994,
+     1600667, 2176195, 3179631, 4570390, 2018424, 3356384, 1784287, 894861,  3622099,
+     1647273, 3044136, 950354, 1491760, 3416929, 3757300, 2244912, 4129215, 1600848,
+     3867343, 72329,   919189,  992521, 3445975, 4712557, 4680974, 188419,  2612093,
+     1991268, 3566207, 2281468, 3859078, 2492806, 3398628, 763441,  2679107, 2554420,
+     2130132, 4664374, 1182901, 3890770, 4714667, 4209303, 4013060, 3617653, 2040022,
+     3296519, 4190671, 1693353, 2678411, 3788834, 2781815, 191965, 1083926, 503974, 3529226,
+     1650522, 1900976, 542080,  3423929, 3418905, 878165,  4701703, 3022790, 4316365, 76365,
+     4053672, 1358185, 3830478, 4445661, 3210024, 1895915, 4541133, 2938808, 562788,
+     3920065, 1458776, 4052046, 2967475, 1092809, 3203538, 159626,  3399464, 214467,
+     3343982, 1811854, 3189045, 4272117, 4701563, 424807,  4341116, 760545,  4674683,
+     1538018, 386762,  194237,  2162719, 1694433, 943728,  2389036, 2196653, 3085571,
+     1513424, 3689413, 3278747, 4197291, 3324063, 3651090, 1737936, 2768803, 2768889,
+     3108096, 4311775, 3569480, 886705,  733256,  2477493, 1735412, 2960895, 1983781,
+     1861797, 3566460, 4537673, 1164093, 3499764, 4553071, 3518985, 847658,  918948,
+     2922351, 1056144, 652895,  1013195, 780505,  1702928, 3562838, 1432719, 2405207,
+     1054920, 641647,  2240939, 3617702, 383165, 652641,  879593,  1810739, 2096385,
+     4497865, 4768530, 1743968, 3582014, 1025009, 3002122, 2422190, 527647,  1251821,
+     2571153, 4095874, 3705333, 3637407, 1385567, 4043855, 4041930, 2433139, 1710383,
+     1127734, 4362316, 711588,  817839,  3214775, 910077,  1313768, 2382229, 16864, 2081770,
+     3095420, 3195272, 548711,  2259860, 1167323, 2435974, 425238,  2085179, 2630042,
+     2632881, 2867923, 3703565, 1037695, 226617,  4379130, 1541468, 3581937, 605965,
+     1137674, 4655221, 4769963, 1394370, 4425315, 2990132, 2364485, 1561137, 2713384,
+     481509, 2900382, 934766,  2986774, 1767669, 298593,  2502539, 139296,  3794229,
+     4002180, 4718138, 2909238, 423691,  3023810, 2784924, 2760160, 1971980, 316683,
+     3828090, 3253691, 4839313, 1203624, 584938,  3901482, 1747543, 1572737, 3533226,
+     774708,  1691195, 1037110, 1557763, 225120,  4424243, 3524086, 1717663, 4332507,
+     3513592, 4274932, 1232118, 873498,  1416042, 2488925, 111391,  4704545, 4492545,
+     445317,  1584812, 2187737, 2471948, 3731678, 219255, 2282627, 2589971, 2372185,
+     4609096, 3673961, 2524410, 12823,   2437155, 3015974, 4188352, 3184084, 3690756,
+     1222341, 1278376, 3652030, 4162647, 326548,  3930062, 3926100, 1551222, 2722165,
+     4526695, 3997534, 4815513, 3139056, 2547644, 3028915, 4149092, 3656554, 2691582,
+     2676699, 1878842, 260174,  3129900, 4379993, 182347,  2189338, 3783616, 2616666,
+     2596952, 243007,  4179282, 2730,    1939894, 2332032, 3335636, 182332,  3112260,
+     2174584, 587481, 4527368, 3154106, 3403059, 673206,  2150292, 446521,  1600204,
+     4819428, 2591357, 48490, 2917012, 2285923, 1072926, 2824281, 4364250, 956033,  311938,
+     37251,   3729300, 2726300, 644966,  1623020, 1419070, 4646747, 2417222, 2680238,
+     2561083, 1793801, 2349366, 339747, 611366,  4684147, 4356907, 1277161, 4510381,
+     3218352, 4161658, 3200733, 1172372, 3997786, 3169266, 3353418, 2248955, 2875885,
+     2365369, 498208,  2968066, 2681505, 2059048, 2097106, 3607540, 1121504, 2016789,
+     1762605, 3138431, 866081,  3705757, 3833066, 2599788, 760816, 4046672, 1544367,
+     2983906, 4842911, 209599,  1250954, 3333704, 561212,  4674336, 2831841, 3690724,
+     2929360, 4830834, 1177524, 2487687, 3525137, 875283,  651241,  2110742, 1296646,
+     1543739, 4349417, 2384725, 1931751, 1519208, 1520034, 3385008, 3219962, 734912, 170230,
+     1741419, 729913,  2860117, 2362381, 1199807, 2424230, 177824,  125948,  2722701,
+     4687548, 1140771, 3232742, 4522020, 4376360, 1125603, 590312,  2481884, 138951,
+     4086775, 615155, 3395781, 4587272, 283209,  568470,  4296185, 4344150, 2454321,
+     2672602, 838828,  4051647, 1709120, 3074610, 693235,  4356087, 3018806, 239410,
+     2431497, 691186,  766276,  4462126, 859155,  2370304, 1571808, 1938673, 1694955,
+     3871296, 4245059, 3987376, 301524,  2512461, 3410437, 3300380, 684922,  4581995,
+     3599557, 683515,  1850634, 3704678, 1937490, 2035591, 3718533, 2065879, 3160765,
+     1467884, 1912241, 2501509, 3668572, 3390469, 2501150, 612319, 713633,  1976262, 135946,
+     3641535, 632083,  13414,   4217765, 4137712, 2550250, 3281035, 4179598, 961045,
+     2020694, 4380006, 1345936, 289162,  1359035, 770872,  4509911, 3947317, 4719693,
+     248568,  2625660, 1237232, 2153208, 4814282, 1259954, 3677369, 861222,  2883506,
+     3339149, 3998335, 491017,  1609022, 2648112, 742132,  649609,  4206953, 3131106,
+     3504814, 3344486, 611721,  3215620, 2856233, 4447505, 1949222, 1868345, 712710,  6966,
+     4730666, 3181872, 2972889, 3038521, 3525444, 4385208, 1845613, 1124187, 2030476,
+     4468651, 2478792, 3473580, 3783357, 1852991, 1648485, 871319,  1670723, 4458328,
+     3218600, 1811100, 3443356, 2233873, 3035207, 2548692, 3337891, 3773674, 1552957,
+     4782811, 3144712, 3523466, 1491315, 3955852, 1838410, 3164028, 1092543, 776459,
+     2959379, 2541744, 4064418, 3908320, 2854145, 3960709, 1348188, 977678,  853619,
+     1304291, 2848702, 1657913, 1319826, 3322665, 788037, 2913686, 4471279, 1766285, 348304,
+     56570,   1892118, 4017244, 401006,  3524539, 4310134, 1624693, 4081113, 957511, 849400,
+     129975,  2616130, 378537,  1556787, 3916162, 1039980, 4407778, 2027690, 4213675,
+     839863,  683134,  75805,   2493150, 4215796, 81587,   751845, 1255588, 1947964,
+     1950470, 859401,  3077088, 3931110, 2316256, 1523761, 4527477, 4237511, 1123513,
+     4209796, 3584772, 4250563, 2091754, 1618766, 2139944, 4525352, 382159,  2955887, 41760,
+     2313998, 496912,  3791570, 3904792, 3613654, 873959,  127076,  2537797, 2458107,
+     4543265, 3661909, 26828,   271816,  17854,   2461269, 1776042, 1573899, 3409957,
+     4335712, 4534313, 3392751, 1230124, 2159031, 4444015, 3373087, 3848014, 2026600,
+     1382747, 3537242, 4536743, 4714155, 3788371, 3570849, 173741,  211962,  4377778,
+     119369,  2856973, 2945854, 1508054, 4503932, 3141566, 1842177, 3448683, 3384614,
+     2886508, 1573965, 990618,  3053734, 2918742, 4508753, 1032149, 60943,   4291620,
+     722607,  2883224, 169359,  4356585, 3725543, 3678729, 341673,  3592828, 4077251,
+     3382936, 3885685, 4630994, 1286698, 4449616, 1138430, 3113385, 4660578, 2539973,
+     4562286, 4085089, 494737,  3967610, 2130702, 1823755, 1369324, 3796951, 956299, 141730,
+     935144,  4381893, 4412545, 1382250, 3024476, 2364546, 3396164, 3573511, 314081, 577688,
+     4154135, 1567018, 4047761, 2446220, 1148833, 4842497, 3967186, 1175290, 3749667,
+     1209593, 3295627, 3169065, 2460328, 1838486, 1436923, 2843887, 3676426, 2079145,
+     2975635, 535071,  4287509, 3281107, 39606,   3115500, 3204573, 722131,  3124073},
+2,
+false)));*/
 CUGRAPH_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp
index 82286b1e2fa..ad9ece99ef9 100644
--- a/cpp/tests/experimental/bfs_test.cpp
+++ b/cpp/tests/experimental/bfs_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -73,17 +73,26 @@ void bfs_reference(edge_t const* offsets,
 }
 
 typedef struct BFS_Usecase_t {
-  std::string graph_file_full_path{};
+  cugraph::test::input_graph_specifier_t input_graph_specifier{};
   size_t source{false};
 
   BFS_Usecase_t(std::string const& graph_file_path, size_t source) : source(source)
   {
+    std::string graph_file_full_path{};
     if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) {
       graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path;
     } else {
       graph_file_full_path = graph_file_path;
     }
+    input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH;
+    input_graph_specifier.graph_file_full_path = graph_file_full_path;
   };
+
+  BFS_Usecase_t(cugraph::test::rmat_params_t rmat_params, size_t source) : source(source)
+  {
+    input_graph_specifier.tag         = cugraph::test::input_graph_specifier_t::RMAT_PARAMS;
+    input_graph_specifier.rmat_params = rmat_params;
+  }
 } BFS_Usecase;
 
 class Tests_BFS : public ::testing::TestWithParam<BFS_Usecase> {
@@ -102,9 +111,25 @@ class Tests_BFS : public ::testing::TestWithParam<BFS_Usecase> {
 
     raft::handle_t handle{};
 
-    auto graph =
-      cugraph::test::read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, false>(
-        handle, configuration.graph_file_full_path, false);
+    cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, false, false> graph(handle);
+    std::tie(graph, std::ignore) =
+      configuration.input_graph_specifier.tag ==
+          cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH
+        ? cugraph::test::
+            read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, false, false>(
+              handle, configuration.input_graph_specifier.graph_file_full_path, false, false)
+        : cugraph::test::generate_graph_from_rmat_params<vertex_t, edge_t, weight_t, false, false>(
+            handle,
+            configuration.input_graph_specifier.rmat_params.scale,
+            configuration.input_graph_specifier.rmat_params.edge_factor,
+            configuration.input_graph_specifier.rmat_params.a,
+            configuration.input_graph_specifier.rmat_params.b,
+            configuration.input_graph_specifier.rmat_params.c,
+            configuration.input_graph_specifier.rmat_params.seed,
+            configuration.input_graph_specifier.rmat_params.undirected,
+            configuration.input_graph_specifier.rmat_params.scramble_vertex_ids,
+            false,
+            false);
     auto graph_view = graph.view();
 
     std::vector<edge_t> h_offsets(graph_view.get_number_of_vertices() + 1);
@@ -192,13 +217,16 @@ class Tests_BFS : public ::testing::TestWithParam<BFS_Usecase> {
 // FIXME: add tests for type combinations
 TEST_P(Tests_BFS, CheckInt32Int32) { run_current_test<int32_t, int32_t>(GetParam()); }
 
-INSTANTIATE_TEST_CASE_P(simple_test,
-                        Tests_BFS,
-                        ::testing::Values(BFS_Usecase("test/datasets/karate.mtx", 0),
-                                          BFS_Usecase("test/datasets/polbooks.mtx", 0),
-                                          BFS_Usecase("test/datasets/netscience.mtx", 0),
-                                          BFS_Usecase("test/datasets/netscience.mtx", 100),
-                                          BFS_Usecase("test/datasets/wiki2003.mtx", 1000),
-                                          BFS_Usecase("test/datasets/wiki-Talk.mtx", 1000)));
+INSTANTIATE_TEST_CASE_P(
+  simple_test,
+  Tests_BFS,
+  ::testing::Values(
+    BFS_Usecase("test/datasets/karate.mtx", 0),
+    BFS_Usecase("test/datasets/polbooks.mtx", 0),
+    BFS_Usecase("test/datasets/netscience.mtx", 0),
+    BFS_Usecase("test/datasets/netscience.mtx", 100),
+    BFS_Usecase("test/datasets/wiki2003.mtx", 1000),
+    BFS_Usecase("test/datasets/wiki-Talk.mtx", 1000),
+    BFS_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0)));
 
 CUGRAPH_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/experimental/coarsen_graph_test.cpp b/cpp/tests/experimental/coarsen_graph_test.cpp
index b790dfffa69..789619f2cd9 100644
--- a/cpp/tests/experimental/coarsen_graph_test.cpp
+++ b/cpp/tests/experimental/coarsen_graph_test.cpp
@@ -36,20 +36,6 @@
 #include <type_traits>
 #include <vector>
 
-template <typename vertex_t>
-std::enable_if_t<std::is_signed<vertex_t>::value, bool> is_valid_vertex(vertex_t num_vertices,
-                                                                        vertex_t v)
-{
-  return (v >= 0) && (v < num_vertices);
-}
-
-template <typename vertex_t>
-std::enable_if_t<std::is_unsigned<vertex_t>::value, bool> is_valid_vertex(vertex_t num_vertices,
-                                                                          vertex_t v)
-{
-  return v < num_vertices;
-}
-
 template <typename vertex_t, typename edge_t, typename weight_t>
 void check_coarsened_graph_results(edge_t* org_offsets,
                                    vertex_t* org_indices,
@@ -68,13 +54,13 @@ void check_coarsened_graph_results(edge_t* org_offsets,
   ASSERT_TRUE(std::count_if(org_indices,
                             org_indices + org_offsets[num_org_vertices],
                             [num_org_vertices](auto nbr) {
-                              return !is_valid_vertex(num_org_vertices, nbr);
+                              return !cugraph::test::is_valid_vertex(num_org_vertices, nbr);
                             }) == 0);
   ASSERT_TRUE(std::is_sorted(coarse_offsets, coarse_offsets + num_coarse_vertices));
   ASSERT_TRUE(std::count_if(coarse_indices,
                             coarse_indices + coarse_offsets[num_coarse_vertices],
                             [num_coarse_vertices](auto nbr) {
-                              return !is_valid_vertex(num_coarse_vertices, nbr);
+                              return !cugraph::test::is_valid_vertex(num_coarse_vertices, nbr);
                             }) == 0);
   ASSERT_TRUE(num_coarse_vertices <= num_org_vertices);
 
@@ -273,9 +259,11 @@ class Tests_CoarsenGraph : public ::testing::TestWithParam<CoarsenGraph_Usecase>
       return;
     }
 
-    auto graph = cugraph::test::
-      read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, store_transposed>(
-        handle, configuration.graph_file_full_path, configuration.test_weighted);
+    cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, false> graph(
+      handle);
+    std::tie(graph, std::ignore) = cugraph::test::
+      read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, store_transposed, false>(
+        handle, configuration.graph_file_full_path, configuration.test_weighted, false);
     auto graph_view = graph.view();
 
     if (graph_view.get_number_of_vertices() == 0) { return; }
diff --git a/cpp/tests/experimental/degree_test.cpp b/cpp/tests/experimental/degree_test.cpp
index 7c7b41cdacc..581b6b29f64 100644
--- a/cpp/tests/experimental/degree_test.cpp
+++ b/cpp/tests/experimental/degree_test.cpp
@@ -83,9 +83,11 @@ class Tests_Degree : public ::testing::TestWithParam<Degree_Usecase> {
   {
     raft::handle_t handle{};
 
-    auto graph = cugraph::test::
-      read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, store_transposed>(
-        handle, configuration.graph_file_full_path, false);
+    cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, false> graph(
+      handle);
+    std::tie(graph, std::ignore) = cugraph::test::
+      read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, store_transposed, false>(
+        handle, configuration.graph_file_full_path, false, false);
     auto graph_view = graph.view();
 
     std::vector<edge_t> h_offsets(graph_view.get_number_of_vertices() + 1);
diff --git a/cpp/tests/experimental/generate_rmat_test.cpp b/cpp/tests/experimental/generate_rmat_test.cpp
new file mode 100644
index 00000000000..249a1a3c6c8
--- /dev/null
+++ b/cpp/tests/experimental/generate_rmat_test.cpp
@@ -0,0 +1,285 @@
+/*
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <utilities/base_fixture.hpp>
+#include <utilities/test_utilities.hpp>
+
+#include <experimental/graph_generator.hpp>
+
+#include <raft/cudart_utils.h>
+#include <raft/handle.hpp>
+#include <rmm/device_uvector.hpp>
+#include <rmm/mr/device/cuda_memory_resource.hpp>
+
+#include <gtest/gtest.h>
+
+#include <algorithm>
+#include <cmath>
+#include <vector>
+
+// Recursively checks per-quadrant edge ratios; assumes that vertex IDs are not scrambled
+template <typename vertex_t>
+void validate_rmat_distribution(
+  std::tuple<vertex_t, vertex_t>* edges,
+  size_t num_edges,
+  vertex_t src_first,
+  vertex_t src_last,
+  vertex_t dst_first,
+  vertex_t dst_last,
+  double a,
+  double b,
+  double c,
+  bool clip_and_flip,
+  size_t min_edges /* stop recursion if # edges < min_edges */,
+  double error_tolerance /* (computed a|b|c - input a|b|c) should be smaller than error_tolerance*/)
+{
+  // we cannot expect the ratios of the edges in the four quadrants of the graph adjacency matrix to
+  // converge close to a, b, c, d if num_edges is not large enough.
+  if (num_edges < min_edges) { return; }
+
+  auto src_threshold = (src_first + src_last) / 2;
+  auto dst_threshold = (dst_first + dst_last) / 2;
+
+  auto a_plus_b_last = std::partition(edges, edges + num_edges, [src_threshold](auto edge) {
+    return std::get<0>(edge) < src_threshold;
+  });
+  auto a_last        = std::partition(
+    edges, a_plus_b_last, [dst_threshold](auto edge) { return std::get<1>(edge) < dst_threshold; });
+  auto c_last = std::partition(a_plus_b_last, edges + num_edges, [dst_threshold](auto edge) {
+    return std::get<1>(edge) < dst_threshold;
+  });
+
+  ASSERT_TRUE(std::abs((double)std::distance(edges, a_last) / num_edges - a) < error_tolerance)
+    << "# edges=" << num_edges << " computed a=" << (double)std::distance(edges, a_last) / num_edges
+    << " iput a=" << a << " error tolerance=" << error_tolerance << ".";
+  if (clip_and_flip && (src_first == dst_first) &&
+      (src_last == dst_last)) {  // if clip_and_flip and in the diagonal
+    ASSERT_TRUE(std::distance(a_last, a_plus_b_last) == 0);
+    ASSERT_TRUE(std::abs((double)std::distance(a_plus_b_last, c_last) / num_edges - (b + c)) <
+                error_tolerance)
+      << "# edges=" << num_edges
+      << " computed c=" << (double)std::distance(a_plus_b_last, c_last) / num_edges
+      << " iput (b + c)=" << (b + c) << " error tolerance=" << error_tolerance << ".";
+  } else {
+    ASSERT_TRUE(std::abs((double)std::distance(a_last, a_plus_b_last) / num_edges - b) <
+                error_tolerance)
+      << "# edges=" << num_edges
+      << " computed b=" << (double)std::distance(a_last, a_plus_b_last) / num_edges
+      << " iput b=" << b << " error tolerance=" << error_tolerance << ".";
+    ASSERT_TRUE(std::abs((double)std::distance(a_plus_b_last, c_last) / num_edges - c) <
+                error_tolerance)
+      << "# edges=" << num_edges
+      << " computed c=" << (double)std::distance(a_plus_b_last, c_last) / num_edges
+      << " iput c=" << c << " error tolerance=" << error_tolerance << ".";
+  }
+
+  validate_rmat_distribution(edges,  // quadrant a (low src, low dst)
+                             std::distance(edges, a_last),
+                             src_first,
+                             src_threshold,
+                             dst_first,
+                             dst_threshold,
+                             a,
+                             b,
+                             c,
+                             clip_and_flip,
+                             min_edges,
+                             error_tolerance);
+  validate_rmat_distribution(a_last,  // quadrant b (low src, high dst)
+                             std::distance(a_last, a_plus_b_last),
+                             src_first,
+                             (src_first + src_last) / 2,
+                             dst_threshold,
+                             dst_last,
+                             a,
+                             b,
+                             c,
+                             clip_and_flip,
+                             min_edges,
+                             error_tolerance);
+  validate_rmat_distribution(a_plus_b_last,  // quadrant c (high src, low dst)
+                             std::distance(a_plus_b_last, c_last),
+                             src_threshold,
+                             src_last,
+                             dst_first,
+                             dst_threshold,
+                             a,
+                             b,
+                             c,
+                             clip_and_flip,
+                             min_edges,
+                             error_tolerance);
+  validate_rmat_distribution(c_last,  // quadrant d (high src, high dst)
+                             std::distance(c_last, edges + num_edges),
+                             src_threshold,
+                             src_last,
+                             dst_threshold,
+                             dst_last,
+                             a,
+                             b,
+                             c,
+                             clip_and_flip,
+                             min_edges,
+                             error_tolerance);
+
+  return;
+}
+
+typedef struct GenerateRmat_Usecase_t {
+  size_t scale{0};            // graph has 2^scale vertices
+  size_t edge_factor{0};      // # edges requested = 2^scale * edge_factor
+  double a{0.0};              // R-mat parameter a passed to the generator
+  double b{0.0};              // R-mat parameter b passed to the generator
+  double c{0.0};              // R-mat parameter c passed to the generator
+  bool clip_and_flip{false};  // passed to the generator as-is
+
+  GenerateRmat_Usecase_t(
+    size_t scale, size_t edge_factor, double a, double b, double c, bool clip_and_flip)
+    : scale(scale), edge_factor(edge_factor), a(a), b(b), c(c), clip_and_flip(clip_and_flip){};
+} GenerateRmat_Usecase;
+
+class Tests_GenerateRmat : public ::testing::TestWithParam<GenerateRmat_Usecase> {
+ public:
+  Tests_GenerateRmat() {}
+  static void SetUpTestCase() {}  // gtest looks for this exact spelling ("SetUp", capital U)
+  static void TearDownTestCase() {}
+  virtual void SetUp() {}
+  virtual void TearDown() {}
+
+  // Runs the generator with scramble off and on, validates each result, then compares degrees.
+  template <typename vertex_t>
+  void run_current_test(GenerateRmat_Usecase const& configuration)
+  {
+    raft::handle_t handle{};
+
+    auto num_vertices = static_cast<vertex_t>(size_t{1} << configuration.scale);
+    std::vector<size_t> no_scramble_out_degrees(num_vertices, 0);
+    std::vector<size_t> no_scramble_in_degrees(num_vertices, 0);
+    std::vector<size_t> scramble_out_degrees(num_vertices, 0);
+    std::vector<size_t> scramble_in_degrees(num_vertices, 0);
+    for (size_t scramble = 0; scramble < 2; ++scramble) {
+      rmm::device_uvector<vertex_t> d_srcs(0, handle.get_stream());
+      rmm::device_uvector<vertex_t> d_dsts(0, handle.get_stream());
+
+      CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+
+      std::tie(d_srcs, d_dsts) = cugraph::experimental::generate_rmat_edgelist<vertex_t>(
+        handle,
+        configuration.scale,
+        (size_t{1} << configuration.scale) * configuration.edge_factor,
+        configuration.a,
+        configuration.b,
+        configuration.c,
+        uint64_t{0},
+        configuration.clip_and_flip,
+        static_cast<bool>(scramble));
+
+      CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+
+      std::vector<vertex_t> h_cugraph_srcs(d_srcs.size());
+      std::vector<vertex_t> h_cugraph_dsts(d_dsts.size());
+
+      raft::update_host(h_cugraph_srcs.data(), d_srcs.data(), d_srcs.size(), handle.get_stream());
+      raft::update_host(h_cugraph_dsts.data(), d_dsts.data(), d_dsts.size(), handle.get_stream());
+      CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));
+      // Sanity checks: exact edge count, and every endpoint passes is_valid_vertex().
+      ASSERT_TRUE(
+        (h_cugraph_srcs.size() == (size_t{1} << configuration.scale) * configuration.edge_factor) &&
+        (h_cugraph_dsts.size() == (size_t{1} << configuration.scale) * configuration.edge_factor))
+        << "Returned an invalid number of R-mat graph edges.";
+      ASSERT_TRUE(
+        std::count_if(h_cugraph_srcs.begin(),
+                      h_cugraph_srcs.end(),
+                      [num_vertices = static_cast<vertex_t>(size_t{1} << configuration.scale)](
+                        auto v) { return !cugraph::test::is_valid_vertex(num_vertices, v); }) == 0)
+        << "Returned R-mat graph edges have invalid source vertex IDs.";
+      ASSERT_TRUE(
+        std::count_if(h_cugraph_dsts.begin(),
+                      h_cugraph_dsts.end(),
+                      [num_vertices = static_cast<vertex_t>(size_t{1} << configuration.scale)](
+                        auto v) { return !cugraph::test::is_valid_vertex(num_vertices, v); }) == 0)
+        << "Returned R-mat graph edges have invalid destination vertex IDs.";
+      // The src >= dst and distribution checks are only performed on unscrambled output.
+      if (!scramble) {
+        if (configuration.clip_and_flip) {
+          for (size_t i = 0; i < h_cugraph_srcs.size(); ++i) {
+            ASSERT_TRUE(h_cugraph_srcs[i] >= h_cugraph_dsts[i]);
+          }
+        }
+
+        std::vector<std::tuple<vertex_t, vertex_t>> h_cugraph_edges(h_cugraph_srcs.size());
+        for (size_t i = 0; i < h_cugraph_srcs.size(); ++i) {
+          h_cugraph_edges[i] = std::make_tuple(h_cugraph_srcs[i], h_cugraph_dsts[i]);
+        }
+
+        validate_rmat_distribution(h_cugraph_edges.data(),
+                                   h_cugraph_edges.size(),
+                                   vertex_t{0},
+                                   num_vertices,
+                                   vertex_t{0},
+                                   num_vertices,
+                                   configuration.a,
+                                   configuration.b,
+                                   configuration.c,
+                                   configuration.clip_and_flip,
+                                   size_t{100000},
+                                   0.01);
+      }
+      // Count per-vertex out/in degrees, then sort so the two runs compare as multisets.
+      if (scramble) {
+        std::for_each(h_cugraph_srcs.begin(),
+                      h_cugraph_srcs.end(),
+                      [&scramble_out_degrees](auto src) { scramble_out_degrees[src]++; });
+        std::for_each(h_cugraph_dsts.begin(),
+                      h_cugraph_dsts.end(),
+                      [&scramble_in_degrees](auto dst) { scramble_in_degrees[dst]++; });
+        std::sort(scramble_out_degrees.begin(), scramble_out_degrees.end());
+        std::sort(scramble_in_degrees.begin(), scramble_in_degrees.end());
+      } else {
+        std::for_each(h_cugraph_srcs.begin(),
+                      h_cugraph_srcs.end(),
+                      [&no_scramble_out_degrees](auto src) { no_scramble_out_degrees[src]++; });
+        std::for_each(h_cugraph_dsts.begin(),
+                      h_cugraph_dsts.end(),
+                      [&no_scramble_in_degrees](auto dst) { no_scramble_in_degrees[dst]++; });
+        std::sort(no_scramble_out_degrees.begin(), no_scramble_out_degrees.end());
+        std::sort(no_scramble_in_degrees.begin(), no_scramble_in_degrees.end());
+      }
+    }
+
+    // this relies on the fact that the edge generator is deterministic.
+    // ideally, we should test that the two graphs are isomorphic, but that is costly to decide;
+    // instead, we just compare the sorted out-degree & in-degree sequences
+    ASSERT_TRUE(std::equal(no_scramble_out_degrees.begin(),
+                           no_scramble_out_degrees.end(),
+                           scramble_out_degrees.begin()));
+    ASSERT_TRUE(std::equal(
+      no_scramble_in_degrees.begin(), no_scramble_in_degrees.end(), scramble_in_degrees.begin()));
+  }
+};
+
+// FIXME: add tests for type combinations
+
+TEST_P(Tests_GenerateRmat, CheckInt32) { run_current_test<int32_t>(GetParam()); }
+// Use-case arguments are (scale, edge_factor, a, b, c, clip_and_flip), in that order.
+INSTANTIATE_TEST_CASE_P(simple_test,
+                        Tests_GenerateRmat,
+                        ::testing::Values(GenerateRmat_Usecase(20, 16, 0.57, 0.19, 0.19, true),
+                                          GenerateRmat_Usecase(20, 16, 0.57, 0.19, 0.19, false),
+                                          GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, true),
+                                          GenerateRmat_Usecase(20, 16, 0.45, 0.22, 0.22, false)));
+
+CUGRAPH_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/experimental/graph_test.cpp b/cpp/tests/experimental/graph_test.cpp
index b80de68f95c..949f6d2e08e 100644
--- a/cpp/tests/experimental/graph_test.cpp
+++ b/cpp/tests/experimental/graph_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -91,10 +91,28 @@ class Tests_Graph : public ::testing::TestWithParam<Graph_Usecase> {
   template <typename vertex_t, typename edge_t, typename weight_t, bool store_transposed>
   void run_current_test(Graph_Usecase const& configuration)
   {
-    auto mm_graph =
-      cugraph::test::read_edgelist_from_matrix_market_file<vertex_t, edge_t, weight_t>(
-        configuration.graph_file_full_path);
-    edge_t number_of_edges = static_cast<edge_t>(mm_graph.h_rows.size());
+    raft::handle_t handle{};
+
+    rmm::device_uvector<vertex_t> d_rows(0, handle.get_stream());
+    rmm::device_uvector<vertex_t> d_cols(0, handle.get_stream());
+    rmm::device_uvector<weight_t> d_weights(0, handle.get_stream());
+    vertex_t number_of_vertices{};
+    bool is_symmetric{};
+    std::tie(d_rows, d_cols, d_weights, number_of_vertices, is_symmetric) =
+      cugraph::test::read_edgelist_from_matrix_market_file<vertex_t, weight_t>(
+        handle, configuration.graph_file_full_path, configuration.test_weighted);
+    edge_t number_of_edges = static_cast<edge_t>(d_rows.size());
+
+    std::vector<vertex_t> h_rows(number_of_edges);
+    std::vector<vertex_t> h_cols(number_of_edges);
+    std::vector<weight_t> h_weights(configuration.test_weighted ? number_of_edges : edge_t{0});
+
+    raft::update_host(h_rows.data(), d_rows.data(), number_of_edges, handle.get_stream());
+    raft::update_host(h_cols.data(), d_cols.data(), number_of_edges, handle.get_stream());
+    if (configuration.test_weighted) {
+      raft::update_host(h_weights.data(), d_weights.data(), number_of_edges, handle.get_stream());
+    }
+    CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));
 
     std::vector<edge_t> h_reference_offsets{};
     std::vector<vertex_t> h_reference_indices{};
@@ -102,28 +120,12 @@ class Tests_Graph : public ::testing::TestWithParam<Graph_Usecase> {
 
     std::tie(h_reference_offsets, h_reference_indices, h_reference_weights) =
       graph_reference<store_transposed>(
-        mm_graph.h_rows.data(),
-        mm_graph.h_cols.data(),
-        configuration.test_weighted ? mm_graph.h_weights.data() : nullptr,
-        mm_graph.number_of_vertices,
+        h_rows.data(),
+        h_cols.data(),
+        configuration.test_weighted ? h_weights.data() : static_cast<weight_t*>(nullptr),
+        number_of_vertices,
         number_of_edges);
 
-    raft::handle_t handle{};
-
-    rmm::device_uvector<vertex_t> d_rows(number_of_edges, handle.get_stream());
-    rmm::device_uvector<vertex_t> d_cols(number_of_edges, handle.get_stream());
-    rmm::device_uvector<weight_t> d_weights(configuration.test_weighted ? number_of_edges : 0,
-                                            handle.get_stream());
-
-    raft::update_device(
-      d_rows.data(), mm_graph.h_rows.data(), number_of_edges, handle.get_stream());
-    raft::update_device(
-      d_cols.data(), mm_graph.h_cols.data(), number_of_edges, handle.get_stream());
-    if (configuration.test_weighted) {
-      raft::update_device(
-        d_weights.data(), mm_graph.h_weights.data(), number_of_edges, handle.get_stream());
-    }
-
     cugraph::experimental::edgelist_t<vertex_t, edge_t, weight_t> edgelist{
       d_rows.data(),
       d_cols.data(),
@@ -136,8 +138,8 @@ class Tests_Graph : public ::testing::TestWithParam<Graph_Usecase> {
       cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, false>(
         handle,
         edgelist,
-        mm_graph.number_of_vertices,
-        cugraph::experimental::graph_properties_t{mm_graph.is_symmetric, false},
+        number_of_vertices,
+        cugraph::experimental::graph_properties_t{is_symmetric, false},
         false,
         true);
 
@@ -145,7 +147,7 @@ class Tests_Graph : public ::testing::TestWithParam<Graph_Usecase> {
 
     CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
 
-    ASSERT_EQ(graph_view.get_number_of_vertices(), mm_graph.number_of_vertices);
+    ASSERT_EQ(graph_view.get_number_of_vertices(), number_of_vertices);
     ASSERT_EQ(graph_view.get_number_of_edges(), number_of_edges);
 
     std::vector<edge_t> h_cugraph_offsets(graph_view.get_number_of_vertices() + 1);
@@ -174,7 +176,7 @@ class Tests_Graph : public ::testing::TestWithParam<Graph_Usecase> {
       std::equal(h_reference_offsets.begin(), h_reference_offsets.end(), h_cugraph_offsets.begin()))
       << "Graph compressed sparse format offsets do not match with the reference values.";
     ASSERT_EQ(h_reference_weights.size(), h_cugraph_weights.size());
-    for (vertex_t i = 0; i < mm_graph.number_of_vertices; ++i) {
+    for (vertex_t i = 0; i < number_of_vertices; ++i) {
       auto start  = h_reference_offsets[i];
       auto degree = h_reference_offsets[i + 1] - start;
       if (configuration.test_weighted) {
diff --git a/cpp/tests/experimental/induced_subgraph_test.cpp b/cpp/tests/experimental/induced_subgraph_test.cpp
index 72894a9349f..4e0ca9e7d92 100644
--- a/cpp/tests/experimental/induced_subgraph_test.cpp
+++ b/cpp/tests/experimental/induced_subgraph_test.cpp
@@ -113,9 +113,11 @@ class Tests_InducedSubgraph : public ::testing::TestWithParam<InducedSubgraph_Us
   {
     raft::handle_t handle{};
 
-    auto graph = cugraph::test::
-      read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, store_transposed>(
-        handle, configuration.graph_file_full_path, configuration.test_weighted);
+    cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, false> graph(
+      handle);
+    std::tie(graph, std::ignore) = cugraph::test::
+      read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, store_transposed, false>(
+        handle, configuration.graph_file_full_path, configuration.test_weighted, false);
     auto graph_view = graph.view();
 
     std::vector<edge_t> h_offsets(graph_view.get_number_of_vertices() + 1);
diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp
index 3e9f0b478a0..776bb60716c 100644
--- a/cpp/tests/experimental/katz_centrality_test.cpp
+++ b/cpp/tests/experimental/katz_centrality_test.cpp
@@ -89,18 +89,31 @@ void katz_centrality_reference(edge_t const* offsets,
 }
 
 typedef struct KatzCentrality_Usecase_t {
-  std::string graph_file_full_path{};
+  cugraph::test::input_graph_specifier_t input_graph_specifier{};
+
   bool test_weighted{false};
 
   KatzCentrality_Usecase_t(std::string const& graph_file_path, bool test_weighted)
     : test_weighted(test_weighted)
   {
+    std::string graph_file_full_path{};
     if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) {
       graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path;
     } else {
       graph_file_full_path = graph_file_path;
     }
+    input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH;
+    input_graph_specifier.graph_file_full_path = graph_file_full_path;
   };
+
+  KatzCentrality_Usecase_t(cugraph::test::rmat_params_t rmat_params,
+                           double personalization_ratio,  // NOTE(review): never read below
+                           bool test_weighted)
+    : test_weighted(test_weighted)
+  {
+    input_graph_specifier.tag         = cugraph::test::input_graph_specifier_t::RMAT_PARAMS;
+    input_graph_specifier.rmat_params = rmat_params;
+  }
 } KatzCentrality_Usecase;
 
 class Tests_KatzCentrality : public ::testing::TestWithParam<KatzCentrality_Usecase> {
@@ -117,9 +130,28 @@ class Tests_KatzCentrality : public ::testing::TestWithParam<KatzCentrality_Usec
   {
     raft::handle_t handle{};
 
-    auto graph =
-      cugraph::test::read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, true>(
-        handle, configuration.graph_file_full_path, configuration.test_weighted);
+    cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, true, false> graph(handle);
+    std::tie(graph, std::ignore) =
+      configuration.input_graph_specifier.tag ==
+          cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH
+        ? cugraph::test::
+            read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, true, false>(
+              handle,
+              configuration.input_graph_specifier.graph_file_full_path,
+              configuration.test_weighted,
+              false)
+        : cugraph::test::generate_graph_from_rmat_params<vertex_t, edge_t, weight_t, true, false>(
+            handle,
+            configuration.input_graph_specifier.rmat_params.scale,
+            configuration.input_graph_specifier.rmat_params.edge_factor,
+            configuration.input_graph_specifier.rmat_params.a,
+            configuration.input_graph_specifier.rmat_params.b,
+            configuration.input_graph_specifier.rmat_params.c,
+            configuration.input_graph_specifier.rmat_params.seed,
+            configuration.input_graph_specifier.rmat_params.undirected,
+            configuration.input_graph_specifier.rmat_params.scramble_vertex_ids,
+            configuration.test_weighted,
+            false);
     auto graph_view = graph.view();
 
     std::vector<edge_t> h_offsets(graph_view.get_number_of_vertices() + 1);
@@ -219,13 +251,26 @@ TEST_P(Tests_KatzCentrality, CheckInt32Int32FloatFloat)
 INSTANTIATE_TEST_CASE_P(
   simple_test,
   Tests_KatzCentrality,
-  ::testing::Values(KatzCentrality_Usecase("test/datasets/karate.mtx", false),
-                    KatzCentrality_Usecase("test/datasets/karate.mtx", true),
-                    KatzCentrality_Usecase("test/datasets/web-Google.mtx", false),
-                    KatzCentrality_Usecase("test/datasets/web-Google.mtx", true),
-                    KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", false),
-                    KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", true),
-                    KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", false),
-                    KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", true)));
+  ::testing::Values(
+    KatzCentrality_Usecase("test/datasets/karate.mtx", false),
+    KatzCentrality_Usecase("test/datasets/karate.mtx", true),
+    KatzCentrality_Usecase("test/datasets/web-Google.mtx", false),
+    KatzCentrality_Usecase("test/datasets/web-Google.mtx", true),
+    KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", false),
+    KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", true),
+    KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", false),
+    KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", true),
+    KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false},
+                           0.0,
+                           false),
+    KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false},
+                           0.5,
+                           false),
+    KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false},
+                           0.0,
+                           true),
+    KatzCentrality_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false},
+                           0.5,
+                           true)));
 
 CUGRAPH_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/experimental/louvain_test.cu b/cpp/tests/experimental/louvain_test.cu
index 35a26923df6..56fb2c109bf 100644
--- a/cpp/tests/experimental/louvain_test.cu
+++ b/cpp/tests/experimental/louvain_test.cu
@@ -69,9 +69,10 @@ class Tests_Louvain : public ::testing::TestWithParam<Louvain_Usecase> {
 
     std::cout << "read graph file: " << configuration.graph_file_full_path << std::endl;
 
-    auto graph =
-      cugraph::test::read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, false>(
-        handle, configuration.graph_file_full_path, configuration.test_weighted);
+    cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, false, false> graph(handle);
+    std::tie(graph, std::ignore) =
+      cugraph::test::read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, false, false>(
+        handle, configuration.graph_file_full_path, configuration.test_weighted, false);
 
     auto graph_view = graph.view();
 
diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp
index 53143bf0bf3..ff3b073cbc7 100644
--- a/cpp/tests/experimental/pagerank_test.cpp
+++ b/cpp/tests/experimental/pagerank_test.cpp
@@ -124,7 +124,8 @@ void pagerank_reference(edge_t const* offsets,
 }
 
 typedef struct PageRank_Usecase_t {
-  std::string graph_file_full_path{};
+  cugraph::test::input_graph_specifier_t input_graph_specifier{};
+
   double personalization_ratio{0.0};
   bool test_weighted{false};
 
@@ -133,12 +134,24 @@ typedef struct PageRank_Usecase_t {
                      bool test_weighted)
     : personalization_ratio(personalization_ratio), test_weighted(test_weighted)
   {
+    std::string graph_file_full_path{};
     if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) {
       graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path;
     } else {
       graph_file_full_path = graph_file_path;
     }
+    input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH;
+    input_graph_specifier.graph_file_full_path = graph_file_full_path;
   };
+
+  PageRank_Usecase_t(cugraph::test::rmat_params_t rmat_params,
+                     double personalization_ratio,
+                     bool test_weighted)
+    : personalization_ratio(personalization_ratio), test_weighted(test_weighted)
+  {
+    input_graph_specifier.tag         = cugraph::test::input_graph_specifier_t::RMAT_PARAMS;
+    input_graph_specifier.rmat_params = rmat_params;
+  }
 } PageRank_Usecase;
 
 class Tests_PageRank : public ::testing::TestWithParam<PageRank_Usecase> {
@@ -155,9 +168,28 @@ class Tests_PageRank : public ::testing::TestWithParam<PageRank_Usecase> {
   {
     raft::handle_t handle{};
 
-    auto graph =
-      cugraph::test::read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, true>(
-        handle, configuration.graph_file_full_path, configuration.test_weighted);
+    cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, true, false> graph(handle);
+    std::tie(graph, std::ignore) =
+      configuration.input_graph_specifier.tag ==
+          cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH
+        ? cugraph::test::
+            read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, true, false>(
+              handle,
+              configuration.input_graph_specifier.graph_file_full_path,
+              configuration.test_weighted,
+              false)
+        : cugraph::test::generate_graph_from_rmat_params<vertex_t, edge_t, weight_t, true, false>(
+            handle,
+            configuration.input_graph_specifier.rmat_params.scale,
+            configuration.input_graph_specifier.rmat_params.edge_factor,
+            configuration.input_graph_specifier.rmat_params.a,
+            configuration.input_graph_specifier.rmat_params.b,
+            configuration.input_graph_specifier.rmat_params.c,
+            configuration.input_graph_specifier.rmat_params.seed,
+            configuration.input_graph_specifier.rmat_params.undirected,
+            configuration.input_graph_specifier.rmat_params.scramble_vertex_ids,
+            configuration.test_weighted,
+            false);
     auto graph_view = graph.view();
 
     std::vector<edge_t> h_offsets(graph_view.get_number_of_vertices() + 1);
@@ -225,11 +257,11 @@ class Tests_PageRank : public ::testing::TestWithParam<PageRank_Usecase> {
                           handle.get_stream());
     }
 
-    std::vector<result_t> h_reference_pageranks(graph_view.get_number_of_vertices());
-
     result_t constexpr alpha{0.85};
     result_t constexpr epsilon{1e-6};
 
+    std::vector<result_t> h_reference_pageranks(graph_view.get_number_of_vertices());
+
     pagerank_reference(h_offsets.data(),
                        h_indices.data(),
                        h_weights.size() > 0 ? h_weights.data() : static_cast<weight_t*>(nullptr),
@@ -295,21 +327,34 @@ TEST_P(Tests_PageRank, CheckInt32Int32FloatFloat)
 INSTANTIATE_TEST_CASE_P(
   simple_test,
   Tests_PageRank,
-  ::testing::Values(PageRank_Usecase("test/datasets/karate.mtx", 0.0, false),
-                    PageRank_Usecase("test/datasets/karate.mtx", 0.5, false),
-                    PageRank_Usecase("test/datasets/karate.mtx", 0.0, true),
-                    PageRank_Usecase("test/datasets/karate.mtx", 0.5, true),
-                    PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, false),
-                    PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, false),
-                    PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, true),
-                    PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, true),
-                    PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, false),
-                    PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, false),
-                    PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true),
-                    PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true),
-                    PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false),
-                    PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false),
-                    PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true),
-                    PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true)));
+  ::testing::Values(
+    PageRank_Usecase("test/datasets/karate.mtx", 0.0, false),
+    PageRank_Usecase("test/datasets/karate.mtx", 0.5, false),
+    PageRank_Usecase("test/datasets/karate.mtx", 0.0, true),
+    PageRank_Usecase("test/datasets/karate.mtx", 0.5, true),
+    PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, false),
+    PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, false),
+    PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, true),
+    PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, true),
+    PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, false),
+    PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, false),
+    PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true),
+    PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true),
+    PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false),
+    PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false),
+    PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true),
+    PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true),
+    PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false},
+                     0.0,
+                     false),
+    PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false},
+                     0.5,
+                     false),
+    PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false},
+                     0.0,
+                     true),
+    PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false},
+                     0.5,
+                     true)));
 
 CUGRAPH_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp
index 2f7cc499d35..611abcb0d75 100644
--- a/cpp/tests/experimental/sssp_test.cpp
+++ b/cpp/tests/experimental/sssp_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2021, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -79,17 +79,26 @@ void sssp_reference(edge_t const* offsets,
 }
 
 typedef struct SSSP_Usecase_t {
-  std::string graph_file_full_path{};
+  cugraph::test::input_graph_specifier_t input_graph_specifier{};
   size_t source{false};
 
   SSSP_Usecase_t(std::string const& graph_file_path, size_t source) : source(source)
   {
+    std::string graph_file_full_path{};
     if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) {
       graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path;
     } else {
       graph_file_full_path = graph_file_path;
     }
+    input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH;
+    input_graph_specifier.graph_file_full_path = graph_file_full_path;
   };
+
+  SSSP_Usecase_t(cugraph::test::rmat_params_t rmat_params, size_t source) : source(source)
+  {
+    input_graph_specifier.tag         = cugraph::test::input_graph_specifier_t::RMAT_PARAMS;
+    input_graph_specifier.rmat_params = rmat_params;
+  }
 } SSSP_Usecase;
 
 class Tests_SSSP : public ::testing::TestWithParam<SSSP_Usecase> {
@@ -106,9 +115,25 @@ class Tests_SSSP : public ::testing::TestWithParam<SSSP_Usecase> {
   {
     raft::handle_t handle{};
 
-    auto graph =
-      cugraph::test::read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, false>(
-        handle, configuration.graph_file_full_path, true);
+    cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, false, false> graph(handle);
+    std::tie(graph, std::ignore) =
+      configuration.input_graph_specifier.tag ==
+          cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH
+        ? cugraph::test::
+            read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, false, false>(
+              handle, configuration.input_graph_specifier.graph_file_full_path, true, false)
+        : cugraph::test::generate_graph_from_rmat_params<vertex_t, edge_t, weight_t, false, false>(
+            handle,
+            configuration.input_graph_specifier.rmat_params.scale,
+            configuration.input_graph_specifier.rmat_params.edge_factor,
+            configuration.input_graph_specifier.rmat_params.a,
+            configuration.input_graph_specifier.rmat_params.b,
+            configuration.input_graph_specifier.rmat_params.c,
+            configuration.input_graph_specifier.rmat_params.seed,
+            configuration.input_graph_specifier.rmat_params.undirected,
+            configuration.input_graph_specifier.rmat_params.scramble_vertex_ids,
+            true,
+            false);
     auto graph_view = graph.view();
 
     std::vector<edge_t> h_offsets(graph_view.get_number_of_vertices() + 1);
@@ -208,16 +233,13 @@ class Tests_SSSP : public ::testing::TestWithParam<SSSP_Usecase> {
 // FIXME: add tests for type combinations
 TEST_P(Tests_SSSP, CheckInt32Int32Float) { run_current_test<int32_t, int32_t, float>(GetParam()); }
 
-#if 0
-INSTANTIATE_TEST_CASE_P(simple_test,
-                        Tests_SSSP,
-                        ::testing::Values(SSSP_Usecase("test/datasets/karate.mtx", 0)));
-#else
-INSTANTIATE_TEST_CASE_P(simple_test,
-                        Tests_SSSP,
-                        ::testing::Values(SSSP_Usecase("test/datasets/karate.mtx", 0),
-                                          SSSP_Usecase("test/datasets/dblp.mtx", 0),
-                                          SSSP_Usecase("test/datasets/wiki2003.mtx", 1000)));
-#endif
+INSTANTIATE_TEST_CASE_P(
+  simple_test,
+  Tests_SSSP,
+  ::testing::Values(
+    SSSP_Usecase("test/datasets/karate.mtx", 0),
+    SSSP_Usecase("test/datasets/dblp.mtx", 0),
+    SSSP_Usecase("test/datasets/wiki2003.mtx", 1000),
+    SSSP_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false}, 0)));
 
 CUGRAPH_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/experimental/weight_sum_test.cpp b/cpp/tests/experimental/weight_sum_test.cpp
index aeda7386314..9ab47b69baa 100644
--- a/cpp/tests/experimental/weight_sum_test.cpp
+++ b/cpp/tests/experimental/weight_sum_test.cpp
@@ -85,9 +85,11 @@ class Tests_WeightSum : public ::testing::TestWithParam<WeightSum_Usecase> {
   {
     raft::handle_t handle{};
 
-    auto graph = cugraph::test::
-      read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, store_transposed>(
-        handle, configuration.graph_file_full_path, true);
+    cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, false> graph(
+      handle);
+    std::tie(graph, std::ignore) = cugraph::test::
+      read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, store_transposed, false>(
+        handle, configuration.graph_file_full_path, true, false);
     auto graph_view = graph.view();
 
     std::vector<edge_t> h_offsets(graph_view.get_number_of_vertices() + 1);
diff --git a/cpp/tests/pagerank/mg_pagerank_test.cpp b/cpp/tests/pagerank/mg_pagerank_test.cpp
new file mode 100644
index 00000000000..85ee9a4243e
--- /dev/null
+++ b/cpp/tests/pagerank/mg_pagerank_test.cpp
@@ -0,0 +1,357 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <utilities/base_fixture.hpp>
+#include <utilities/test_utilities.hpp>
+
+#include <algorithms.hpp>
+#include <partition_manager.hpp>
+
+#include <raft/comms/comms.hpp>
+#include <raft/comms/mpi_comms.hpp>
+#include <raft/handle.hpp>
+
+#include <gtest/gtest.h>
+
+#include <random>
+
+typedef struct PageRank_Usecase_t {
+  // Which graph to build: a Matrix Market file path or a set of R-mat generator parameters.
+  cugraph::test::input_graph_specifier_t input_graph_specifier{};
+
+  double personalization_ratio{0.0};  // chance that a vertex receives a personalization value
+  bool test_weighted{false};          // forwarded to the graph reader/generator
+
+  // Matrix Market input; a non-empty path not starting with '/' gets the dataset root prepended.
+  PageRank_Usecase_t(std::string const& graph_file_path,
+                     double personalization_ratio,
+                     bool test_weighted)
+    : personalization_ratio(personalization_ratio), test_weighted(test_weighted)
+  {
+    bool const is_relative = (graph_file_path.length() > 0) && (graph_file_path[0] != '/');
+    input_graph_specifier.tag = cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH;
+    input_graph_specifier.graph_file_full_path =
+      is_relative ? cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path
+                  : graph_file_path;
+  }
+
+  // Generated input: the graph is built from the given R-mat parameters instead of a file.
+  PageRank_Usecase_t(cugraph::test::rmat_params_t rmat_params,
+                     double personalization_ratio,
+                     bool test_weighted)
+    : personalization_ratio(personalization_ratio), test_weighted(test_weighted)
+  {
+    input_graph_specifier.tag         = cugraph::test::input_graph_specifier_t::RMAT_PARAMS;
+    input_graph_specifier.rmat_params = rmat_params;
+  }
+} PageRank_Usecase;
+
+class Tests_MGPageRank : public ::testing::TestWithParam<PageRank_Usecase> {
+ public:
+  Tests_MGPageRank() {}
+  static void SetupTestCase() {}
+  static void TearDownTestCase() {}
+
+  virtual void SetUp() {}
+  virtual void TearDown() {}
+
+  // Run PageRank on the SG and MG builds of the same input and assert the results nearly match
+  template <typename vertex_t, typename edge_t, typename weight_t, typename result_t>
+  void run_current_test(PageRank_Usecase const& configuration)
+  {
+    // 1. initialize handle & (sub-)communicators
+
+    raft::handle_t handle{};
+
+    raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD);
+    auto& comm           = handle.get_comms();
+    auto const comm_size = comm.get_size();
+    auto const comm_rank = comm.get_rank();
+
+    auto row_comm_size = static_cast<int>(sqrt(static_cast<double>(comm_size)));
+    while (comm_size % row_comm_size != 0) { --row_comm_size; }  // largest divisor <= sqrt(size)
+    cugraph::partition_2d::subcomm_factory_t<cugraph::partition_2d::key_naming_t, vertex_t>
+      subcomm_factory(handle, row_comm_size);
+
+    // 2. create SG & MG graphs (each comes with its renumber map labels)
+
+    cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, true, false> sg_graph(handle);
+    rmm::device_uvector<vertex_t> d_sg_renumber_map_labels(0, handle.get_stream());
+    std::tie(sg_graph, d_sg_renumber_map_labels) =
+      configuration.input_graph_specifier.tag ==
+          cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH
+        ? cugraph::test::
+            read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, true, false>(
+              handle,
+              configuration.input_graph_specifier.graph_file_full_path,
+              configuration.test_weighted,
+              true)
+        : cugraph::test::generate_graph_from_rmat_params<vertex_t, edge_t, weight_t, true, false>(
+            handle,
+            configuration.input_graph_specifier.rmat_params.scale,
+            configuration.input_graph_specifier.rmat_params.edge_factor,
+            configuration.input_graph_specifier.rmat_params.a,
+            configuration.input_graph_specifier.rmat_params.b,
+            configuration.input_graph_specifier.rmat_params.c,
+            configuration.input_graph_specifier.rmat_params.seed,
+            configuration.input_graph_specifier.rmat_params.undirected,
+            configuration.input_graph_specifier.rmat_params.scramble_vertex_ids,
+            configuration.test_weighted,
+            true);
+
+    auto sg_graph_view = sg_graph.view();
+
+    cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, true, true> mg_graph(handle);
+    rmm::device_uvector<vertex_t> d_mg_renumber_map_labels(0, handle.get_stream());
+    std::tie(mg_graph, d_mg_renumber_map_labels) =
+      configuration.input_graph_specifier.tag ==
+          cugraph::test::input_graph_specifier_t::MATRIX_MARKET_FILE_PATH
+        ? cugraph::test::read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, true, true>(
+            handle,
+            configuration.input_graph_specifier.graph_file_full_path,
+            configuration.test_weighted,
+            true)
+        : cugraph::test::generate_graph_from_rmat_params<vertex_t, edge_t, weight_t, true, true>(
+            handle,
+            configuration.input_graph_specifier.rmat_params.scale,
+            configuration.input_graph_specifier.rmat_params.edge_factor,
+            configuration.input_graph_specifier.rmat_params.a,
+            configuration.input_graph_specifier.rmat_params.b,
+            configuration.input_graph_specifier.rmat_params.c,
+            configuration.input_graph_specifier.rmat_params.seed,
+            configuration.input_graph_specifier.rmat_params.undirected,
+            configuration.input_graph_specifier.rmat_params.scramble_vertex_ids,
+            configuration.test_weighted,
+            true);
+
+    auto mg_graph_view = mg_graph.view();
+
+    std::vector<vertex_t> h_sg_renumber_map_labels(d_sg_renumber_map_labels.size());
+    raft::update_host(h_sg_renumber_map_labels.data(),
+                      d_sg_renumber_map_labels.data(),
+                      d_sg_renumber_map_labels.size(),
+                      handle.get_stream());
+
+    std::vector<vertex_t> h_mg_renumber_map_labels(mg_graph_view.get_number_of_local_vertices());
+    raft::update_host(h_mg_renumber_map_labels.data(),
+                      d_mg_renumber_map_labels.data(),
+                      d_mg_renumber_map_labels.size(),
+                      handle.get_stream());
+
+    CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));
+
+    // 3. generate personalization vertex/value pairs (vertices stay sorted for the lookups below)
+
+    std::vector<vertex_t> h_personalization_vertices{};
+    std::vector<result_t> h_personalization_values{};
+    if (configuration.personalization_ratio > 0.0) {
+      std::default_random_engine generator{};  // default seed: same draws on every rank
+      std::uniform_real_distribution<double> distribution{0.0, 1.0};
+      h_personalization_vertices.resize(sg_graph_view.get_number_of_vertices());
+      std::iota(h_personalization_vertices.begin(), h_personalization_vertices.end(), vertex_t{0});
+      h_personalization_vertices.erase(
+        std::remove_if(h_personalization_vertices.begin(),
+                       h_personalization_vertices.end(),
+                       [&generator, &distribution, &configuration](auto v) {
+                         return distribution(generator) >= configuration.personalization_ratio;
+                       }),
+        h_personalization_vertices.end());
+      h_personalization_values.resize(h_personalization_vertices.size());
+      std::for_each(h_personalization_values.begin(),
+                    h_personalization_values.end(),
+                    [&distribution, &generator](auto& val) { val = distribution(generator); });
+    }
+
+    result_t constexpr alpha{0.85};
+    result_t constexpr epsilon{1e-6};
+
+    // 4. run SG pagerank (personalization vertices are looked up through the SG renumber map)
+
+    std::vector<vertex_t> h_sg_personalization_vertices{};
+    std::vector<result_t> h_sg_personalization_values{};
+    if (h_personalization_vertices.size() > 0) {
+      for (vertex_t i = 0; i < sg_graph_view.get_number_of_vertices(); ++i) {
+        auto it = std::lower_bound(h_personalization_vertices.begin(),  // may return end()
+                                   h_personalization_vertices.end(),
+                                   h_sg_renumber_map_labels[i]);
+        if ((it != h_personalization_vertices.end()) && (*it == h_sg_renumber_map_labels[i])) {
+          h_sg_personalization_vertices.push_back(i);
+          h_sg_personalization_values.push_back(
+            h_personalization_values[std::distance(h_personalization_vertices.begin(), it)]);
+        }
+      }
+    }
+
+    rmm::device_uvector<vertex_t> d_sg_personalization_vertices(
+      h_sg_personalization_vertices.size(), handle.get_stream());
+    rmm::device_uvector<result_t> d_sg_personalization_values(d_sg_personalization_vertices.size(),
+                                                              handle.get_stream());
+    if (d_sg_personalization_vertices.size() > 0) {
+      raft::update_device(d_sg_personalization_vertices.data(),
+                          h_sg_personalization_vertices.data(),
+                          h_sg_personalization_vertices.size(),
+                          handle.get_stream());
+      raft::update_device(d_sg_personalization_values.data(),
+                          h_sg_personalization_values.data(),
+                          h_sg_personalization_values.size(),
+                          handle.get_stream());
+    }
+
+    rmm::device_uvector<result_t> d_sg_pageranks(sg_graph_view.get_number_of_vertices(),
+                                                 handle.get_stream());
+
+    cugraph::experimental::pagerank(handle,
+                                    sg_graph_view,
+                                    static_cast<weight_t*>(nullptr),
+                                    d_sg_personalization_vertices.data(),
+                                    d_sg_personalization_values.data(),
+                                    static_cast<vertex_t>(d_sg_personalization_vertices.size()),
+                                    d_sg_pageranks.begin(),
+                                    alpha,
+                                    epsilon,
+                                    std::numeric_limits<size_t>::max(),  // max_iterations
+                                    false,
+                                    false);
+
+    std::vector<result_t> h_sg_pageranks(sg_graph_view.get_number_of_vertices());
+    raft::update_host(
+      h_sg_pageranks.data(), d_sg_pageranks.data(), d_sg_pageranks.size(), handle.get_stream());
+    CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));
+
+    // 5. run MG pagerank (each rank only handles its local vertex range)
+
+    std::vector<vertex_t> h_mg_personalization_vertices{};
+    std::vector<result_t> h_mg_personalization_values{};
+    if (h_personalization_vertices.size() > 0) {
+      for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) {
+        auto it = std::lower_bound(h_personalization_vertices.begin(),  // may return end()
+                                   h_personalization_vertices.end(),
+                                   h_mg_renumber_map_labels[i]);
+        if ((it != h_personalization_vertices.end()) && (*it == h_mg_renumber_map_labels[i])) {
+          h_mg_personalization_vertices.push_back(mg_graph_view.get_local_vertex_first() + i);
+          h_mg_personalization_values.push_back(
+            h_personalization_values[std::distance(h_personalization_vertices.begin(), it)]);
+        }
+      }
+    }
+
+    rmm::device_uvector<vertex_t> d_mg_personalization_vertices(
+      h_mg_personalization_vertices.size(), handle.get_stream());
+    rmm::device_uvector<result_t> d_mg_personalization_values(d_mg_personalization_vertices.size(),
+                                                              handle.get_stream());
+    if (d_mg_personalization_vertices.size() > 0) {
+      raft::update_device(d_mg_personalization_vertices.data(),
+                          h_mg_personalization_vertices.data(),
+                          h_mg_personalization_vertices.size(),
+                          handle.get_stream());
+      raft::update_device(d_mg_personalization_values.data(),
+                          h_mg_personalization_values.data(),
+                          h_mg_personalization_values.size(),
+                          handle.get_stream());
+    }
+
+    rmm::device_uvector<result_t> d_mg_pageranks(mg_graph_view.get_number_of_local_vertices(),
+                                                 handle.get_stream());
+
+    CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+
+    cugraph::experimental::pagerank(handle,
+                                    mg_graph_view,
+                                    static_cast<weight_t*>(nullptr),
+                                    d_mg_personalization_vertices.data(),
+                                    d_mg_personalization_values.data(),
+                                    static_cast<vertex_t>(d_mg_personalization_vertices.size()),
+                                    d_mg_pageranks.begin(),
+                                    alpha,
+                                    epsilon,
+                                    std::numeric_limits<size_t>::max(),  // max_iterations
+                                    false,
+                                    false);
+
+    CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+
+    std::vector<result_t> h_mg_pageranks(mg_graph_view.get_number_of_local_vertices());
+    raft::update_host(
+      h_mg_pageranks.data(), d_mg_pageranks.data(), d_mg_pageranks.size(), handle.get_stream());
+    CUDA_TRY(cudaStreamSynchronize(handle.get_stream()));
+
+    // 6. compare SG & MG results (SG values are first re-indexed by their renumber map label)
+
+    std::vector<result_t> h_sg_shuffled_pageranks(sg_graph_view.get_number_of_vertices(),
+                                                  result_t{0.0});
+    for (size_t i = 0; i < h_sg_pageranks.size(); ++i) {
+      h_sg_shuffled_pageranks[h_sg_renumber_map_labels[i]] = h_sg_pageranks[i];
+    }
+
+    auto threshold_ratio = 1e-3;
+    auto threshold_magnitude =
+      (1.0 / static_cast<result_t>(mg_graph_view.get_number_of_vertices())) *
+      threshold_ratio;  // skip comparison for low PageRank vertices (lowly ranked vertices)
+    auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) {
+      return std::abs(lhs - rhs) <
+             std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude);
+    };
+
+    for (vertex_t i = 0; i < mg_graph_view.get_number_of_local_vertices(); ++i) {
+      auto mapped_vertex = h_mg_renumber_map_labels[i];
+      ASSERT_TRUE(nearly_equal(h_mg_pageranks[i], h_sg_shuffled_pageranks[mapped_vertex]))
+        << "MG PageRank value for vertex: " << i << " in rank: " << comm_rank
+        << " has value: " << h_mg_pageranks[i]
+        << " which exceeds the error margin for comparing to SG value: "
+        << h_sg_shuffled_pageranks[mapped_vertex];
+    }
+  }
+};
+
+TEST_P(Tests_MGPageRank, CheckInt32Int32FloatFloat)  // int32 vertex/edge, float weight/result
+{
+  run_current_test<int32_t, int32_t, float, float>(GetParam());
+}
+
+INSTANTIATE_TEST_CASE_P(
+  simple_test,  // each case below: (graph source, personalization_ratio, test_weighted)
+  Tests_MGPageRank,
+  ::testing::Values(
+    PageRank_Usecase("test/datasets/karate.mtx", 0.0, false),
+    PageRank_Usecase("test/datasets/karate.mtx", 0.5, false),
+    PageRank_Usecase("test/datasets/karate.mtx", 0.0, true),
+    PageRank_Usecase("test/datasets/karate.mtx", 0.5, true),
+    PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, false),
+    PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, false),
+    PageRank_Usecase("test/datasets/web-Google.mtx", 0.0, true),
+    PageRank_Usecase("test/datasets/web-Google.mtx", 0.5, true),
+    PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, false),
+    PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, false),
+    PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.0, true),
+    PageRank_Usecase("test/datasets/ljournal-2008.mtx", 0.5, true),
+    PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, false),
+    PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, false),
+    PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.0, true),
+    PageRank_Usecase("test/datasets/webbase-1M.mtx", 0.5, true),
+    PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false},
+                     0.0,
+                     false),
+    PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false},
+                     0.5,
+                     false),
+    PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false},
+                     0.0,
+                     true),
+    PageRank_Usecase(cugraph::test::rmat_params_t{10, 16, 0.57, 0.19, 0.19, 0, false, false},
+                     0.5,
+                     true)));
+
+CUGRAPH_MG_TEST_PROGRAM_MAIN()  // expands to main(); the macro begins by calling MPI_Init
diff --git a/cpp/tests/pagerank/pagerank_mg_test.cpp b/cpp/tests/pagerank/pagerank_mg_test.cpp
deleted file mode 100644
index 7f789226bf1..00000000000
--- a/cpp/tests/pagerank/pagerank_mg_test.cpp
+++ /dev/null
@@ -1,229 +0,0 @@
-/*
- * Copyright (c) 2021, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <utilities/base_fixture.hpp>
-#include <utilities/mg_test_utilities.hpp>
-#include <utilities/test_utilities.hpp>
-
-#include <algorithms.hpp>
-#include <partition_manager.hpp>
-
-#include <gtest/gtest.h>
-
-#include <random>
-
-////////////////////////////////////////////////////////////////////////////////
-// Test param object. This defines the input and expected output for a test, and
-// will be instantiated as the parameter to the tests defined below using
-// INSTANTIATE_TEST_CASE_P()
-//
-typedef struct Pagerank_Testparams_t {
-  std::string graph_file_full_path{};
-  double personalization_ratio{0.0};
-  bool test_weighted{false};
-
-  Pagerank_Testparams_t(std::string const& graph_file_path,
-                        double personalization_ratio,
-                        bool test_weighted)
-    : personalization_ratio(personalization_ratio), test_weighted(test_weighted)
-  {
-    if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) {
-      graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path;
-    } else {
-      graph_file_full_path = graph_file_path;
-    }
-  };
-} Pagerank_Testparams_t;
-
-////////////////////////////////////////////////////////////////////////////////
-// Parameterized test fixture, to be used with TEST_P().  This defines common
-// setup and teardown steps as well as common utilities used by each E2E MG
-// test.  In this case, each test is identical except for the inputs and
-// expected outputs, so the entire test is defined in the run_test() method.
-//
-class Pagerank_E2E_MG_Testfixture_t : public cugraph::test::MG_TestFixture_t,
-                                      public ::testing::WithParamInterface<Pagerank_Testparams_t> {
- public:
-  Pagerank_E2E_MG_Testfixture_t() {}
-
-  // Run once for each test instance
-  virtual void SetUp() {}
-  virtual void TearDown() {}
-
-  // Return the results of running pagerank on a single GPU for the dataset in
-  // graph_file_path.
-  template <typename vertex_t, typename edge_t, typename weight_t, typename result_t>
-  std::vector<result_t> get_sg_results(raft::handle_t& handle,
-                                       const std::string& graph_file_path,
-                                       const result_t alpha,
-                                       const result_t epsilon)
-  {
-    auto graph =
-      cugraph::test::read_graph_from_matrix_market_file<vertex_t, edge_t, weight_t, true>(
-        handle, graph_file_path, true);  // FIXME: should use param.test_weighted instead of true
-
-    auto graph_view     = graph.view();
-    cudaStream_t stream = handle.get_stream();
-    rmm::device_uvector<result_t> d_pageranks(graph_view.get_number_of_vertices(), stream);
-
-    cugraph::experimental::pagerank(
-      handle,
-      graph_view,
-      static_cast<weight_t*>(nullptr),     // adj_matrix_row_out_weight_sums
-      static_cast<vertex_t*>(nullptr),     // personalization_vertices
-      static_cast<result_t*>(nullptr),     // personalization_values
-      static_cast<vertex_t>(0),            // personalization_vector_size
-      d_pageranks.begin(),                 // pageranks
-      alpha,                               // alpha (damping factor)
-      epsilon,                             // error tolerance for convergence
-      std::numeric_limits<size_t>::max(),  // max_iterations
-      false,                               // has_initial_guess
-      true);                               // do_expensive_check
-
-    std::vector<result_t> h_pageranks(graph_view.get_number_of_vertices());
-    raft::update_host(h_pageranks.data(), d_pageranks.data(), d_pageranks.size(), stream);
-
-    return h_pageranks;
-  }
-
-  // Compare the results of running pagerank on multiple GPUs to that of a
-  // single-GPU run for the configuration in param.
-  template <typename vertex_t, typename edge_t, typename weight_t, typename result_t>
-  void run_test(const Pagerank_Testparams_t& param)
-  {
-    result_t constexpr alpha{0.85};
-    result_t constexpr epsilon{1e-6};
-
-    raft::handle_t handle;
-    raft::comms::initialize_mpi_comms(&handle, MPI_COMM_WORLD);
-    const auto& comm = handle.get_comms();
-
-    cudaStream_t stream = handle.get_stream();
-
-    // Assuming 2 GPUs which means 1 row, 2 cols. 2 cols = row_comm_size of 2.
-    // FIXME: DO NOT ASSUME 2 GPUs, add code to compute prows, pcols
-    size_t row_comm_size{2};
-    cugraph::partition_2d::subcomm_factory_t<cugraph::partition_2d::key_naming_t, vertex_t>
-      subcomm_factory(handle, row_comm_size);
-
-    int my_rank = comm.get_rank();
-
-    // FIXME: graph must be weighted!
-    std::unique_ptr<cugraph::experimental::
-                      graph_t<vertex_t, edge_t, weight_t, true, true>>  // store_transposed=true,
-                                                                        // multi_gpu=true
-      mg_graph_ptr{};
-    rmm::device_uvector<vertex_t> d_renumber_map_labels(0, handle.get_stream());
-
-    std::tie(mg_graph_ptr, d_renumber_map_labels) = cugraph::test::
-      create_graph_for_gpu<vertex_t, edge_t, weight_t, true>  // store_transposed=true
-      (handle, param.graph_file_full_path);
-
-    auto mg_graph_view = mg_graph_ptr->view();
-
-    rmm::device_uvector<result_t> d_mg_pageranks(mg_graph_view.get_number_of_vertices(), stream);
-    CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
-
-    cugraph::experimental::pagerank(
-      handle,
-      mg_graph_view,
-      static_cast<weight_t*>(nullptr),     // adj_matrix_row_out_weight_sums
-      static_cast<vertex_t*>(nullptr),     // personalization_vertices
-      static_cast<result_t*>(nullptr),     // personalization_values
-      static_cast<vertex_t>(0),            // personalization_vector_size
-      d_mg_pageranks.begin(),              // pageranks
-      alpha,                               // alpha (damping factor)
-      epsilon,                             // error tolerance for convergence
-      std::numeric_limits<size_t>::max(),  // max_iterations
-      false,                               // has_initial_guess
-      true);                               // do_expensive_check
-
-    std::vector<result_t> h_mg_pageranks(mg_graph_view.get_number_of_vertices());
-
-    raft::update_host(h_mg_pageranks.data(), d_mg_pageranks.data(), d_mg_pageranks.size(), stream);
-
-    std::vector<vertex_t> h_renumber_map_labels(mg_graph_view.get_number_of_vertices());
-    raft::update_host(h_renumber_map_labels.data(),
-                      d_renumber_map_labels.data(),
-                      d_renumber_map_labels.size(),
-                      stream);
-
-    // Compare MG to SG
-    // Each GPU will have pagerank values for their range, so ech GPU must
-    // compare to specific SG results for their respective range.
-
-    auto h_sg_pageranks = get_sg_results<vertex_t, edge_t, weight_t, result_t>(
-      handle, param.graph_file_full_path, alpha, epsilon);
-
-    // For this test, each GPU will have the full set of vertices and
-    // therefore the pageranks vectors should be equal in size.
-    ASSERT_EQ(h_sg_pageranks.size(), h_mg_pageranks.size());
-
-    auto threshold_ratio = 1e-3;
-    auto threshold_magnitude =
-      (1.0 / static_cast<result_t>(mg_graph_view.get_number_of_vertices())) *
-      threshold_ratio;  // skip comparison for low PageRank verties (lowly ranked vertices)
-    auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) {
-      return std::abs(lhs - rhs) <
-             std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude);
-    };
-
-    vertex_t mapped_vertex{0};
-    for (vertex_t i = 0;
-         i + mg_graph_view.get_local_vertex_first() < mg_graph_view.get_local_vertex_last();
-         ++i) {
-      mapped_vertex = h_renumber_map_labels[i];
-      ASSERT_TRUE(nearly_equal(h_mg_pageranks[i], h_sg_pageranks[mapped_vertex]))
-        << "MG PageRank value for vertex: " << i << " in rank: " << my_rank
-        << " has value: " << h_mg_pageranks[i]
-        << " which exceeds the error margin for comparing to SG value: " << h_sg_pageranks[i];
-    }
-  }
-};
-
-////////////////////////////////////////////////////////////////////////////////
-TEST_P(Pagerank_E2E_MG_Testfixture_t, CheckInt32Int32FloatFloat)
-{
-  run_test<int32_t, int32_t, float, float>(GetParam());
-}
-
-INSTANTIATE_TEST_CASE_P(
-  e2e,
-  Pagerank_E2E_MG_Testfixture_t,
-
-  // FIXME: the personalization_ratio and use_weighted boo are not used
-  // (personilization vectors are not used, and all datasets are assumed
-  // weighted). update this to use personilization vectors and non-weighted
-  // graphs.
-  ::testing::Values(Pagerank_Testparams_t("test/datasets/karate.mtx", 0.0, true),
-                    // FIXME: The commented datasets contain isolate vertices
-                    // which result in a different number of vertices in the
-                    // renumbered MG graph (because the renumbering function
-                    // does not include them) vs. the SG graph object used for
-                    // the pagerank comparison because the SG graph reads the
-                    // COO as-is without renumbering.  Update the utility that
-                    // reads a .mtx and constructs a SG graph object to also
-                    // renumber and return the renumber vertices vector. This
-                    // will result in a comparison of an equal number of
-                    // pagerank values.
-                    //
-                    // Pagerank_Testparams_t("test/datasets/web-Google.mtx", 0.0, true),
-                    // Pagerank_Testparams_t("test/datasets/ljournal-2008.mtx", 0.0, true),
-                    Pagerank_Testparams_t("test/datasets/webbase-1M.mtx", 0.0, true)));
-
-// FIXME: Enable proper RMM configuration by using CUGRAPH_TEST_PROGRAM_MAIN().
-//        Currently seeing a RMM failure during init, need to investigate.
-// CUGRAPH_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/utilities/base_fixture.hpp b/cpp/tests/utilities/base_fixture.hpp
index 3525db73425..e8f11acfbf4 100644
--- a/cpp/tests/utilities/base_fixture.hpp
+++ b/cpp/tests/utilities/base_fixture.hpp
@@ -32,18 +32,17 @@
 namespace cugraph {
 namespace test {
 
-// FIXME: The BaseFixture class is not used in any tests. This file is only
-// needed for the CUGRAPH_TEST_PROGRAM_MAIN macro and the code that it calls, so
-// consider removing the BaseFixture class and renaming this file, or moving
-// CUGRAPH_TEST_PROGRAM_MAIN to the test_utilities.hpp file and removing this
-// file completely.
+// FIXME: The BaseFixture class is not used in any tests. This file is only needed for the
+// CUGRAPH_TEST_PROGRAM_MAIN macro and the code that it calls, so consider removing the BaseFixture
+// class and renaming this file, or moving CUGRAPH_TEST_PROGRAM_MAIN to the test_utilities.hpp file
+// and removing this file completely.
 
 /**
- * @brief Base test fixture class from which all libcudf tests should inherit.
+ * @brief Base test fixture class from which all libcugraph tests should inherit.
  *
  * Example:
  * ```
- * class MyTestFixture : public cudf::test::BaseFixture {};
+ * class MyTestFixture : public cugraph::test::BaseFixture {};
  * ```
  **/
 class BaseFixture : public ::testing::Test {
@@ -51,8 +50,8 @@ class BaseFixture : public ::testing::Test {
 
  public:
   /**
-   * @brief Returns pointer to `device_memory_resource` that should be used for
-   * all tests inheriting from this fixture
+   * @brief Returns pointer to `device_memory_resource` that should be used for all tests inheriting
+   * from this fixture
    **/
   rmm::mr::device_memory_resource *mr() { return _mr; }
 };
@@ -77,15 +76,14 @@ inline auto make_binning()
 }
 
 /**
- * @brief Creates a memory resource for the unit test environment
- * given the name of the allocation mode.
+ * @brief Creates a memory resource for the unit test environment given the name of the allocation
+ * mode.
  *
- * The returned resource instance must be kept alive for the duration of
- * the tests. Attaching the resource to a TestEnvironment causes
- * issues since the environment objects are not destroyed until
+ * The returned resource instance must be kept alive for the duration of the tests. Attaching the
+ * resource to a TestEnvironment causes issues since the environment objects are not destroyed until
  * after the runtime is shutdown.
  *
- * @throw cudf::logic_error if the `allocation_mode` is unsupported.
+ * @throw cugraph::logic_error if the `allocation_mode` is unsupported.
  *
  * @param allocation_mode String identifies which resource type.
  *        Accepted types are "pool", "cuda", and "managed" only.
@@ -105,17 +103,17 @@ inline std::shared_ptr<rmm::mr::device_memory_resource> create_memory_resource(
 }  // namespace cugraph
 
 /**
- * @brief Parses the cuDF test command line options.
+ * @brief Parses the cuGraph test command line options.
  *
- * Currently only supports 'rmm_mode' string paramater, which set the rmm
- * allocation mode. The default value of the parameter is 'pool'.
+ * Currently only supports 'rmm_mode' string parameter, which sets the rmm allocation mode. The
+ * default value of the parameter is 'pool'.
  *
  * @return Parsing results in the form of cxxopts::ParseResult
  */
 inline auto parse_test_options(int argc, char **argv)
 {
   try {
-    cxxopts::Options options(argv[0], " - cuDF tests command line options");
+    cxxopts::Options options(argv[0], " - cuGraph tests command line options");
     options.allow_unrecognised_options().add_options()(
       "rmm_mode", "RMM allocation mode", cxxopts::value<std::string>()->default_value("pool"));
 
@@ -128,13 +126,11 @@ inline auto parse_test_options(int argc, char **argv)
 /**
  * @brief Macro that defines main function for gtest programs that use rmm
  *
- * Should be included in every test program that uses rmm allocators since
- * it maintains the lifespan of the rmm default memory resource.
- * This `main` function is a wrapper around the google test generated `main`,
- * maintaining the original functionality. In addition, this custom `main`
- * function parses the command line to customize test behavior, like the
- * allocation mode used for creating the default memory resource.
- *
+ * Should be included in every test program that uses rmm allocators since it maintains the lifespan
+ * of the rmm default memory resource. This `main` function is a wrapper around the google test
+ * generated `main`, maintaining the original functionality. In addition, this custom `main`
+ * function parses the command line to customize test behavior, like the allocation mode used for
+ * creating the default memory resource.
  */
 #define CUGRAPH_TEST_PROGRAM_MAIN()                                        \
   int main(int argc, char **argv)                                          \
@@ -146,3 +142,26 @@ inline auto parse_test_options(int argc, char **argv)
     rmm::mr::set_current_device_resource(resource.get());                  \
     return RUN_ALL_TESTS();                                                \
   }
+/** @brief `main` for multi-GPU gtest programs: MPI init/finalize, rank i runs on GPU i. */
+#define CUGRAPH_MG_TEST_PROGRAM_MAIN()                                                \
+  int main(int argc, char **argv)                                                     \
+  {                                                                                   \
+    MPI_TRY(MPI_Init(&argc, &argv));                                                  \
+    int comm_rank{};                                                                  \
+    int comm_size{};                                                                  \
+    MPI_TRY(MPI_Comm_rank(MPI_COMM_WORLD, &comm_rank));                               \
+    MPI_TRY(MPI_Comm_size(MPI_COMM_WORLD, &comm_size));                               \
+    int num_gpus{};                                                                   \
+    CUDA_TRY(cudaGetDeviceCount(&num_gpus));                                          \
+    CUGRAPH_EXPECTS(                                                                  \
+      comm_size <= num_gpus, "# MPI ranks (%d) > # GPUs (%d).", comm_size, num_gpus); \
+    CUDA_TRY(cudaSetDevice(comm_rank));                                               \
+    ::testing::InitGoogleTest(&argc, argv);                                           \
+    auto const cmd_opts = parse_test_options(argc, argv);                             \
+    auto const rmm_mode = cmd_opts["rmm_mode"].as<std::string>();                     \
+    auto resource       = cugraph::test::create_memory_resource(rmm_mode);            \
+    rmm::mr::set_current_device_resource(resource.get());                             \
+    auto ret = RUN_ALL_TESTS();                                                       \
+    MPI_TRY(MPI_Finalize());                                                          \
+    return ret;                                                                       \
+  }
diff --git a/cpp/tests/utilities/generate_graph_from_edgelist.cu b/cpp/tests/utilities/generate_graph_from_edgelist.cu
new file mode 100644
index 00000000000..1b9fe6051f7
--- /dev/null
+++ b/cpp/tests/utilities/generate_graph_from_edgelist.cu
@@ -0,0 +1,526 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <utilities/test_utilities.hpp>
+
+#include <experimental/detail/graph_utils.cuh>
+#include <experimental/graph_functions.hpp>
+#include <utilities/error.hpp>
+
+#include <rmm/thrust_rmm_allocator.h>
+
+#include <thrust/remove.h>
+
+#include <cstdint>
+
+namespace cugraph {
+namespace test {
+
+namespace detail {
+// Multi-GPU overload: each rank keeps only its own vertices and edges, then renumbers.
+template <typename vertex_t,
+          typename edge_t,
+          typename weight_t,
+          bool store_transposed,
+          bool multi_gpu>
+std::enable_if_t<
+  multi_gpu,
+  std::tuple<
+    cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu>,
+    rmm::device_uvector<vertex_t>>>
+generate_graph_from_edgelist(raft::handle_t const& handle,
+                             rmm::device_uvector<vertex_t>&& vertices,
+                             rmm::device_uvector<vertex_t>&& edgelist_rows,
+                             rmm::device_uvector<vertex_t>&& edgelist_cols,
+                             rmm::device_uvector<weight_t>&& edgelist_weights,
+                             bool is_symmetric,
+                             bool test_weighted,
+                             bool renumber)
+{
+  CUGRAPH_EXPECTS(renumber, "renumber should be true if multi_gpu is true.");
+
+  auto& comm               = handle.get_comms();
+  auto const comm_size     = comm.get_size();
+  auto const comm_rank     = comm.get_rank();
+  auto& row_comm           = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name());
+  auto const row_comm_size = row_comm.get_size();
+  auto& col_comm           = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name());
+  auto const col_comm_size = col_comm.get_size();
+
+  vertex_t number_of_vertices = static_cast<vertex_t>(vertices.size());
+  // 1. keep only the vertices assigned to this GPU (size taken above, before filtering)
+  auto vertex_key_func =
+    cugraph::experimental::detail::compute_gpu_id_from_vertex_t<vertex_t>{comm_size};
+  vertices.resize(thrust::distance(vertices.begin(),
+                                   thrust::remove_if(
+                                     rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+                                     vertices.begin(),
+                                     vertices.end(),
+                                     [comm_rank, key_func = vertex_key_func] __device__(auto val) {
+                                       return key_func(val) != comm_rank;
+                                     })),
+                  handle.get_stream());
+  vertices.shrink_to_fit(handle.get_stream());
+  // 2. keep only the edges assigned to this GPU
+  auto edge_key_func = cugraph::experimental::detail::compute_gpu_id_from_edge_t<vertex_t>{
+    false, comm_size, row_comm_size, col_comm_size};
+  size_t number_of_local_edges{};
+  if (test_weighted) {
+    auto edge_first = thrust::make_zip_iterator(
+      thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin(), edgelist_weights.begin()));
+    number_of_local_edges = thrust::distance(
+      edge_first,
+      thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+                        edge_first,
+                        edge_first + edgelist_rows.size(),
+                        [comm_rank, key_func = edge_key_func] __device__(auto e) {
+                          auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e);
+                          auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e);
+                          return key_func(major, minor) != comm_rank;
+                        }));
+  } else {
+    auto edge_first =
+      thrust::make_zip_iterator(thrust::make_tuple(edgelist_rows.begin(), edgelist_cols.begin()));
+    number_of_local_edges = thrust::distance(
+      edge_first,
+      thrust::remove_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+                        edge_first,
+                        edge_first + edgelist_rows.size(),
+                        [comm_rank, key_func = edge_key_func] __device__(auto e) {
+                          auto major = store_transposed ? thrust::get<1>(e) : thrust::get<0>(e);
+                          auto minor = store_transposed ? thrust::get<0>(e) : thrust::get<1>(e);
+                          return key_func(major, minor) != comm_rank;
+                        }));
+  }
+
+  edgelist_rows.resize(number_of_local_edges, handle.get_stream());
+  edgelist_rows.shrink_to_fit(handle.get_stream());
+  edgelist_cols.resize(number_of_local_edges, handle.get_stream());
+  edgelist_cols.shrink_to_fit(handle.get_stream());
+  if (test_weighted) {
+    edgelist_weights.resize(number_of_local_edges, handle.get_stream());
+    edgelist_weights.shrink_to_fit(handle.get_stream());
+  }
+
+  // 3. renumber
+
+  rmm::device_uvector<vertex_t> renumber_map_labels(0, handle.get_stream());
+  cugraph::experimental::partition_t<vertex_t> partition{};
+  vertex_t aggregate_number_of_vertices{};
+  edge_t number_of_edges{};
+  // FIXME: set do_expensive_check to false once validated
+  std::tie(renumber_map_labels, partition, aggregate_number_of_vertices, number_of_edges) =
+    cugraph::experimental::renumber_edgelist<vertex_t, edge_t, multi_gpu>(
+      handle,
+      vertices.data(),
+      static_cast<vertex_t>(vertices.size()),
+      store_transposed ? edgelist_cols.data() : edgelist_rows.data(),
+      store_transposed ? edgelist_rows.data() : edgelist_cols.data(),
+      static_cast<edge_t>(edgelist_rows.size()),
+      false,
+      true);
+  CUGRAPH_EXPECTS(aggregate_number_of_vertices == number_of_vertices, "vertex count mismatch.");
+
+  // 4. create a graph
+
+  return std::make_tuple(
+    cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu>(
+      handle,
+      std::vector<cugraph::experimental::edgelist_t<vertex_t, edge_t, weight_t>>{
+        cugraph::experimental::edgelist_t<vertex_t, edge_t, weight_t>{
+          edgelist_rows.data(),
+          edgelist_cols.data(),
+          test_weighted ? edgelist_weights.data() : nullptr,
+          static_cast<edge_t>(edgelist_rows.size())}},
+      partition,
+      number_of_vertices,
+      number_of_edges,
+      cugraph::experimental::graph_properties_t{is_symmetric, false},
+      true,
+      true),
+    std::move(renumber_map_labels));
+}
+// Single-GPU overload: optionally renumbers the edge list, then builds the graph from it.
+template <typename vertex_t,
+          typename edge_t,
+          typename weight_t,
+          bool store_transposed,
+          bool multi_gpu>
+std::enable_if_t<
+  !multi_gpu,
+  std::tuple<
+    cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu>,
+    rmm::device_uvector<vertex_t>>>
+generate_graph_from_edgelist(raft::handle_t const& handle,
+                             rmm::device_uvector<vertex_t>&& vertices,
+                             rmm::device_uvector<vertex_t>&& edgelist_rows,
+                             rmm::device_uvector<vertex_t>&& edgelist_cols,
+                             rmm::device_uvector<weight_t>&& edgelist_weights,
+                             bool is_symmetric,
+                             bool test_weighted,
+                             bool renumber)
+{
+  auto const num_vertices = static_cast<vertex_t>(vertices.size());
+  auto const num_edges    = static_cast<edge_t>(edgelist_rows.size());
+  // Major (minor) vertices are the columns (rows) when the graph is stored transposed.
+  auto* majors = store_transposed ? edgelist_cols.data() : edgelist_rows.data();
+  auto* minors = store_transposed ? edgelist_rows.data() : edgelist_cols.data();
+
+  // The renumber map stays empty unless renumbering is requested.
+  // FIXME: set do_expensive_check to false once validated
+  rmm::device_uvector<vertex_t> renumber_map(0, handle.get_stream());
+  if (renumber) {
+    renumber_map = cugraph::experimental::renumber_edgelist<vertex_t, edge_t, multi_gpu>(
+      handle, vertices.data(), num_vertices, majors, minors, num_edges, true);
+  }
+
+  cugraph::experimental::edgelist_t<vertex_t, edge_t, weight_t> edgelist{
+    edgelist_rows.data(),
+    edgelist_cols.data(),
+    test_weighted ? edgelist_weights.data() : nullptr,
+    num_edges};
+  // FIXME: set do_expensive_check to false once validated
+  cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu> graph(
+    handle,
+    edgelist,
+    num_vertices,
+    cugraph::experimental::graph_properties_t{is_symmetric, false},
+    renumber,
+    true);
+  return std::make_tuple(std::move(graph), std::move(renumber_map));
+}
+
+}  // namespace detail
+// Public entry point: forwards to the detail:: overload selected by multi_gpu.
+template <typename vertex_t,
+          typename edge_t,
+          typename weight_t,
+          bool store_transposed,
+          bool multi_gpu>
+std::tuple<cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu>,
+           rmm::device_uvector<vertex_t>>
+generate_graph_from_edgelist(raft::handle_t const& handle,
+                             rmm::device_uvector<vertex_t>&& vertices,
+                             rmm::device_uvector<vertex_t>&& edgelist_rows,
+                             rmm::device_uvector<vertex_t>&& edgelist_cols,
+                             rmm::device_uvector<weight_t>&& edgelist_weights,
+                             bool is_symmetric,
+                             bool test_weighted,
+                             bool renumber)
+{
+  return detail::
+    generate_graph_from_edgelist<vertex_t, edge_t, weight_t, store_transposed, multi_gpu>(
+      handle,
+      std::move(vertices),
+      std::move(edgelist_rows),
+      std::move(edgelist_cols),
+      std::move(edgelist_weights),
+      is_symmetric,
+      test_weighted,
+      renumber);
+}
+
+// explicit instantiations (24 = 3 vertex/edge type pairs x 2 weight types x 2 x 2 bool flags)
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, float, false, false>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_edgelist<int32_t, int32_t, float, false, false>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int32_t>&& vertices,
+  rmm::device_uvector<int32_t>&& edgelist_rows,
+  rmm::device_uvector<int32_t>&& edgelist_cols,
+  rmm::device_uvector<float>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, float, false, true>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_edgelist<int32_t, int32_t, float, false, true>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int32_t>&& vertices,
+  rmm::device_uvector<int32_t>&& edgelist_rows,
+  rmm::device_uvector<int32_t>&& edgelist_cols,
+  rmm::device_uvector<float>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, float, true, false>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_edgelist<int32_t, int32_t, float, true, false>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int32_t>&& vertices,
+  rmm::device_uvector<int32_t>&& edgelist_rows,
+  rmm::device_uvector<int32_t>&& edgelist_cols,
+  rmm::device_uvector<float>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, float, true, true>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_edgelist<int32_t, int32_t, float, true, true>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int32_t>&& vertices,
+  rmm::device_uvector<int32_t>&& edgelist_rows,
+  rmm::device_uvector<int32_t>&& edgelist_cols,
+  rmm::device_uvector<float>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, double, false, false>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_edgelist<int32_t, int32_t, double, false, false>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int32_t>&& vertices,
+  rmm::device_uvector<int32_t>&& edgelist_rows,
+  rmm::device_uvector<int32_t>&& edgelist_cols,
+  rmm::device_uvector<double>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, double, false, true>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_edgelist<int32_t, int32_t, double, false, true>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int32_t>&& vertices,
+  rmm::device_uvector<int32_t>&& edgelist_rows,
+  rmm::device_uvector<int32_t>&& edgelist_cols,
+  rmm::device_uvector<double>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, double, true, false>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_edgelist<int32_t, int32_t, double, true, false>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int32_t>&& vertices,
+  rmm::device_uvector<int32_t>&& edgelist_rows,
+  rmm::device_uvector<int32_t>&& edgelist_cols,
+  rmm::device_uvector<double>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, double, true, true>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_edgelist<int32_t, int32_t, double, true, true>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int32_t>&& vertices,
+  rmm::device_uvector<int32_t>&& edgelist_rows,
+  rmm::device_uvector<int32_t>&& edgelist_cols,
+  rmm::device_uvector<double>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, float, false, false>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_edgelist<int32_t, int64_t, float, false, false>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int32_t>&& vertices,
+  rmm::device_uvector<int32_t>&& edgelist_rows,
+  rmm::device_uvector<int32_t>&& edgelist_cols,
+  rmm::device_uvector<float>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, float, false, true>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_edgelist<int32_t, int64_t, float, false, true>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int32_t>&& vertices,
+  rmm::device_uvector<int32_t>&& edgelist_rows,
+  rmm::device_uvector<int32_t>&& edgelist_cols,
+  rmm::device_uvector<float>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, float, true, false>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_edgelist<int32_t, int64_t, float, true, false>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int32_t>&& vertices,
+  rmm::device_uvector<int32_t>&& edgelist_rows,
+  rmm::device_uvector<int32_t>&& edgelist_cols,
+  rmm::device_uvector<float>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, float, true, true>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_edgelist<int32_t, int64_t, float, true, true>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int32_t>&& vertices,
+  rmm::device_uvector<int32_t>&& edgelist_rows,
+  rmm::device_uvector<int32_t>&& edgelist_cols,
+  rmm::device_uvector<float>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, double, false, false>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_edgelist<int32_t, int64_t, double, false, false>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int32_t>&& vertices,
+  rmm::device_uvector<int32_t>&& edgelist_rows,
+  rmm::device_uvector<int32_t>&& edgelist_cols,
+  rmm::device_uvector<double>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, double, false, true>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_edgelist<int32_t, int64_t, double, false, true>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int32_t>&& vertices,
+  rmm::device_uvector<int32_t>&& edgelist_rows,
+  rmm::device_uvector<int32_t>&& edgelist_cols,
+  rmm::device_uvector<double>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, double, true, false>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_edgelist<int32_t, int64_t, double, true, false>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int32_t>&& vertices,
+  rmm::device_uvector<int32_t>&& edgelist_rows,
+  rmm::device_uvector<int32_t>&& edgelist_cols,
+  rmm::device_uvector<double>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, double, true, true>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_edgelist<int32_t, int64_t, double, true, true>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int32_t>&& vertices,
+  rmm::device_uvector<int32_t>&& edgelist_rows,
+  rmm::device_uvector<int32_t>&& edgelist_cols,
+  rmm::device_uvector<double>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, float, false, false>,
+                    rmm::device_uvector<int64_t>>
+generate_graph_from_edgelist<int64_t, int64_t, float, false, false>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int64_t>&& vertices,
+  rmm::device_uvector<int64_t>&& edgelist_rows,
+  rmm::device_uvector<int64_t>&& edgelist_cols,
+  rmm::device_uvector<float>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, float, false, true>,
+                    rmm::device_uvector<int64_t>>
+generate_graph_from_edgelist<int64_t, int64_t, float, false, true>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int64_t>&& vertices,
+  rmm::device_uvector<int64_t>&& edgelist_rows,
+  rmm::device_uvector<int64_t>&& edgelist_cols,
+  rmm::device_uvector<float>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, float, true, false>,
+                    rmm::device_uvector<int64_t>>
+generate_graph_from_edgelist<int64_t, int64_t, float, true, false>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int64_t>&& vertices,
+  rmm::device_uvector<int64_t>&& edgelist_rows,
+  rmm::device_uvector<int64_t>&& edgelist_cols,
+  rmm::device_uvector<float>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, float, true, true>,
+                    rmm::device_uvector<int64_t>>
+generate_graph_from_edgelist<int64_t, int64_t, float, true, true>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int64_t>&& vertices,
+  rmm::device_uvector<int64_t>&& edgelist_rows,
+  rmm::device_uvector<int64_t>&& edgelist_cols,
+  rmm::device_uvector<float>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, double, false, false>,
+                    rmm::device_uvector<int64_t>>
+generate_graph_from_edgelist<int64_t, int64_t, double, false, false>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int64_t>&& vertices,
+  rmm::device_uvector<int64_t>&& edgelist_rows,
+  rmm::device_uvector<int64_t>&& edgelist_cols,
+  rmm::device_uvector<double>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, double, false, true>,
+                    rmm::device_uvector<int64_t>>
+generate_graph_from_edgelist<int64_t, int64_t, double, false, true>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int64_t>&& vertices,
+  rmm::device_uvector<int64_t>&& edgelist_rows,
+  rmm::device_uvector<int64_t>&& edgelist_cols,
+  rmm::device_uvector<double>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, double, true, false>,
+                    rmm::device_uvector<int64_t>>
+generate_graph_from_edgelist<int64_t, int64_t, double, true, false>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int64_t>&& vertices,
+  rmm::device_uvector<int64_t>&& edgelist_rows,
+  rmm::device_uvector<int64_t>&& edgelist_cols,
+  rmm::device_uvector<double>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, double, true, true>,
+                    rmm::device_uvector<int64_t>>
+generate_graph_from_edgelist<int64_t, int64_t, double, true, true>(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int64_t>&& vertices,
+  rmm::device_uvector<int64_t>&& edgelist_rows,
+  rmm::device_uvector<int64_t>&& edgelist_cols,
+  rmm::device_uvector<double>&& edgelist_weights,
+  bool is_symmetric,
+  bool test_weighted,
+  bool renumber);
+
+}  // namespace test
+}  // namespace cugraph
diff --git a/cpp/tests/utilities/matrix_market_file_utilities.cu b/cpp/tests/utilities/matrix_market_file_utilities.cu
new file mode 100644
index 00000000000..ddbbac603ee
--- /dev/null
+++ b/cpp/tests/utilities/matrix_market_file_utilities.cu
@@ -0,0 +1,585 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <utilities/test_utilities.hpp>
+
+#include <functions.hpp>
+#include <utilities/error.hpp>
+
+#include <raft/cudart_utils.h>
+#include <rmm/thrust_rmm_allocator.h>
+
+#include <thrust/sequence.h>
+
+#include <cstdint>
+
+namespace cugraph {
+namespace test {
+
+/// Read matrix properties from Matrix Market file
+/** Matrix Market file is assumed to be a sparse matrix in coordinate
+ *  format.
+ *
+ *  @param f File stream for Matrix Market file.
+ *  @param tg Boolean indicating whether to convert matrix to general
+ *  format (from symmetric, Hermitian, or skew symmetric format).
+ *  @param t (Output) MM_typecode with matrix properties.
+ *  @param m (Output) Number of matrix rows.
+ *  @param n (Output) Number of matrix columns.
+ *  @param nnz (Output) Number of non-zero matrix entries (after expansion if tg is set).
+ *  @return Zero if properties were read successfully. Otherwise
+ *  non-zero (including when an entry line is truncated or malformed).
+ */
+template <typename IndexType_>
+int mm_properties(FILE* f, int tg, MM_typecode* t, IndexType_* m, IndexType_* n, IndexType_* nnz)
+{
+  // Read matrix properties from file
+  int mint, nint, nnzint;
+  if (fseek(f, 0, SEEK_SET)) {
+    fprintf(stderr, "Error: could not set position in file\n");
+    return -1;
+  }
+  if (mm_read_banner(f, t)) {
+    fprintf(stderr, "Error: could not read Matrix Market file banner\n");
+    return -1;
+  }
+  if (!mm_is_matrix(*t) || !mm_is_coordinate(*t)) {
+    fprintf(stderr, "Error: file does not contain matrix in coordinate format\n");
+    return -1;
+  }
+  if (mm_read_mtx_crd_size(f, &mint, &nint, &nnzint)) {
+    fprintf(stderr, "Error: could not read matrix dimensions\n");
+    return -1;
+  }
+  if (!mm_is_pattern(*t) && !mm_is_real(*t) && !mm_is_integer(*t) && !mm_is_complex(*t)) {
+    fprintf(stderr, "Error: matrix entries are not valid type\n");
+    return -1;
+  }
+  *m   = mint;
+  *n   = nint;
+  *nnz = nnzint;
+
+  // Find total number of non-zero entries
+  if (tg && !mm_is_general(*t)) {
+    // Non-diagonal entries should be counted twice
+    *nnz *= 2;
+
+    // Diagonal entries should not be double-counted
+    int const num_fields = mm_is_pattern(*t) ? 2 : ((mm_is_real(*t) || mm_is_integer(*t)) ? 3 : 4);
+    for (int i = 0; i < nnzint; ++i) {
+      // Read matrix entry; a short read is an error since row/col would be left unset
+      // MTX only supports int for row and col idx
+      int row, col, st;
+      double rval, ival;
+      if (mm_is_pattern(*t))
+        st = fscanf(f, "%d %d\n", &row, &col);
+      else if (mm_is_real(*t) || mm_is_integer(*t))
+        st = fscanf(f, "%d %d %lg\n", &row, &col, &rval);
+      else  // Complex matrix
+        st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival);
+      if (ferror(f) || (st != num_fields)) {
+        fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1);
+        return -1;
+      }
+
+      // Check if entry is diagonal
+      if (row == col) --(*nnz);
+    }
+  }
+
+  return 0;
+}
+
+/// Read Matrix Market file and convert to COO format matrix
+/** Matrix Market file is assumed to be a sparse matrix in coordinate
+ *  format.
+ *
+ *  @param f File stream for Matrix Market file.
+ *  @param tg Boolean indicating whether to convert matrix to general
+ *  format (from symmetric, Hermitian, or skew symmetric format).
+ *  @param nnz Number of entries the output arrays can hold; exceeding it is an error.
+ *  @param cooRowInd (Output) Row indices for COO matrix. Should have
+ *  at least nnz entries.
+ *  @param cooColInd (Output) Column indices for COO matrix. Should
+ *  have at least nnz entries.
+ *  @param cooRVal (Output) Real component of COO matrix
+ *  entries. Should have at least nnz entries. Ignored if null
+ *  pointer.
+ *  @param cooIVal (Output) Imaginary component of COO matrix
+ *  entries. Should have at least nnz entries. Ignored if null
+ *  pointer.
+ *  @return Zero if matrix was read successfully. Otherwise non-zero.
+ */
+template <typename IndexType_, typename ValueType_>
+int mm_to_coo(FILE* f,
+              int tg,
+              IndexType_ nnz,
+              IndexType_* cooRowInd,
+              IndexType_* cooColInd,
+              ValueType_* cooRVal,
+              ValueType_* cooIVal)
+{
+  // Read matrix properties from file
+  MM_typecode t;
+  int m, n, nnzOld;
+  if (fseek(f, 0, SEEK_SET)) {
+    fprintf(stderr, "Error: could not set position in file\n");
+    return -1;
+  }
+  if (mm_read_banner(f, &t)) {
+    fprintf(stderr, "Error: could not read Matrix Market file banner\n");
+    return -1;
+  }
+  if (!mm_is_matrix(t) || !mm_is_coordinate(t)) {
+    fprintf(stderr, "Error: file does not contain matrix in coordinate format\n");
+    return -1;
+  }
+  if (mm_read_mtx_crd_size(f, &m, &n, &nnzOld)) {
+    fprintf(stderr, "Error: could not read matrix dimensions\n");
+    return -1;
+  }
+  if (!mm_is_pattern(t) && !mm_is_real(t) && !mm_is_integer(t) && !mm_is_complex(t)) {
+    fprintf(stderr, "Error: matrix entries are not valid type\n");
+    return -1;
+  }
+
+  // Add each matrix entry in file to COO format matrix
+  int const num_fields = mm_is_pattern(t) ? 2 : ((mm_is_real(t) || mm_is_integer(t)) ? 3 : 4);
+  int j                = 0;  // Entry index in COO format matrix
+  for (int i = 0; i < nnzOld; ++i) {
+    // Read entry from file; a pattern entry has an implicit value of 1
+    int row, col;
+    double rval = 1.0, ival = 0.0;
+    int st;
+    if (mm_is_pattern(t))
+      st = fscanf(f, "%d %d\n", &row, &col);
+    else if (mm_is_real(t) || mm_is_integer(t))
+      st = fscanf(f, "%d %d %lg\n", &row, &col, &rval);
+    else  // Complex matrix
+      st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival);
+    if (ferror(f) || (st != num_fields)) {
+      fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1);
+      return -1;
+    }
+
+    // Switch to 0-based indexing
+    --row;
+    --col;
+
+    // Do not write past the nnz entries the output arrays are expected to hold
+    bool const add_complement = tg && !mm_is_general(t) && (row != col);
+    if (static_cast<IndexType_>(j) + (add_complement ? 2 : 1) > nnz) {
+      fprintf(stderr, "Error: Matrix Market file has more entries than expected\n");
+      return -1;
+    }
+
+    // Record entry
+    cooRowInd[j] = row;
+    cooColInd[j] = col;
+    if (cooRVal != NULL) cooRVal[j] = rval;
+    if (cooIVal != NULL) cooIVal[j] = ival;
+    ++j;
+
+    // Add symmetric complement of non-diagonal entries
+    if (add_complement) {
+      // Modify entry value if matrix is skew symmetric or Hermitian
+      if (mm_is_skew(t)) rval = -rval;
+      if (mm_is_skew(t) || mm_is_hermitian(t)) ival = -ival;
+
+      // Record entry
+      cooRowInd[j] = col;
+      cooColInd[j] = row;
+      if (cooRVal != NULL) cooRVal[j] = rval;
+      if (cooIVal != NULL) cooIVal[j] = ival;
+      ++j;
+    }
+  }
+  return 0;
+}
+
+// FIXME: A similar function could be useful for the CSC format.
+//        There are functions above that convert coo -> csr and coo -> csc.
+/**
+ * @brief Read a Matrix Market coordinate file on the host and convert it to a GraphCSR.
+ * @param[out] directed Set to true unless the file's typecode is symmetric
+ * @param mm_file Path of the file to open; complex and skew matrices are rejected
+ */
+template <typename vertex_t, typename edge_t, typename weight_t>
+std::unique_ptr<cugraph::GraphCSR<vertex_t, edge_t, weight_t>> generate_graph_csr_from_mm(
+  bool& directed, std::string mm_file)
+{
+  // Own the FILE* so it is also closed if a failed CUGRAPH_EXPECTS below unwinds the stack
+  std::unique_ptr<FILE, int (*)(FILE*)> guard(fopen(mm_file.c_str(), "r"), &fclose);
+  FILE* fpin = guard.get();
+  CUGRAPH_EXPECTS(fpin != nullptr, "fopen (%s) failure.", mm_file.c_str());
+
+  vertex_t number_of_vertices;
+  edge_t number_of_edges;
+  vertex_t number_of_columns = 0;
+  MM_typecode mm_typecode{0};
+  CUGRAPH_EXPECTS(
+    mm_properties<vertex_t>(
+      fpin, 1, &mm_typecode, &number_of_vertices, &number_of_columns, &number_of_edges) == 0,
+    "mm_properties query failure.");
+  CUGRAPH_EXPECTS(mm_is_matrix(mm_typecode), "Invalid input file.");
+  CUGRAPH_EXPECTS(mm_is_coordinate(mm_typecode), "Invalid input file.");
+  CUGRAPH_EXPECTS(!mm_is_complex(mm_typecode), "Invalid input file.");
+  CUGRAPH_EXPECTS(!mm_is_skew(mm_typecode), "Invalid input file.");
+  directed = !mm_is_symmetric(mm_typecode);
+
+  // Allocate memory on host
+  std::vector<vertex_t> coo_row_ind(number_of_edges);
+  std::vector<vertex_t> coo_col_ind(number_of_edges);
+  std::vector<weight_t> coo_val(number_of_edges);
+
+  // Read; data() (unlike &v[0]) is well-defined when the file has no entries
+  int read_status = mm_to_coo<vertex_t, weight_t>(
+    fpin, 1, number_of_edges, coo_row_ind.data(), coo_col_ind.data(), coo_val.data(), NULL);
+  CUGRAPH_EXPECTS(read_status == 0, "file read failure.");
+  // Hand the FILE* back before closing so the guard does not fclose it a second time
+  CUGRAPH_EXPECTS(fclose(guard.release()) == 0, "fclose failure.");
+  cugraph::GraphCOOView<vertex_t, edge_t, weight_t> cooview(
+    coo_row_ind.data(), coo_col_ind.data(), coo_val.data(), number_of_vertices, number_of_edges);
+  return cugraph::coo_to_csr(cooview);
+}
+
+// Reads a Matrix Market coordinate file on the host and copies its edge list to the device;
+// the returned weight vector has size 0 when test_weighted is false.
+template <typename vertex_t, typename weight_t>
+std::tuple<rmm::device_uvector<vertex_t>,
+           rmm::device_uvector<vertex_t>,
+           rmm::device_uvector<weight_t>,
+           vertex_t,
+           bool>
+read_edgelist_from_matrix_market_file(raft::handle_t const& handle,
+                                      std::string const& graph_file_full_path,
+                                      bool test_weighted)
+{
+  // Own the FILE* so it is also closed if a failed CUGRAPH_EXPECTS below unwinds the stack
+  std::unique_ptr<FILE, int (*)(FILE*)> guard(fopen(graph_file_full_path.c_str(), "r"), &fclose);
+  FILE* file = guard.get();
+  CUGRAPH_EXPECTS(file != nullptr, "fopen failure.");
+
+  MM_typecode mc{};
+  size_t nnz{};
+  size_t tmp_m{};
+  size_t tmp_k{};
+  auto mm_ret = cugraph::test::mm_properties<size_t>(file, 1, &mc, &tmp_m, &tmp_k, &nnz);
+  CUGRAPH_EXPECTS(mm_ret == 0, "could not read Matrix Market file properties.");
+  CUGRAPH_EXPECTS(mm_is_matrix(mc) && mm_is_coordinate(mc) && !mm_is_complex(mc) && !mm_is_skew(mc),
+                  "invalid Matrix Market file properties.");
+
+  vertex_t number_of_vertices = static_cast<vertex_t>(tmp_m);
+  bool is_symmetric           = mm_is_symmetric(mc);
+
+  std::vector<vertex_t> h_rows(nnz);
+  std::vector<vertex_t> h_cols(nnz);
+  std::vector<weight_t> h_weights(nnz);
+
+  mm_ret = cugraph::test::mm_to_coo<vertex_t, weight_t>(
+    file, 1, nnz, h_rows.data(), h_cols.data(), h_weights.data(), static_cast<weight_t*>(nullptr));
+  CUGRAPH_EXPECTS(mm_ret == 0, "could not read matrix data");
+  // Hand the FILE* back before closing so the guard does not fclose it a second time
+  CUGRAPH_EXPECTS(fclose(guard.release()) == 0, "fclose failure.");
+
+  rmm::device_uvector<vertex_t> d_edgelist_rows(h_rows.size(), handle.get_stream());
+  rmm::device_uvector<vertex_t> d_edgelist_cols(h_cols.size(), handle.get_stream());
+  rmm::device_uvector<weight_t> d_edgelist_weights(test_weighted ? h_weights.size() : size_t{0},
+                                                   handle.get_stream());
+
+  raft::update_device(d_edgelist_rows.data(), h_rows.data(), h_rows.size(), handle.get_stream());
+  raft::update_device(d_edgelist_cols.data(), h_cols.data(), h_cols.size(), handle.get_stream());
+  if (test_weighted) {
+    raft::update_device(
+      d_edgelist_weights.data(), h_weights.data(), h_weights.size(), handle.get_stream());
+  }
+
+  return std::make_tuple(std::move(d_edgelist_rows),
+                         std::move(d_edgelist_cols),
+                         std::move(d_edgelist_weights),
+                         number_of_vertices,
+                         is_symmetric);
+}
+
+// Reads the edge list from a Matrix Market file and builds a graph_t over [0, number_of_vertices).
+template <typename vertex_t,
+          typename edge_t,
+          typename weight_t,
+          bool store_transposed,
+          bool multi_gpu>
+std::tuple<cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu>,
+           rmm::device_uvector<vertex_t>>
+read_graph_from_matrix_market_file(raft::handle_t const& handle,
+                                   std::string const& graph_file_full_path,
+                                   bool test_weighted,
+                                   bool renumber)
+{
+  auto stream = handle.get_stream();
+  rmm::device_uvector<vertex_t> d_rows(0, stream);
+  rmm::device_uvector<vertex_t> d_cols(0, stream);
+  rmm::device_uvector<weight_t> d_weights(0, stream);
+  vertex_t num_vertices{};
+  bool symmetric{};
+  std::tie(d_rows, d_cols, d_weights, num_vertices, symmetric) =
+    read_edgelist_from_matrix_market_file<vertex_t, weight_t>(
+      handle, graph_file_full_path, test_weighted);
+
+  rmm::device_uvector<vertex_t> d_vertices(num_vertices, stream);
+  thrust::sequence(
+    rmm::exec_policy(stream)->on(stream), d_vertices.begin(), d_vertices.end(), vertex_t{0});
+
+  return generate_graph_from_edgelist<vertex_t, edge_t, weight_t, store_transposed, multi_gpu>(
+    handle,
+    std::move(d_vertices),
+    std::move(d_rows),
+    std::move(d_cols),
+    std::move(d_weights),
+    symmetric,
+    test_weighted,
+    renumber);
+}
+
+// explicit instantiations
+
+template int32_t mm_to_coo(FILE* f,
+                           int32_t tg,
+                           int32_t nnz,
+                           int32_t* cooRowInd,
+                           int32_t* cooColInd,
+                           int32_t* cooRVal,
+                           int32_t* cooIVal);
+
+template int32_t mm_to_coo(FILE* f,
+                           int32_t tg,
+                           int32_t nnz,
+                           int32_t* cooRowInd,
+                           int32_t* cooColInd,
+                           double* cooRVal,
+                           double* cooIVal);
+
+template int32_t mm_to_coo(FILE* f,
+                           int32_t tg,
+                           int32_t nnz,
+                           int32_t* cooRowInd,
+                           int32_t* cooColInd,
+                           float* cooRVal,
+                           float* cooIVal);
+
+template std::unique_ptr<cugraph::GraphCSR<int32_t, int32_t, float>> generate_graph_csr_from_mm(
+  bool& directed, std::string mm_file);
+
+template std::unique_ptr<cugraph::GraphCSR<uint32_t, uint32_t, float>> generate_graph_csr_from_mm(
+  bool& directed, std::string mm_file);
+
+template std::unique_ptr<cugraph::GraphCSR<int32_t, int32_t, double>> generate_graph_csr_from_mm(
+  bool& directed, std::string mm_file);
+
+template std::unique_ptr<cugraph::GraphCSR<int64_t, int64_t, float>> generate_graph_csr_from_mm(
+  bool& directed, std::string mm_file);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, float, false, false>,
+                    rmm::device_uvector<int32_t>>
+read_graph_from_matrix_market_file<int32_t, int32_t, float, false, false>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, float, false, true>,
+                    rmm::device_uvector<int32_t>>
+read_graph_from_matrix_market_file<int32_t, int32_t, float, false, true>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, float, true, false>,
+                    rmm::device_uvector<int32_t>>
+read_graph_from_matrix_market_file<int32_t, int32_t, float, true, false>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, float, true, true>,
+                    rmm::device_uvector<int32_t>>
+read_graph_from_matrix_market_file<int32_t, int32_t, float, true, true>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, double, false, false>,
+                    rmm::device_uvector<int32_t>>
+read_graph_from_matrix_market_file<int32_t, int32_t, double, false, false>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, double, false, true>,
+                    rmm::device_uvector<int32_t>>
+read_graph_from_matrix_market_file<int32_t, int32_t, double, false, true>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, double, true, false>,
+                    rmm::device_uvector<int32_t>>
+read_graph_from_matrix_market_file<int32_t, int32_t, double, true, false>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, double, true, true>,
+                    rmm::device_uvector<int32_t>>
+read_graph_from_matrix_market_file<int32_t, int32_t, double, true, true>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, float, false, false>,
+                    rmm::device_uvector<int32_t>>
+read_graph_from_matrix_market_file<int32_t, int64_t, float, false, false>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, float, false, true>,
+                    rmm::device_uvector<int32_t>>
+read_graph_from_matrix_market_file<int32_t, int64_t, float, false, true>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, float, true, false>,
+                    rmm::device_uvector<int32_t>>
+read_graph_from_matrix_market_file<int32_t, int64_t, float, true, false>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, float, true, true>,
+                    rmm::device_uvector<int32_t>>
+read_graph_from_matrix_market_file<int32_t, int64_t, float, true, true>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, double, false, false>,
+                    rmm::device_uvector<int32_t>>
+read_graph_from_matrix_market_file<int32_t, int64_t, double, false, false>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, double, false, true>,
+                    rmm::device_uvector<int32_t>>
+read_graph_from_matrix_market_file<int32_t, int64_t, double, false, true>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, double, true, false>,
+                    rmm::device_uvector<int32_t>>
+read_graph_from_matrix_market_file<int32_t, int64_t, double, true, false>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, double, true, true>,
+                    rmm::device_uvector<int32_t>>
+read_graph_from_matrix_market_file<int32_t, int64_t, double, true, true>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, float, false, false>,
+                    rmm::device_uvector<int64_t>>
+read_graph_from_matrix_market_file<int64_t, int64_t, float, false, false>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, float, false, true>,
+                    rmm::device_uvector<int64_t>>
+read_graph_from_matrix_market_file<int64_t, int64_t, float, false, true>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, float, true, false>,
+                    rmm::device_uvector<int64_t>>
+read_graph_from_matrix_market_file<int64_t, int64_t, float, true, false>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, float, true, true>,
+                    rmm::device_uvector<int64_t>>
+read_graph_from_matrix_market_file<int64_t, int64_t, float, true, true>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, double, false, false>,
+                    rmm::device_uvector<int64_t>>
+read_graph_from_matrix_market_file<int64_t, int64_t, double, false, false>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, double, false, true>,
+                    rmm::device_uvector<int64_t>>
+read_graph_from_matrix_market_file<int64_t, int64_t, double, false, true>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, double, true, false>,
+                    rmm::device_uvector<int64_t>>
+read_graph_from_matrix_market_file<int64_t, int64_t, double, true, false>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, double, true, true>,
+                    rmm::device_uvector<int64_t>>
+read_graph_from_matrix_market_file<int64_t, int64_t, double, true, true>(
+  raft::handle_t const& handle,
+  std::string const& graph_file_full_path,
+  bool test_weighted,
+  bool renumber);
+
+}  // namespace test
+}  // namespace cugraph
diff --git a/cpp/tests/utilities/mg_test_utilities.cu b/cpp/tests/utilities/mg_test_utilities.cu
deleted file mode 100644
index 26f2450b589..00000000000
--- a/cpp/tests/utilities/mg_test_utilities.cu
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Copyright (c) 2021, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <utilities/mg_test_utilities.hpp>
-#include <utilities/test_utilities.hpp>
-
-#include <experimental/detail/graph_utils.cuh>
-#include <experimental/graph.hpp>
-#include <experimental/graph_functions.hpp>
-#include <utilities/shuffle_comm.cuh>
-
-namespace cugraph {
-namespace test {
-
-// Given a raft handle and a path to a dataset (must be a .mtx file), returns a
-// tuple containing:
-//  * graph_t instance for the partition accesible from the raft handle
-//  * vector of indices representing the original unrenumberd vertices
-//
-// This function creates a graph_t instance appropriate for MG graph
-// applications from the edgelist graph data file passed in by filtering out the
-// vertices not to be assigned to the GPU in this rank, then renumbering the
-// vertices appropriately. The returned vector of vertices contains the original
-// vertex IDs, ordered by the new sequential renumbered IDs (this is needed for
-// unrenumbering).
-template <typename vertex_t, typename edge_t, typename weight_t, bool store_transposed>
-std::tuple<
-  std::unique_ptr<cugraph::experimental::
-                    graph_t<vertex_t, edge_t, weight_t, store_transposed, true>>,  // multi_gpu=true
-  rmm::device_uvector<vertex_t>>
-create_graph_for_gpu(raft::handle_t& handle, const std::string& graph_file_path)
-{
-  const auto& comm = handle.get_comms();
-  auto& row_comm   = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name());
-  auto& col_comm   = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name());
-
-  int my_rank = comm.get_rank();
-
-  auto edgelist_from_mm =
-    ::cugraph::test::read_edgelist_from_matrix_market_file<vertex_t, edge_t, weight_t>(
-      graph_file_path);
-
-  edge_t total_number_edges = static_cast<edge_t>(edgelist_from_mm.h_rows.size());
-
-  //////////
-  // Copy COO to device
-  rmm::device_uvector<vertex_t> d_edgelist_rows(total_number_edges, handle.get_stream());
-  rmm::device_uvector<vertex_t> d_edgelist_cols(total_number_edges, handle.get_stream());
-  rmm::device_uvector<weight_t> d_edgelist_weights(total_number_edges, handle.get_stream());
-
-  raft::update_device(d_edgelist_rows.data(),
-                      edgelist_from_mm.h_rows.data(),
-                      total_number_edges,
-                      handle.get_stream());
-  raft::update_device(d_edgelist_cols.data(),
-                      edgelist_from_mm.h_cols.data(),
-                      total_number_edges,
-                      handle.get_stream());
-  raft::update_device(d_edgelist_weights.data(),
-                      edgelist_from_mm.h_weights.data(),
-                      total_number_edges,
-                      handle.get_stream());
-
-  //////////
-  // Filter out edges that are not to be associated with this rank
-  //
-  // Create a edge_gpu_identifier, which will be used by the individual jobs to
-  // identify if a edge belongs to a particular rank
-  cugraph::experimental::detail::compute_gpu_id_from_edge_t<vertex_t> edge_gpu_identifier{
-    false, comm.get_size(), row_comm.get_size(), col_comm.get_size()};
-
-  auto edgelist_zip_it_begin = thrust::make_zip_iterator(thrust::make_tuple(
-    d_edgelist_rows.begin(), d_edgelist_cols.begin(), d_edgelist_weights.begin()));
-  bool is_transposed{store_transposed};
-
-  // Do the removal - note: remove_if does not delete items, it moves "removed"
-  // items to the back of the vector and returns the iterator (new_end) that
-  // represents the items kept. Actual removal of items can be done by
-  // resizing (see below).
-  auto new_end = thrust::remove_if(
-    rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
-    edgelist_zip_it_begin,
-    edgelist_zip_it_begin + total_number_edges,
-    [my_rank, is_transposed, edge_gpu_identifier] __device__(auto tup) {
-      if (is_transposed) {
-        return (edge_gpu_identifier(thrust::get<1>(tup), thrust::get<0>(tup)) != my_rank);
-      } else {
-        return (edge_gpu_identifier(thrust::get<0>(tup), thrust::get<1>(tup)) != my_rank);
-      }
-    });
-
-  edge_t local_number_edges = thrust::distance(edgelist_zip_it_begin, new_end);
-  // Free the memory used for the items remove_if "removed". This not only
-  // frees memory, but keeps the actual vector sizes consistent with the data
-  // being used from this point forward.
-  d_edgelist_rows.resize(local_number_edges, handle.get_stream());
-  d_edgelist_rows.shrink_to_fit(handle.get_stream());
-  d_edgelist_cols.resize(local_number_edges, handle.get_stream());
-  d_edgelist_cols.shrink_to_fit(handle.get_stream());
-  d_edgelist_weights.resize(local_number_edges, handle.get_stream());
-  d_edgelist_weights.shrink_to_fit(handle.get_stream());
-
-  //////////
-  // renumber filtered edgelist_from_mm
-  vertex_t* major_vertices{nullptr};
-  vertex_t* minor_vertices{nullptr};
-  if (is_transposed) {
-    major_vertices = d_edgelist_cols.data();
-    minor_vertices = d_edgelist_rows.data();
-  } else {
-    major_vertices = d_edgelist_rows.data();
-    minor_vertices = d_edgelist_cols.data();
-  }
-
-  rmm::device_uvector<vertex_t> renumber_map_labels(0, handle.get_stream());
-  cugraph::experimental::partition_t<vertex_t> partition(
-    std::vector<vertex_t>(comm.get_size() + 1, 0),
-    false,  // is_hypergraph_partitioned()
-    row_comm.get_size(),
-    col_comm.get_size(),
-    row_comm.get_rank(),
-    col_comm.get_rank());
-  vertex_t number_of_vertices{};
-  edge_t number_of_edges{};
-  std::tie(renumber_map_labels, partition, number_of_vertices, number_of_edges) =
-    ::cugraph::experimental::renumber_edgelist<vertex_t, edge_t, true>  // multi_gpu=true
-    (handle,
-     major_vertices,  // edgelist_major_vertices, INOUT of vertex_t*
-     minor_vertices,  // edgelist_minor_vertices, INOUT of vertex_t*
-     local_number_edges,
-     false,  // is_hypergraph_partitioned
-     true);  // do_expensive_check
-
-  cugraph::experimental::edgelist_t<vertex_t, edge_t, weight_t> edgelist{
-    d_edgelist_rows.data(), d_edgelist_cols.data(), d_edgelist_weights.data(), local_number_edges};
-
-  std::vector<cugraph::experimental::edgelist_t<vertex_t, edge_t, weight_t>> edgelist_vect;
-  edgelist_vect.push_back(edgelist);
-  cugraph::experimental::graph_properties_t properties;
-  properties.is_symmetric  = edgelist_from_mm.is_symmetric;
-  properties.is_multigraph = false;
-
-  // Finally, create instance of graph_t using filtered & renumbered edgelist
-  return std::make_tuple(
-    std::make_unique<
-      cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, true>>(
-      handle,
-      edgelist_vect,
-      partition,
-      number_of_vertices,
-      total_number_edges,
-      properties,
-      false,  // sorted_by_global_degree_within_vertex_partition
-      true),  // do_expensive_check
-    std::move(renumber_map_labels));
-}
-
-// explicit instantiation
-template std::tuple<
-  std::unique_ptr<
-    cugraph::experimental::graph_t<int32_t, int32_t, float, true, true>>,  // store_transposed=true
-                                                                           // multi_gpu=true
-  rmm::device_uvector<int32_t>>
-create_graph_for_gpu(raft::handle_t& handle, const std::string& graph_file_path);
-
-}  // namespace test
-}  // namespace cugraph
diff --git a/cpp/tests/utilities/mg_test_utilities.hpp b/cpp/tests/utilities/mg_test_utilities.hpp
deleted file mode 100644
index c23f6c43a6d..00000000000
--- a/cpp/tests/utilities/mg_test_utilities.hpp
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2021, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#pragma once
-
-#include <experimental/graph.hpp>
-
-#include <raft/comms/mpi_comms.hpp>
-
-#include <gtest/gtest.h>
-
-namespace cugraph {
-namespace test {
-
-// Given a raft handle and a path to a dataset (must be a .mtx file), returns a
-// tuple containing:
-//  * graph_t instance for the partition accesible from the raft handle
-//  * 4-tuple containing renumber info resulting from renumbering the
-//    edgelist for the partition
-template <typename vertex_t, typename edge_t, typename weight_t, bool store_transposed>
-std::tuple<
-  std::unique_ptr<cugraph::experimental::
-                    graph_t<vertex_t, edge_t, weight_t, store_transposed, true>>,  // multi_gpu=true
-  rmm::device_uvector<vertex_t>>
-create_graph_for_gpu(raft::handle_t& handle, const std::string& graph_file_path);
-
-/**
- * @brief Base test fixture class, responsible for handling common operations
- * needed by all MG tests.
- *
- * It's expected this class will be built out and refactored often as new MG C++
- * tests are added and new patterns evolve.
- *
- * Example:
- * ```
- * class MyTestFixture : public cugraph::test::MG_TestFixture_t<My_Testparams_t> {};
- * ```
- **/
-
-// FIXME: consider moving this to a separate file? (eg. mg_test_fixture.cpp)?
-
-class MG_TestFixture_t : public ::testing::Test {
- public:
-  static void SetUpTestCase()
-  {
-    MPI_TRY(MPI_Init(NULL, NULL));
-
-    int rank, size;
-    MPI_TRY(MPI_Comm_rank(MPI_COMM_WORLD, &rank));
-    MPI_TRY(MPI_Comm_size(MPI_COMM_WORLD, &size));
-
-    int nGpus;
-    CUDA_CHECK(cudaGetDeviceCount(&nGpus));
-
-    ASSERT(
-      nGpus >= size, "Number of GPUs are lesser than MPI ranks! ngpus=%d, nranks=%d", nGpus, size);
-
-    CUDA_CHECK(cudaSetDevice(rank));
-  }
-
-  static void TearDownTestCase() { MPI_TRY(MPI_Finalize()); }
-};
-
-}  // namespace test
-}  // namespace cugraph
diff --git a/cpp/tests/utilities/misc_utilities.cpp b/cpp/tests/utilities/misc_utilities.cpp
new file mode 100644
index 00000000000..14f0df2f35d
--- /dev/null
+++ b/cpp/tests/utilities/misc_utilities.cpp
@@ -0,0 +1,33 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <utilities/test_utilities.hpp>
+
+namespace cugraph {
+namespace test {
+
+// Returns the part of s after its last path separator ("" when s has no separator).
+std::string getFileName(const std::string& s)
+{
+#ifdef _WIN32
+  const auto last_sep = s.rfind('\\');
+#else
+  const auto last_sep = s.rfind('/');
+#endif
+  return (last_sep == std::string::npos) ? std::string() : s.substr(last_sep + 1);
+}
+
+}  // namespace test
+}  // namespace cugraph
diff --git a/cpp/tests/utilities/rmat_utilities.cu b/cpp/tests/utilities/rmat_utilities.cu
new file mode 100644
index 00000000000..16ea7a486fc
--- /dev/null
+++ b/cpp/tests/utilities/rmat_utilities.cu
@@ -0,0 +1,431 @@
+/*
+ * Copyright (c) 2021, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <utilities/test_utilities.hpp>
+
+#include <experimental/graph_generator.hpp>
+#include <utilities/error.hpp>
+
+#include <rmm/thrust_rmm_allocator.h>
+#include <raft/random/rng.cuh>
+
+#include <thrust/sequence.h>
+
+#include <cstdint>
+
+namespace cugraph {
+namespace test {
+
+template <typename vertex_t,
+          typename edge_t,
+          typename weight_t,
+          bool store_transposed,
+          bool multi_gpu>
+std::tuple<cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu>,
+           rmm::device_uvector<vertex_t>>  // the result of generate_graph_from_edgelist, returned as-is
+generate_graph_from_rmat_params(raft::handle_t const& handle,  // test helper: R-mat edge list -> graph_t
+                                size_t scale,  // d_vertices below is sized 2^scale
+                                size_t edge_factor,  // passed through to generate_rmat_edgelist
+                                double a,  // a, b, c: passed through to generate_rmat_edgelist
+                                double b,
+                                double c,
+                                uint64_t seed,  // passed to generate_rmat_edgelist; seed + 1 seeds the weight RNG
+                                bool undirected,  // true currently reaches CUGRAPH_FAIL("unimplemented.")
+                                bool scramble_vertex_ids,  // passed through to generate_rmat_edgelist
+                                bool test_weighted,  // true: one random weight per edge; false: empty weights
+                                bool renumber)  // passed through to generate_graph_from_edgelist
+{
+  rmm::device_uvector<vertex_t> d_edgelist_rows(0, handle.get_stream());  // zero-length placeholders,
+  rmm::device_uvector<vertex_t> d_edgelist_cols(0, handle.get_stream());  // replaced by std::tie below
+  std::tie(d_edgelist_rows, d_edgelist_cols) =
+    cugraph::experimental::generate_rmat_edgelist<vertex_t>(  // NOTE(review): "? true : false" below is redundant
+      handle, scale, edge_factor, a, b, c, seed, undirected ? true : false, scramble_vertex_ids);
+  if (undirected) {  // NOTE(review): tested only after the edge list above has already been generated
+    // FIXME: need to symmetrize
+    CUGRAPH_FAIL("unimplemented.");  // presumably throws (see utilities/error.hpp) - TODO confirm
+  }
+  // Weight buffer: one entry per generated edge when test_weighted is set, otherwise zero-length.
+  rmm::device_uvector<weight_t> d_edgelist_weights(test_weighted ? d_edgelist_rows.size() : 0,
+                                                   handle.get_stream());
+  if (test_weighted) {
+    raft::random::Rng rng(seed + 1);  // offset by one from the seed given to generate_rmat_edgelist
+    rng.uniform<weight_t, size_t>(d_edgelist_weights.data(),
+                                  d_edgelist_weights.size(),
+                                  weight_t{0.0},  // range bounds handed to uniform(): 0.0 and 1.0
+                                  weight_t{1.0},
+                                  handle.get_stream());
+  }
+  // Vertex list 0, 1, ..., 2^scale - 1. NOTE(review): the size is narrowed to vertex_t; assumes it fits.
+  rmm::device_uvector<vertex_t> d_vertices(static_cast<vertex_t>(size_t{1} << scale),
+                                           handle.get_stream());
+  thrust::sequence(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()),
+                   d_vertices.begin(),
+                   d_vertices.end(),
+                   vertex_t{0});
+  // Every buffer is moved into generate_graph_from_edgelist; none is used after this call.
+  return generate_graph_from_edgelist<vertex_t, edge_t, weight_t, store_transposed, multi_gpu>(
+    handle,
+    std::move(d_vertices),
+    std::move(d_edgelist_rows),
+    std::move(d_edgelist_cols),
+    std::move(d_edgelist_weights),
+    false,  // NOTE(review): presumably the is_symmetric flag - confirm against utilities/test_utilities.hpp
+    test_weighted,
+    renumber);
+}
+
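+// explicit instantiations: vertex_t/edge_t in {int32/int32, int32/int64, int64/int64} x weight_t in {float, double} x store_transposed x multi_gpu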
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, float, false, false>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_rmat_params<int32_t, int32_t, float, false, false>(raft::handle_t const& handle,
+                                                                       size_t scale,
+                                                                       size_t edge_factor,
+                                                                       double a,
+                                                                       double b,
+                                                                       double c,
+                                                                       uint64_t seed,
+                                                                       bool undirected,
+                                                                       bool scramble_vertex_ids,
+                                                                       bool test_weighted,
+                                                                       bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, float, false, true>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_rmat_params<int32_t, int32_t, float, false, true>(raft::handle_t const& handle,
+                                                                      size_t scale,
+                                                                      size_t edge_factor,
+                                                                      double a,
+                                                                      double b,
+                                                                      double c,
+                                                                      uint64_t seed,
+                                                                      bool undirected,
+                                                                      bool scramble_vertex_ids,
+                                                                      bool test_weighted,
+                                                                      bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, float, true, false>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_rmat_params<int32_t, int32_t, float, true, false>(raft::handle_t const& handle,
+                                                                      size_t scale,
+                                                                      size_t edge_factor,
+                                                                      double a,
+                                                                      double b,
+                                                                      double c,
+                                                                      uint64_t seed,
+                                                                      bool undirected,
+                                                                      bool scramble_vertex_ids,
+                                                                      bool test_weighted,
+                                                                      bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, float, true, true>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_rmat_params<int32_t, int32_t, float, true, true>(raft::handle_t const& handle,
+                                                                     size_t scale,
+                                                                     size_t edge_factor,
+                                                                     double a,
+                                                                     double b,
+                                                                     double c,
+                                                                     uint64_t seed,
+                                                                     bool undirected,
+                                                                     bool scramble_vertex_ids,
+                                                                     bool test_weighted,
+                                                                     bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, double, false, false>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_rmat_params<int32_t, int32_t, double, false, false>(
+  raft::handle_t const& handle,
+  size_t scale,
+  size_t edge_factor,
+  double a,
+  double b,
+  double c,
+  uint64_t seed,
+  bool undirected,
+  bool scramble_vertex_ids,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, double, false, true>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_rmat_params<int32_t, int32_t, double, false, true>(raft::handle_t const& handle,
+                                                                       size_t scale,
+                                                                       size_t edge_factor,
+                                                                       double a,
+                                                                       double b,
+                                                                       double c,
+                                                                       uint64_t seed,
+                                                                       bool undirected,
+                                                                       bool scramble_vertex_ids,
+                                                                       bool test_weighted,
+                                                                       bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, double, true, false>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_rmat_params<int32_t, int32_t, double, true, false>(raft::handle_t const& handle,
+                                                                       size_t scale,
+                                                                       size_t edge_factor,
+                                                                       double a,
+                                                                       double b,
+                                                                       double c,
+                                                                       uint64_t seed,
+                                                                       bool undirected,
+                                                                       bool scramble_vertex_ids,
+                                                                       bool test_weighted,
+                                                                       bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int32_t, double, true, true>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_rmat_params<int32_t, int32_t, double, true, true>(raft::handle_t const& handle,
+                                                                      size_t scale,
+                                                                      size_t edge_factor,
+                                                                      double a,
+                                                                      double b,
+                                                                      double c,
+                                                                      uint64_t seed,
+                                                                      bool undirected,
+                                                                      bool scramble_vertex_ids,
+                                                                      bool test_weighted,
+                                                                      bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, float, false, false>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_rmat_params<int32_t, int64_t, float, false, false>(raft::handle_t const& handle,
+                                                                       size_t scale,
+                                                                       size_t edge_factor,
+                                                                       double a,
+                                                                       double b,
+                                                                       double c,
+                                                                       uint64_t seed,
+                                                                       bool undirected,
+                                                                       bool scramble_vertex_ids,
+                                                                       bool test_weighted,
+                                                                       bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, float, false, true>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_rmat_params<int32_t, int64_t, float, false, true>(raft::handle_t const& handle,
+                                                                      size_t scale,
+                                                                      size_t edge_factor,
+                                                                      double a,
+                                                                      double b,
+                                                                      double c,
+                                                                      uint64_t seed,
+                                                                      bool undirected,
+                                                                      bool scramble_vertex_ids,
+                                                                      bool test_weighted,
+                                                                      bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, float, true, false>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_rmat_params<int32_t, int64_t, float, true, false>(raft::handle_t const& handle,
+                                                                      size_t scale,
+                                                                      size_t edge_factor,
+                                                                      double a,
+                                                                      double b,
+                                                                      double c,
+                                                                      uint64_t seed,
+                                                                      bool undirected,
+                                                                      bool scramble_vertex_ids,
+                                                                      bool test_weighted,
+                                                                      bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, float, true, true>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_rmat_params<int32_t, int64_t, float, true, true>(raft::handle_t const& handle,
+                                                                     size_t scale,
+                                                                     size_t edge_factor,
+                                                                     double a,
+                                                                     double b,
+                                                                     double c,
+                                                                     uint64_t seed,
+                                                                     bool undirected,
+                                                                     bool scramble_vertex_ids,
+                                                                     bool test_weighted,
+                                                                     bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, double, false, false>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_rmat_params<int32_t, int64_t, double, false, false>(
+  raft::handle_t const& handle,
+  size_t scale,
+  size_t edge_factor,
+  double a,
+  double b,
+  double c,
+  uint64_t seed,
+  bool undirected,
+  bool scramble_vertex_ids,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, double, false, true>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_rmat_params<int32_t, int64_t, double, false, true>(raft::handle_t const& handle,
+                                                                       size_t scale,
+                                                                       size_t edge_factor,
+                                                                       double a,
+                                                                       double b,
+                                                                       double c,
+                                                                       uint64_t seed,
+                                                                       bool undirected,
+                                                                       bool scramble_vertex_ids,
+                                                                       bool test_weighted,
+                                                                       bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, double, true, false>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_rmat_params<int32_t, int64_t, double, true, false>(raft::handle_t const& handle,
+                                                                       size_t scale,
+                                                                       size_t edge_factor,
+                                                                       double a,
+                                                                       double b,
+                                                                       double c,
+                                                                       uint64_t seed,
+                                                                       bool undirected,
+                                                                       bool scramble_vertex_ids,
+                                                                       bool test_weighted,
+                                                                       bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int32_t, int64_t, double, true, true>,
+                    rmm::device_uvector<int32_t>>
+generate_graph_from_rmat_params<int32_t, int64_t, double, true, true>(raft::handle_t const& handle,
+                                                                      size_t scale,
+                                                                      size_t edge_factor,
+                                                                      double a,
+                                                                      double b,
+                                                                      double c,
+                                                                      uint64_t seed,
+                                                                      bool undirected,
+                                                                      bool scramble_vertex_ids,
+                                                                      bool test_weighted,
+                                                                      bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, float, false, false>,
+                    rmm::device_uvector<int64_t>>
+generate_graph_from_rmat_params<int64_t, int64_t, float, false, false>(raft::handle_t const& handle,
+                                                                       size_t scale,
+                                                                       size_t edge_factor,
+                                                                       double a,
+                                                                       double b,
+                                                                       double c,
+                                                                       uint64_t seed,
+                                                                       bool undirected,
+                                                                       bool scramble_vertex_ids,
+                                                                       bool test_weighted,
+                                                                       bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, float, false, true>,
+                    rmm::device_uvector<int64_t>>
+generate_graph_from_rmat_params<int64_t, int64_t, float, false, true>(raft::handle_t const& handle,
+                                                                      size_t scale,
+                                                                      size_t edge_factor,
+                                                                      double a,
+                                                                      double b,
+                                                                      double c,
+                                                                      uint64_t seed,
+                                                                      bool undirected,
+                                                                      bool scramble_vertex_ids,
+                                                                      bool test_weighted,
+                                                                      bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, float, true, false>,
+                    rmm::device_uvector<int64_t>>
+generate_graph_from_rmat_params<int64_t, int64_t, float, true, false>(raft::handle_t const& handle,
+                                                                      size_t scale,
+                                                                      size_t edge_factor,
+                                                                      double a,
+                                                                      double b,
+                                                                      double c,
+                                                                      uint64_t seed,
+                                                                      bool undirected,
+                                                                      bool scramble_vertex_ids,
+                                                                      bool test_weighted,
+                                                                      bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, float, true, true>,
+                    rmm::device_uvector<int64_t>>
+generate_graph_from_rmat_params<int64_t, int64_t, float, true, true>(raft::handle_t const& handle,
+                                                                     size_t scale,
+                                                                     size_t edge_factor,
+                                                                     double a,
+                                                                     double b,
+                                                                     double c,
+                                                                     uint64_t seed,
+                                                                     bool undirected,
+                                                                     bool scramble_vertex_ids,
+                                                                     bool test_weighted,
+                                                                     bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, double, false, false>,
+                    rmm::device_uvector<int64_t>>
+generate_graph_from_rmat_params<int64_t, int64_t, double, false, false>(
+  raft::handle_t const& handle,
+  size_t scale,
+  size_t edge_factor,
+  double a,
+  double b,
+  double c,
+  uint64_t seed,
+  bool undirected,
+  bool scramble_vertex_ids,
+  bool test_weighted,
+  bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, double, false, true>,
+                    rmm::device_uvector<int64_t>>
+generate_graph_from_rmat_params<int64_t, int64_t, double, false, true>(raft::handle_t const& handle,
+                                                                       size_t scale,
+                                                                       size_t edge_factor,
+                                                                       double a,
+                                                                       double b,
+                                                                       double c,
+                                                                       uint64_t seed,
+                                                                       bool undirected,
+                                                                       bool scramble_vertex_ids,
+                                                                       bool test_weighted,
+                                                                       bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, double, true, false>,
+                    rmm::device_uvector<int64_t>>
+generate_graph_from_rmat_params<int64_t, int64_t, double, true, false>(raft::handle_t const& handle,
+                                                                       size_t scale,
+                                                                       size_t edge_factor,
+                                                                       double a,
+                                                                       double b,
+                                                                       double c,
+                                                                       uint64_t seed,
+                                                                       bool undirected,
+                                                                       bool scramble_vertex_ids,
+                                                                       bool test_weighted,
+                                                                       bool renumber);
+
+template std::tuple<cugraph::experimental::graph_t<int64_t, int64_t, double, true, true>,
+                    rmm::device_uvector<int64_t>>
+generate_graph_from_rmat_params<int64_t, int64_t, double, true, true>(raft::handle_t const& handle,
+                                                                      size_t scale,
+                                                                      size_t edge_factor,
+                                                                      double a,
+                                                                      double b,
+                                                                      double c,
+                                                                      uint64_t seed,
+                                                                      bool undirected,
+                                                                      bool scramble_vertex_ids,
+                                                                      bool test_weighted,
+                                                                      bool renumber);
+
+}  // namespace test
+}  // namespace cugraph
diff --git a/cpp/tests/utilities/test_utilities.cpp b/cpp/tests/utilities/test_utilities.cpp
deleted file mode 100644
index abb416a632d..00000000000
--- a/cpp/tests/utilities/test_utilities.cpp
+++ /dev/null
@@ -1,442 +0,0 @@
-/*
- * Copyright (c) 2021, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#include <utilities/test_utilities.hpp>
-
-#include <experimental/graph.hpp>
-#include <functions.hpp>
-#include <utilities/error.hpp>
-
-#include <gtest/gtest.h>
-
-extern "C" {
-#include "mmio.h"
-}
-
-#include <gtest/gtest.h>
-
-#include <cfloat>
-#include <cstdio>
-#include <string>
-#include <vector>
-
-namespace cugraph {
-namespace test {
-
-std::string getFileName(const std::string& s)
-{
-  char sep = '/';
-#ifdef _WIN32
-  sep = '\\';
-#endif
-  size_t i = s.rfind(sep, s.length());
-  if (i != std::string::npos) { return (s.substr(i + 1, s.length() - i)); }
-  return ("");
-}
-
-/// Read matrix properties from Matrix Market file
-/** Matrix Market file is assumed to be a sparse matrix in coordinate
- *  format.
- *
- *  @param f File stream for Matrix Market file.
- *  @param tg Boolean indicating whether to convert matrix to general
- *  format (from symmetric, Hermitian, or skew symmetric format).
- *  @param t (Output) MM_typecode with matrix properties.
- *  @param m (Output) Number of matrix rows.
- *  @param n (Output) Number of matrix columns.
- *  @param nnz (Output) Number of non-zero matrix entries.
- *  @return Zero if properties were read successfully. Otherwise
- *  non-zero.
- */
-template <typename IndexType_>
-int mm_properties(FILE* f, int tg, MM_typecode* t, IndexType_* m, IndexType_* n, IndexType_* nnz)
-{
-  // Read matrix properties from file
-  int mint, nint, nnzint;
-  if (fseek(f, 0, SEEK_SET)) {
-    fprintf(stderr, "Error: could not set position in file\n");
-    return -1;
-  }
-  if (mm_read_banner(f, t)) {
-    fprintf(stderr, "Error: could not read Matrix Market file banner\n");
-    return -1;
-  }
-  if (!mm_is_matrix(*t) || !mm_is_coordinate(*t)) {
-    fprintf(stderr, "Error: file does not contain matrix in coordinate format\n");
-    return -1;
-  }
-  if (mm_read_mtx_crd_size(f, &mint, &nint, &nnzint)) {
-    fprintf(stderr, "Error: could not read matrix dimensions\n");
-    return -1;
-  }
-  if (!mm_is_pattern(*t) && !mm_is_real(*t) && !mm_is_integer(*t) && !mm_is_complex(*t)) {
-    fprintf(stderr, "Error: matrix entries are not valid type\n");
-    return -1;
-  }
-  *m   = mint;
-  *n   = nint;
-  *nnz = nnzint;
-
-  // Find total number of non-zero entries
-  if (tg && !mm_is_general(*t)) {
-    // Non-diagonal entries should be counted twice
-    *nnz *= 2;
-
-    // Diagonal entries should not be double-counted
-    int st;
-    for (int i = 0; i < nnzint; ++i) {
-      // Read matrix entry
-      // MTX only supports int for row and col idx
-      int row, col;
-      double rval, ival;
-      if (mm_is_pattern(*t))
-        st = fscanf(f, "%d %d\n", &row, &col);
-      else if (mm_is_real(*t) || mm_is_integer(*t))
-        st = fscanf(f, "%d %d %lg\n", &row, &col, &rval);
-      else  // Complex matrix
-        st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival);
-      if (ferror(f) || (st == EOF)) {
-        fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1);
-        return -1;
-      }
-
-      // Check if entry is diagonal
-      if (row == col) --(*nnz);
-    }
-  }
-
-  return 0;
-}
-
-/// Read Matrix Market file and convert to COO format matrix
-/** Matrix Market file is assumed to be a sparse matrix in coordinate
- *  format.
- *
- *  @param f File stream for Matrix Market file.
- *  @param tg Boolean indicating whether to convert matrix to general
- *  format (from symmetric, Hermitian, or skew symmetric format).
- *  @param nnz Number of non-zero matrix entries.
- *  @param cooRowInd (Output) Row indices for COO matrix. Should have
- *  at least nnz entries.
- *  @param cooColInd (Output) Column indices for COO matrix. Should
- *  have at least nnz entries.
- *  @param cooRVal (Output) Real component of COO matrix
- *  entries. Should have at least nnz entries. Ignored if null
- *  pointer.
- *  @param cooIVal (Output) Imaginary component of COO matrix
- *  entries. Should have at least nnz entries. Ignored if null
- *  pointer.
- *  @return Zero if matrix was read successfully. Otherwise non-zero.
- */
-template <typename IndexType_, typename ValueType_>
-int mm_to_coo(FILE* f,
-              int tg,
-              IndexType_ nnz,
-              IndexType_* cooRowInd,
-              IndexType_* cooColInd,
-              ValueType_* cooRVal,
-              ValueType_* cooIVal)
-{
-  // Read matrix properties from file
-  MM_typecode t;
-  int m, n, nnzOld;
-  if (fseek(f, 0, SEEK_SET)) {
-    fprintf(stderr, "Error: could not set position in file\n");
-    return -1;
-  }
-  if (mm_read_banner(f, &t)) {
-    fprintf(stderr, "Error: could not read Matrix Market file banner\n");
-    return -1;
-  }
-  if (!mm_is_matrix(t) || !mm_is_coordinate(t)) {
-    fprintf(stderr, "Error: file does not contain matrix in coordinate format\n");
-    return -1;
-  }
-  if (mm_read_mtx_crd_size(f, &m, &n, &nnzOld)) {
-    fprintf(stderr, "Error: could not read matrix dimensions\n");
-    return -1;
-  }
-  if (!mm_is_pattern(t) && !mm_is_real(t) && !mm_is_integer(t) && !mm_is_complex(t)) {
-    fprintf(stderr, "Error: matrix entries are not valid type\n");
-    return -1;
-  }
-
-  // Add each matrix entry in file to COO format matrix
-  int i;      // Entry index in Matrix Market file; can only be int in the MTX format
-  int j = 0;  // Entry index in COO format matrix; can only be int in the MTX format
-  for (i = 0; i < nnzOld; ++i) {
-    // Read entry from file
-    int row, col;
-    double rval, ival;
-    int st;
-    if (mm_is_pattern(t)) {
-      st   = fscanf(f, "%d %d\n", &row, &col);
-      rval = 1.0;
-      ival = 0.0;
-    } else if (mm_is_real(t) || mm_is_integer(t)) {
-      st   = fscanf(f, "%d %d %lg\n", &row, &col, &rval);
-      ival = 0.0;
-    } else  // Complex matrix
-      st = fscanf(f, "%d %d %lg %lg\n", &row, &col, &rval, &ival);
-    if (ferror(f) || (st == EOF)) {
-      fprintf(stderr, "Error: error %d reading Matrix Market file (entry %d)\n", st, i + 1);
-      return -1;
-    }
-
-    // Switch to 0-based indexing
-    --row;
-    --col;
-
-    // Record entry
-    cooRowInd[j] = row;
-    cooColInd[j] = col;
-    if (cooRVal != NULL) cooRVal[j] = rval;
-    if (cooIVal != NULL) cooIVal[j] = ival;
-    ++j;
-
-    // Add symmetric complement of non-diagonal entries
-    if (tg && !mm_is_general(t) && (row != col)) {
-      // Modify entry value if matrix is skew symmetric or Hermitian
-      if (mm_is_skew(t)) {
-        rval = -rval;
-        ival = -ival;
-      } else if (mm_is_hermitian(t)) {
-        ival = -ival;
-      }
-
-      // Record entry
-      cooRowInd[j] = col;
-      cooColInd[j] = row;
-      if (cooRVal != NULL) cooRVal[j] = rval;
-      if (cooIVal != NULL) cooIVal[j] = ival;
-      ++j;
-    }
-  }
-  return 0;
-}
-
-int read_binary_vector(FILE* fpin, int n, std::vector<float>& val)
-{
-  size_t is_read1;
-
-  double* t_storage = new double[n];
-  is_read1          = fread(t_storage, sizeof(double), n, fpin);
-  for (int i = 0; i < n; i++) {
-    if (t_storage[i] == DBL_MAX)
-      val[i] = FLT_MAX;
-    else if (t_storage[i] == -DBL_MAX)
-      val[i] = -FLT_MAX;
-    else
-      val[i] = static_cast<float>(t_storage[i]);
-  }
-  delete[] t_storage;
-
-  if (is_read1 != (size_t)n) {
-    printf("%s", "I/O fail\n");
-    return 1;
-  }
-  return 0;
-}
-
-int read_binary_vector(FILE* fpin, int n, std::vector<double>& val)
-{
-  size_t is_read1;
-
-  is_read1 = fread(&val[0], sizeof(double), n, fpin);
-
-  if (is_read1 != (size_t)n) {
-    printf("%s", "I/O fail\n");
-    return 1;
-  }
-  return 0;
-}
-
-// FIXME: A similar function could be useful for CSC format
-//        There are functions above that operate coo -> csr and coo->csc
-/**
- * @tparam
- */
-template <typename vertex_t, typename edge_t, typename weight_t>
-std::unique_ptr<cugraph::GraphCSR<vertex_t, edge_t, weight_t>> generate_graph_csr_from_mm(
-  bool& directed, std::string mm_file)
-{
-  vertex_t number_of_vertices;
-  edge_t number_of_edges;
-
-  FILE* fpin = fopen(mm_file.c_str(), "r");
-  EXPECT_NE(fpin, nullptr);
-
-  vertex_t number_of_columns = 0;
-  MM_typecode mm_typecode{0};
-  EXPECT_EQ(mm_properties<vertex_t>(
-              fpin, 1, &mm_typecode, &number_of_vertices, &number_of_columns, &number_of_edges),
-            0);
-  EXPECT_TRUE(mm_is_matrix(mm_typecode));
-  EXPECT_TRUE(mm_is_coordinate(mm_typecode));
-  EXPECT_FALSE(mm_is_complex(mm_typecode));
-  EXPECT_FALSE(mm_is_skew(mm_typecode));
-
-  directed = !mm_is_symmetric(mm_typecode);
-
-  // Allocate memory on host
-  std::vector<vertex_t> coo_row_ind(number_of_edges);
-  std::vector<vertex_t> coo_col_ind(number_of_edges);
-  std::vector<weight_t> coo_val(number_of_edges);
-
-  // Read
-  EXPECT_EQ((mm_to_coo<vertex_t, weight_t>(
-              fpin, 1, number_of_edges, &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], NULL)),
-            0);
-  EXPECT_EQ(fclose(fpin), 0);
-
-  cugraph::GraphCOOView<vertex_t, edge_t, weight_t> cooview(
-    &coo_row_ind[0], &coo_col_ind[0], &coo_val[0], number_of_vertices, number_of_edges);
-
-  return cugraph::coo_to_csr(cooview);
-}
-
-template <typename vertex_t, typename edge_t, typename weight_t>
-edgelist_from_market_matrix_file_t<vertex_t, weight_t> read_edgelist_from_matrix_market_file(
-  std::string const& graph_file_full_path)
-{
-  edgelist_from_market_matrix_file_t<vertex_t, weight_t> ret{};
-
-  MM_typecode mc{};
-  vertex_t m{};
-  edge_t nnz{};
-
-  FILE* file = fopen(graph_file_full_path.c_str(), "r");
-  CUGRAPH_EXPECTS(file != nullptr, "fopen failure.");
-
-  edge_t tmp_m{};
-  edge_t tmp_k{};
-  auto mm_ret = cugraph::test::mm_properties<edge_t>(file, 1, &mc, &tmp_m, &tmp_k, &nnz);
-  CUGRAPH_EXPECTS(mm_ret == 0, "could not read Matrix Market file properties.");
-  m = static_cast<vertex_t>(tmp_m);
-  CUGRAPH_EXPECTS(mm_is_matrix(mc) && mm_is_coordinate(mc) && !mm_is_complex(mc) && !mm_is_skew(mc),
-                  "invalid Matrix Market file properties.");
-
-  ret.h_rows.assign(nnz, vertex_t{0});
-  ret.h_cols.assign(nnz, vertex_t{0});
-  ret.h_weights.assign(nnz, weight_t{0.0});
-  ret.number_of_vertices = m;
-  ret.is_symmetric       = mm_is_symmetric(mc);
-
-  mm_ret = cugraph::test::mm_to_coo<vertex_t, weight_t>(
-    file, 1, nnz, ret.h_rows.data(), ret.h_cols.data(), ret.h_weights.data(), nullptr);
-  CUGRAPH_EXPECTS(mm_ret == 0, "could not read matrix data");
-
-  auto file_ret = fclose(file);
-  CUGRAPH_EXPECTS(file_ret == 0, "fclose failure.");
-
-  return std::move(ret);
-}
-
-template <typename vertex_t, typename edge_t, typename weight_t, bool store_transposed>
-cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, false>
-read_graph_from_matrix_market_file(raft::handle_t const& handle,
-                                   std::string const& graph_file_full_path,
-                                   bool test_weighted)
-{
-  auto mm_graph =
-    read_edgelist_from_matrix_market_file<vertex_t, edge_t, weight_t>(graph_file_full_path);
-  edge_t number_of_edges = static_cast<edge_t>(mm_graph.h_rows.size());
-
-  rmm::device_uvector<vertex_t> d_edgelist_rows(number_of_edges, handle.get_stream());
-  rmm::device_uvector<vertex_t> d_edgelist_cols(number_of_edges, handle.get_stream());
-  rmm::device_uvector<weight_t> d_edgelist_weights(test_weighted ? number_of_edges : 0,
-                                                   handle.get_stream());
-
-  raft::update_device(
-    d_edgelist_rows.data(), mm_graph.h_rows.data(), number_of_edges, handle.get_stream());
-  raft::update_device(
-    d_edgelist_cols.data(), mm_graph.h_cols.data(), number_of_edges, handle.get_stream());
-  if (test_weighted) {
-    raft::update_device(
-      d_edgelist_weights.data(), mm_graph.h_weights.data(), number_of_edges, handle.get_stream());
-  }
-
-  cugraph::experimental::edgelist_t<vertex_t, edge_t, weight_t> edgelist{
-    d_edgelist_rows.data(),
-    d_edgelist_cols.data(),
-    test_weighted ? d_edgelist_weights.data() : nullptr,
-    number_of_edges};
-
-  return cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, false>(
-    handle,
-    edgelist,
-    mm_graph.number_of_vertices,
-    cugraph::experimental::graph_properties_t{mm_graph.is_symmetric, false},
-    false,
-    true);
-}
-
-// explicit instantiations
-
-template int mm_to_coo(
-  FILE* f, int tg, int nnz, int* cooRowInd, int* cooColInd, int* cooRVal, int* cooIVal);
-
-template int mm_to_coo(
-  FILE* f, int tg, int nnz, int* cooRowInd, int* cooColInd, double* cooRVal, double* cooIVal);
-
-template int mm_to_coo(
-  FILE* f, int tg, int nnz, int* cooRowInd, int* cooColInd, float* cooRVal, float* cooIVal);
-
-template std::unique_ptr<cugraph::GraphCSR<unsigned int, unsigned int, float>>
-generate_graph_csr_from_mm(bool& directed, std::string mm_file);
-
-template std::unique_ptr<cugraph::GraphCSR<int, int, float>> generate_graph_csr_from_mm(
-  bool& directed, std::string mm_file);
-
-template std::unique_ptr<cugraph::GraphCSR<int, int, double>> generate_graph_csr_from_mm(
-  bool& directed, std::string mm_file);
-
-template std::unique_ptr<cugraph::GraphCSR<long, long, float>> generate_graph_csr_from_mm(
-  bool& directed, std::string mm_file);
-
-template cugraph::experimental::graph_t<int, int, float, false, false>
-read_graph_from_matrix_market_file<int, int, float, false>(raft::handle_t const& handle,
-                                                           std::string const& graph_file_full_path,
-                                                           bool test_weighted);
-
-template cugraph::experimental::graph_t<int, int, float, true, false>
-read_graph_from_matrix_market_file<int, int, float, true>(raft::handle_t const& handle,
-                                                          std::string const& graph_file_full_path,
-                                                          bool test_weighted);
-
-template cugraph::experimental::graph_t<int, long, float, false, false>
-read_graph_from_matrix_market_file<int, long, float, false>(raft::handle_t const& handle,
-                                                            std::string const& graph_file_full_path,
-                                                            bool test_weighted);
-
-template cugraph::experimental::graph_t<int, long, double, false, false>
-read_graph_from_matrix_market_file<int, long, double, false>(
-  raft::handle_t const& handle, std::string const& graph_file_full_path, bool test_weighted);
-
-template cugraph::experimental::graph_t<int, int, double, false, false>
-read_graph_from_matrix_market_file<int, int, double, false>(raft::handle_t const& handle,
-                                                            std::string const& graph_file_full_path,
-                                                            bool test_weighted);
-
-template cugraph::experimental::graph_t<long, long, double, false, false>
-read_graph_from_matrix_market_file<long, long, double, false>(
-  raft::handle_t const& handle, std::string const& graph_file_full_path, bool test_weighted);
-
-template cugraph::experimental::graph_t<long, long, float, false, false>
-read_graph_from_matrix_market_file<long, long, float, false>(
-  raft::handle_t const& handle, std::string const& graph_file_full_path, bool test_weighted);
-
-}  // namespace test
-}  // namespace cugraph
diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp
index 406f09048e0..37e87c62247 100644
--- a/cpp/tests/utilities/test_utilities.hpp
+++ b/cpp/tests/utilities/test_utilities.hpp
@@ -18,7 +18,9 @@
 #include <experimental/graph.hpp>
 #include <graph.hpp>
 
-#include <cstdio>
+#include <raft/handle.hpp>
+#include <rmm/device_uvector.hpp>
+
 #include <string>
 #include <vector>
 
@@ -77,10 +79,6 @@ int mm_to_coo(FILE* f,
               ValueType_* cooRVal,
               ValueType_* cooIVal);
 
-int read_binary_vector(FILE* fpin, int n, std::vector<float>& val);
-
-int read_binary_vector(FILE* fpin, int n, std::vector<double>& val);
-
 // FIXME: A similar function could be useful for CSC format
 //        There are functions above that operate coo -> csr and coo->csc
 /**
@@ -108,24 +106,95 @@ static const std::string& get_rapids_dataset_root_dir()
   return rdrd;
 }
 
+// returns a tuple of (rows, columns, weights, number_of_vertices, is_symmetric)
 template <typename vertex_t, typename weight_t>
-struct edgelist_from_market_matrix_file_t {
-  std::vector<vertex_t> h_rows{};
-  std::vector<vertex_t> h_cols{};
-  std::vector<weight_t> h_weights{};
-  vertex_t number_of_vertices{};
-  bool is_symmetric{};
+std::tuple<rmm::device_uvector<vertex_t>,
+           rmm::device_uvector<vertex_t>,
+           rmm::device_uvector<weight_t>,
+           vertex_t,
+           bool>
+read_edgelist_from_matrix_market_file(raft::handle_t const& handle,
+                                      std::string const& graph_file_full_path,
+                                      bool test_weighted);
+
+// renumber must be true if multi_gpu is true
+template <typename vertex_t,
+          typename edge_t,
+          typename weight_t,
+          bool store_transposed,
+          bool multi_gpu>
+std::tuple<cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu>,
+           rmm::device_uvector<vertex_t>>
+read_graph_from_matrix_market_file(raft::handle_t const& handle,
+                                   std::string const& graph_file_full_path,
+                                   bool test_weighted,
+                                   bool renumber);
+
+template <typename vertex_t,
+          typename edge_t,
+          typename weight_t,
+          bool store_transposed,
+          bool multi_gpu>
+std::tuple<cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu>,
+           rmm::device_uvector<vertex_t>>
+generate_graph_from_edgelist(raft::handle_t const& handle,
+                             rmm::device_uvector<vertex_t>&& vertices,
+                             rmm::device_uvector<vertex_t>&& edgelist_rows,
+                             rmm::device_uvector<vertex_t>&& edgelist_cols,
+                             rmm::device_uvector<weight_t>&& edgelist_weights,
+                             bool is_symmetric,
+                             bool test_weighted,
+                             bool renumber);
+
+template <typename vertex_t,
+          typename edge_t,
+          typename weight_t,
+          bool store_transposed,
+          bool multi_gpu>
+std::tuple<cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, multi_gpu>,
+           rmm::device_uvector<vertex_t>>
+generate_graph_from_rmat_params(raft::handle_t const& handle,
+                                size_t scale,
+                                size_t edge_factor,
+                                double a,
+                                double b,
+                                double c,
+                                uint64_t seed,
+                                bool undirected,
+                                bool scramble_vertex_ids,
+                                bool test_weighted,
+                                bool renumber);
+
+struct rmat_params_t {  // members mirror the generate_graph_from_rmat_params() arguments
+  size_t scale{};        // NOTE(review): presumably log2 of the vertex count -- confirm
+  size_t edge_factor{};  // NOTE(review): presumably average number of edges per vertex -- confirm
+  double a{};            // a, b, c: NOTE(review): presumably R-mat quadrant probabilities -- confirm
+  double b{};
+  double c{};
+  uint64_t seed{};  // like every member, value-initialized (zero/false) by default
+  bool undirected{};
+  bool scramble_vertex_ids{};
 };
 
-template <typename vertex_t, typename edge_t, typename weight_t>
-edgelist_from_market_matrix_file_t<vertex_t, weight_t> read_edgelist_from_matrix_market_file(
-  std::string const& graph_file_full_path);
+struct input_graph_specifier_t {  // a tag plus both a Matrix Market path and R-mat settings
+  enum { MATRIX_MARKET_FILE_PATH, RMAT_PARAMS } tag{};  // tag{} is 0: MATRIX_MARKET_FILE_PATH
+  std::string graph_file_full_path{};  // NOTE(review): presumably used only for the file tag
+  rmat_params_t rmat_params{};         // NOTE(review): presumably used only for RMAT_PARAMS
+};
 
-template <typename vertex_t, typename edge_t, typename weight_t, bool store_transposed>
-cugraph::experimental::graph_t<vertex_t, edge_t, weight_t, store_transposed, false>
-read_graph_from_matrix_market_file(raft::handle_t const& handle,
-                                   std::string const& graph_file_full_path,
-                                   bool test_weighted);
+template <typename vertex_t>  // overload enabled (via enable_if) only for signed vertex_t
+std::enable_if_t<std::is_signed<vertex_t>::value, bool> is_valid_vertex(vertex_t num_vertices,
+                                                                        vertex_t v)
+{
+  return (v >= 0) && (v < num_vertices);  // true iff 0 <= v < num_vertices
+}
+
+template <typename vertex_t>  // overload enabled (via enable_if) only for unsigned vertex_t
+std::enable_if_t<std::is_unsigned<vertex_t>::value, bool> is_valid_vertex(vertex_t num_vertices,
+                                                                          vertex_t v)
+{
+  return v < num_vertices;  // unsigned v is never negative, so only the upper bound is tested
+}
 
 }  // namespace test
 }  // namespace cugraph
diff --git a/datasets/README.md b/datasets/README.md
index c7f76a91dfe..e42413fc996 100644
--- a/datasets/README.md
+++ b/datasets/README.md
@@ -1,67 +1,132 @@
-# Cugraph test and benchmark data
-
-## Python
-
-This directory contains small public datasets in `mtx` and `csv` format used by cuGraph's python tests. Graph details:
-
-| Graph         | V     | E     | Directed | Weighted |
-| ------------- | ----- | ----- | -------- | -------- |
-| karate        | 34    | 156   | No       | No       |
-| dolphin       | 62    | 318   | No       | No       |
-| netscience    | 1,589 | 5,484 | No       | Yes      |
-
-**karate** : The graph "karate" contains the network of friendships between the 34 members of a karate club at a US university, as described by Wayne Zachary in 1977.
-
-**dolphin** : The graph dolphins contains an undirected social network of frequent associations between 62 dolphins in a community living off Doubtful Sound, New Zealand, as compiled by Lusseau et al. (2003).
-
-**netscience** : The graph netscience contains a coauthorship network of scientists working on network theory and experiment, as compiled by M. Newman in May 2006.
-
-## C++
-Cugraph's C++ analytics tests need larger datasets (>5GB uncompressed) and reference results (>125MB uncompressed). They can be downloaded by running the provided script from the `datasets` directory.
-```
-cd <repo>/datasets
-./get_test_data.sh
-```
-You may run this script from elsewhere and store C++ test input to another location.
-
-Before running the tests, you should let cuGraph know where to find the test input by using:
-```
-export RAPIDS_DATASET_ROOT_DIR=<path_to_ccp_test_and_reference_data>
-```
-
-## Benchmarks
-Cugraph benchmarks (which can be found [here](../benchmarks)) also use datasets installed to this folder. Because the datasets used for benchmarking are also quite large (~14GB uncompressed), they are not installed by default. To install datasets for benchmarks, run the same script shown above from the `datasets` directory using the `--benchmark` option:
-```
-cd <repo>/datasets
-./get_test_data.sh --benchmark
-```
-The datasets installed for benchmarks currently include CSV files for use in creating both directed and undirected graphs:
-```
-<repo>/datasets/csv
- |- directed
- |--- cit-Patents.csv       (250M)
- |--- soc-LiveJournal1.csv  (965M)
- |- undirected
- |--- europe_osm.csv        (1.8G)
- |--- hollywood.csv         (1.5G)
- |--- soc-twitter-2010.csv  (8.8G)
-```
-The benchmark datasets are described below:
-| Graph             | V          | E             | Directed | Weighted |
-| ----------------- | ---------- | ------------- | -------- | -------- |
-| cit-Patents       |  3,774,768 |    16,518,948 | Yes      | No       |
-| soc-LiveJournal1  |  4,847,571 |    43,369,619 | Yes      | No       |
-| europe_osm        | 50,912,018 |    54,054,660 | No       | No       |
-| hollywood         |  1,139,905 |    57,515,616 | No       | No       |
-| soc-twitter-2010  | 21,297,772 |   265,025,809 | No       | No       |
-
-**cit-Patents** : A citation graph that includes all citations made by patents granted between 1975 and 1999, totaling 16,522,438 citations.
-**soc-LiveJournal** : A graph of the LiveJournal social network.
-**europe_osm** : A graph of OpenStreetMap data for Europe.
-**hollywood** : A graph of movie actors where vertices are actors, and two actors are joined by an edge whenever they appeared in a movie together.
-**soc-twitter-2010** : A network of follower relationships from a snapshot of Twitter in 2010, where an edge from i to j indicates that j is a follower of i.
-
-_NOTE: the benchmark datasets were converted to a CSV format from their original format described in the reference URL below, and in doing so had edge weights and isolated vertices discarded._
-
-## Reference
-The SuiteSparse Matrix Collection (formerly the University of Florida Sparse Matrix Collection) : https://sparse.tamu.edu/
+# Cugraph test and benchmark data
+
+## Python
+
+This directory contains small public datasets in `mtx` and `csv` format used by cuGraph's python tests. Graph details:
+
+| Graph         | V     | E     | Directed | Weighted |
+| ------------- | ----- | ----- | -------- | -------- |
+| karate        | 34    | 156   | No       | No       |
+| dolphin       | 62    | 318   | No       | No       |
+| netscience    | 1,589 | 5,484 | No       | Yes      |
+
+**karate** : The graph "karate" contains the network of friendships between the 34 members of a karate club at a US university, as described by Wayne Zachary in 1977.
+
+**dolphin** : The graph dolphins contains an undirected social network of frequent associations between 62 dolphins in a community living off Doubtful Sound, New Zealand, as compiled by Lusseau et al. (2003).
+
+**netscience** : The graph netscience contains a coauthorship network of scientists working on network theory and experiment, as compiled by M. Newman in May 2006.
+
+
+
+### Modified datasets 
+
+The datasets below were added to provide input that contains self-loops, string vertex IDs, isolated vertices, and multiple edges.
+
+| Graph               | V       | E          | Directed | Weighted  | self-loops | Isolated V | String V IDs | Multi-edges | 
+| ------------------- | ------- | ---------- | -------- | --------- | ---------- | ---------- | ------------ | ----------- |
+| karate_multi_edge   | 34      | 160        | No       | Yes       | No         | No         | No           | Yes         |
+| dolphins_multi_edge | 62      | 325        | No       | Yes       | No         | No         | No           | Yes         |
+| karate_s_loop       | 34      | 160        | No       | Yes       | Yes        | No         | No           | No          |
+| dolphins_s_loop     | 62      | 321        | No       | Yes       | Yes        | No         | No           | No          |
+| karate_mod          | 37      | 156        | No       | No        | No         | Yes        | No           | No          |
+| karate_str          | 34      | 156        | No       | Yes       | No         | No         | Yes          | No          |
+
+**karate_multi_edge** : The graph "karate_multi_edge" is a modified version of the "karate" graph where multi-edges were added.
+
+**dolphins_multi_edge** : The graph "dolphins_multi_edge" is a modified version of the "dolphin" graph where multi-edges were added.
+
+**karate_s_loop** : The graph "karate_s_loop" is a modified version of the "karate" graph where self-loops were added.
+
+**dolphins_s_loop** : The graph "dolphins_s_loop" is a modified version of the "dolphin" graph where self-loops were added.
+
+**karate_mod** : The graph "karate_mod" is a modified version of the "karate" graph where vertices and edges were added.
+
+**karate_str** : The graph "karate_str" contains the network of friendships between the 34 members of a karate club at a US university, as described by Wayne Zachary in 1977. The integer vertex IDs were replaced by strings.
+
+
+### Additional datasets
+
+Larger datasets containing self-loops can be downloaded by running the provided script from the `datasets` directory using the `--self_loops` 
+option: 
+```
+cd <repo>/datasets
+./get_test_data.sh --self_loops
+```
+```
+<repo>/datasets/self_loops
+ |-ca-AstroPh  (5.3M) 
+ |-ca-CondMat  (2.8M)
+ |-ca-GrQc     (348K)
+ |-ca-HepTh    (763K)
+```
+These datasets are not currently used by any tests or benchmarks.
+
+| Graph         | V       | E          | Directed | Weighted | self-loops | Isolated V | String V IDs | Multi-edges |  
+| ------------- | ------- | --------   | -------- | -------- | ---------- | ---------- | ------------ | ----------- |
+| ca-AstroPh    | 18,772  | 198,110    | No       | No       | Yes        | No         | No           | No          |
+| ca-CondMat    | 23,133  | 93,497     | No       | Yes      | Yes        | No         | No           | No          |
+| ca-GrQc       | 5,242   | 14,387     | No       | No       | Yes        | No         | No           | No          |
+| ca-HepTh      | 9,877   | 25,998     | No       | Yes      | Yes        | No         | No           | No          |
+
+**ca-AstroPh** : The graph "ca-AstroPh" covers scientific collaborations between authors of papers submitted to the Astro Physics category in the period from January 1993 to April 2003 (124 months), as described by J. Leskovec, J. Kleinberg and C. Faloutsos in 2007.
+
+**ca-CondMat** : The graph "ca-CondMat" covers scientific collaborations between authors of papers submitted to the Condensed Matter category in the period from January 1993 to April 2003 (124 months), as described by J. Leskovec, J. Kleinberg and C. Faloutsos in 2007.
+
+**ca-GrQc** : The graph "ca-GrQc" covers scientific collaborations between authors of papers submitted to the General Relativity and Quantum Cosmology category in the period from January 1993 to April 2003 (124 months), as described by J. Leskovec, J. Kleinberg and C. Faloutsos in 2007.
+
+**ca-HepTh** : The graph "ca-HepTh" covers scientific collaborations between authors of papers submitted to the High Energy Physics - Theory category in the period from January 1993 to April 2003 (124 months), as described by J. Leskovec, J. Kleinberg and C. Faloutsos in 2007.
+
+
+## Custom path to larger datasets directory  
+
+Cugraph's C++ and Python analytics tests need larger datasets (>5GB uncompressed) and reference results (>125MB uncompressed). They can be downloaded by running the provided script from the `datasets` directory.
+```
+cd <repo>/datasets
+./get_test_data.sh
+```
+You may run this script from elsewhere and store C++ or Python test input to another location.
+
+Before running the tests, you should let cuGraph know where to find the test input by using:
+```
+export RAPIDS_DATASET_ROOT_DIR=<path_to_datasets_dir>
+```
+
+
+## Benchmarks
+
+Cugraph benchmarks (which can be found [here](../benchmarks)) also use datasets installed to this folder. Because the datasets used for benchmarking are also quite large (~14GB uncompressed), they are not installed by default. To install datasets for benchmarks, run the same script shown above from the `datasets` directory using the `--benchmark` option:
+```
+cd <repo>/datasets
+./get_test_data.sh --benchmark
+```
+The datasets installed for benchmarks currently include CSV files for use in creating both directed and undirected graphs:
+```
+<repo>/datasets/csv
+ |- directed
+ |--- cit-Patents.csv       (250M)
+ |--- soc-LiveJournal1.csv  (965M)
+ |- undirected
+ |--- europe_osm.csv        (1.8G)
+ |--- hollywood.csv         (1.5G)
+ |--- soc-twitter-2010.csv  (8.8G)
+```
+The benchmark datasets are described below:
+| Graph             | V          | E             | Directed | Weighted |
+| ----------------- | ---------- | ------------- | -------- | -------- |
+| cit-Patents       |  3,774,768 |    16,518,948 | Yes      | No       |
+| soc-LiveJournal1  |  4,847,571 |    43,369,619 | Yes      | No       |
+| europe_osm        | 50,912,018 |    54,054,660 | No       | No       |
+| hollywood         |  1,139,905 |    57,515,616 | No       | No       |
+| soc-twitter-2010  | 21,297,772 |   265,025,809 | No       | No       |
+
+**cit-Patents** : A citation graph that includes all citations made by patents granted between 1975 and 1999, totaling 16,522,438 citations.
+**soc-LiveJournal** : A graph of the LiveJournal social network.
+**europe_osm** : A graph of OpenStreetMap data for Europe.
+**hollywood** : A graph of movie actors where vertices are actors, and two actors are joined by an edge whenever they appeared in a movie together.
+**soc-twitter-2010** : A network of follower relationships from a snapshot of Twitter in 2010, where an edge from i to j indicates that j is a follower of i.
+
+_NOTE: the benchmark datasets were converted to a CSV format from their original format described in the reference URL below, and in doing so had edge weights and isolated vertices discarded._
+
+## Reference
+The SuiteSparse Matrix Collection (formerly the University of Florida Sparse Matrix Collection) : https://sparse.tamu.edu/
+The Stanford Network Analysis Platform (SNAP) : https://snap.stanford.edu/data/
diff --git a/datasets/dolphins_multi_edge.csv b/datasets/dolphins_multi_edge.csv
new file mode 100644
index 00000000000..cf6bc70918e
--- /dev/null
+++ b/datasets/dolphins_multi_edge.csv
@@ -0,0 +1,325 @@
+10 0 1.0
+14 0 1.0
+15 0 1.0
+40 0 1.0
+42 0 1.0
+47 0 1.0
+17 1 1.0
+19 1 1.0
+26 1 1.0
+27 1 1.0
+28 1 1.0
+36 1 1.0
+41 1 1.0
+54 1 1.0
+10 2 1.0
+42 2 1.0
+44 2 1.0
+61 2 1.0
+8 3 1.0
+14 3 1.0
+59 3 1.0
+51 4 1.0
+9 5 1.0
+13 5 1.0
+56 5 1.0
+57 5 1.0
+9 6 1.0
+13 6 1.0
+17 6 1.0
+54 6 1.0
+56 6 1.0
+57 6 1.0
+19 7 1.0
+27 7 1.0
+30 7 1.0
+40 7 1.0
+54 7 1.0
+20 8 1.0
+28 8 1.0
+37 8 1.0
+45 8 1.0
+59 8 1.0
+13 9 1.0
+17 9 1.0
+32 9 1.0
+41 9 1.0
+57 9 1.0
+29 10 1.0
+42 10 1.0
+47 10 1.0
+51 11 1.0
+33 12 1.0
+17 13 1.0
+32 13 1.0
+41 13 1.0
+54 13 1.0
+57 13 1.0
+16 14 1.0
+24 14 1.0
+33 14 1.0
+34 14 1.0
+37 14 1.0
+38 14 1.0
+40 14 1.0
+43 14 1.0
+50 14 1.0
+52 14 1.0
+18 15 1.0
+24 15 1.0
+40 15 1.0
+45 15 1.0
+55 15 1.0
+59 15 1.0
+20 16 1.0
+33 16 1.0
+37 16 1.0
+38 16 1.0
+50 16 1.0
+22 17 1.0
+25 17 1.0
+27 17 1.0
+31 17 1.0
+57 17 1.0
+20 18 1.0
+21 18 1.0
+24 18 1.0
+29 18 1.0
+45 18 1.0
+51 18 1.0
+30 19 1.0
+54 19 1.0
+28 20 1.0
+36 20 1.0
+38 20 1.0
+44 20 1.0
+47 20 1.0
+50 20 1.0
+29 21 1.0
+33 21 1.0
+37 21 1.0
+45 21 1.0
+51 21 1.0
+36 23 1.0
+45 23 1.0
+51 23 1.0
+29 24 1.0
+45 24 1.0
+51 24 1.0
+26 25 1.0
+27 25 1.0
+27 26 1.0
+30 28 1.0
+47 28 1.0
+35 29 1.0
+43 29 1.0
+45 29 1.0
+51 29 1.0
+52 29 1.0
+42 30 1.0
+47 30 1.0
+60 32 1.0
+34 33 1.0
+37 33 1.0
+38 33 1.0
+40 33 1.0
+43 33 1.0
+50 33 1.0
+37 34 1.0
+44 34 1.0
+49 34 1.0
+37 36 1.0
+39 36 1.0
+40 36 1.0
+59 36 1.0
+40 37 1.0
+43 37 1.0
+45 37 1.0
+61 37 1.0
+43 38 1.0
+44 38 1.0
+52 38 1.0
+58 38 1.0
+57 39 1.0
+52 40 1.0
+54 41 1.0
+54 41 1.0
+57 41 1.0
+47 42 1.0
+50 42 1.0
+50 42 1.0
+46 43 1.0
+53 43 1.0
+50 45 1.0
+51 45 1.0
+59 45 1.0
+59 45 1.0
+49 46 1.0
+57 48 1.0
+51 50 1.0
+55 51 1.0
+61 53 1.0
+57 54 1.0
+0 10 1.0
+0 14 1.0
+0 15 1.0
+59 45 1.0
+0 40 1.0
+0 42 1.0
+0 47 1.0
+1 17 1.0
+1 19 1.0
+1 26 1.0
+1 27 1.0
+1 28 1.0
+1 36 1.0
+1 41 1.0
+1 54 1.0
+2 10 1.0
+2 42 1.0
+2 44 1.0
+2 61 1.0
+54 41 1.0
+3 8 1.0
+3 14 1.0
+3 59 1.0
+4 51 1.0
+56 6 1.0
+5 9 1.0
+5 13 1.0
+5 56 1.0
+5 57 1.0
+6 9 1.0
+6 13 1.0
+6 17 1.0
+6 54 1.0
+6 56 1.0
+6 57 1.0
+7 19 1.0
+7 27 1.0
+7 30 1.0
+7 40 1.0
+7 54 1.0
+8 20 1.0
+8 28 1.0
+8 37 1.0
+8 45 1.0
+2 61 1.0
+8 59 1.0
+9 13 1.0
+9 17 1.0
+9 32 1.0
+9 41 1.0
+9 57 1.0
+10 29 1.0
+10 42 1.0
+10 47 1.0
+11 51 1.0
+12 33 1.0
+13 17 1.0
+13 32 1.0
+13 41 1.0
+13 54 1.0
+13 57 1.0
+14 16 1.0
+14 24 1.0
+14 33 1.0
+14 34 1.0
+14 37 1.0
+14 38 1.0
+14 40 1.0
+14 43 1.0
+14 50 1.0
+14 52 1.0
+15 18 1.0
+15 24 1.0
+15 40 1.0
+15 45 1.0
+15 55 1.0
+15 59 1.0
+16 20 1.0
+16 33 1.0
+16 37 1.0
+16 38 1.0
+16 50 1.0
+17 22 1.0
+17 25 1.0
+17 27 1.0
+17 31 1.0
+17 57 1.0
+18 20 1.0
+18 21 1.0
+18 24 1.0
+18 29 1.0
+18 45 1.0
+18 51 1.0
+19 30 1.0
+19 54 1.0
+20 28 1.0
+20 36 1.0
+20 38 1.0
+20 44 1.0
+20 47 1.0
+20 50 1.0
+21 29 1.0
+21 33 1.0
+21 37 1.0
+21 45 1.0
+21 51 1.0
+23 36 1.0
+23 45 1.0
+23 51 1.0
+24 29 1.0
+24 45 1.0
+24 51 1.0
+25 26 1.0
+25 27 1.0
+26 27 1.0
+28 30 1.0
+28 47 1.0
+29 35 1.0
+29 43 1.0
+29 45 1.0
+29 51 1.0
+29 52 1.0
+30 42 1.0
+30 47 1.0
+32 60 1.0
+33 34 1.0
+33 37 1.0
+33 38 1.0
+33 40 1.0
+33 43 1.0
+33 50 1.0
+34 37 1.0
+34 44 1.0
+34 49 1.0
+36 37 1.0
+36 39 1.0
+36 40 1.0
+36 59 1.0
+37 40 1.0
+37 43 1.0
+37 45 1.0
+37 61 1.0
+38 43 1.0
+38 44 1.0
+38 52 1.0
+38 58 1.0
+39 57 1.0
+40 52 1.0
+41 54 1.0
+41 57 1.0
+42 47 1.0
+42 50 1.0
+43 46 1.0
+43 53 1.0
+45 50 1.0
+45 51 1.0
+45 59 1.0
+46 49 1.0
+48 57 1.0
+50 51 1.0
+51 55 1.0
+53 61 1.0
+54 57 1.0
diff --git a/datasets/dolphins_s_loop.csv b/datasets/dolphins_s_loop.csv
new file mode 100644
index 00000000000..703b8440afa
--- /dev/null
+++ b/datasets/dolphins_s_loop.csv
@@ -0,0 +1,321 @@
+10 0 1.0
+14 0 1.0
+15 0 1.0
+40 0 1.0
+42 0 1.0
+47 0 1.0
+17 1 1.0
+19 1 1.0
+26 1 1.0
+27 1 1.0
+28 1 1.0
+36 1 1.0
+41 1 1.0
+54 1 1.0
+10 2 1.0
+42 2 1.0
+44 2 1.0
+61 2 1.0
+8 3 1.0
+14 3 1.0
+59 3 1.0
+51 4 1.0
+9 5 1.0
+13 5 1.0
+56 5 1.0
+57 5 1.0
+9 6 1.0
+13 6 1.0
+17 6 1.0
+54 6 1.0
+56 6 1.0
+57 6 1.0
+19 7 1.0
+27 7 1.0
+30 7 1.0
+40 7 1.0
+54 7 1.0
+20 8 1.0
+28 8 1.0
+37 8 1.0
+45 8 1.0
+59 8 1.0
+13 9 1.0
+17 9 1.0
+32 9 1.0
+41 9 1.0
+57 9 1.0
+29 10 1.0
+42 10 1.0
+47 10 1.0
+51 11 1.0
+33 12 1.0
+17 13 1.0
+32 13 1.0
+41 13 1.0
+54 13 1.0
+57 13 1.0
+16 14 1.0
+24 14 1.0
+33 14 1.0
+34 14 1.0
+37 14 1.0
+38 14 1.0
+40 14 1.0
+43 14 1.0
+50 14 1.0
+52 14 1.0
+18 15 1.0
+24 15 1.0
+40 15 1.0
+45 15 1.0
+55 15 1.0
+59 15 1.0
+20 16 1.0
+33 16 1.0
+37 16 1.0
+38 16 1.0
+50 16 1.0
+22 17 1.0
+25 17 1.0
+27 17 1.0
+31 17 1.0
+57 17 1.0
+20 18 1.0
+21 18 1.0
+24 18 1.0
+29 18 1.0
+45 18 1.0
+51 18 1.0
+30 19 1.0
+54 19 1.0
+28 20 1.0
+36 20 1.0
+38 20 1.0
+44 20 1.0
+47 20 1.0
+50 20 1.0
+29 21 1.0
+33 21 1.0
+37 21 1.0
+45 21 1.0
+51 21 1.0
+36 23 1.0
+45 23 1.0
+51 23 1.0
+29 24 1.0
+45 24 1.0
+51 24 1.0
+26 25 1.0
+27 25 1.0
+27 26 1.0
+30 28 1.0
+47 28 1.0
+35 29 1.0
+43 29 1.0
+45 29 1.0
+51 29 1.0
+52 29 1.0
+42 30 1.0
+47 30 1.0
+60 32 1.0
+34 33 1.0
+37 33 1.0
+38 33 1.0
+40 33 1.0
+43 33 1.0
+50 33 1.0
+37 34 1.0
+44 34 1.0
+49 34 1.0
+37 36 1.0
+39 36 1.0
+40 36 1.0
+59 36 1.0
+40 37 1.0
+43 37 1.0
+43 43 1.0
+45 37 1.0
+61 37 1.0
+43 38 1.0
+44 38 1.0
+52 38 1.0
+58 38 1.0
+57 39 1.0
+52 40 1.0
+52 52 1.0
+54 41 1.0
+57 41 1.0
+47 42 1.0
+50 42 1.0
+46 43 1.0
+53 43 1.0
+50 45 1.0
+51 45 1.0
+59 45 1.0
+49 46 1.0
+57 48 1.0
+51 50 1.0
+55 51 1.0
+61 53 1.0
+57 54 1.0
+0 10 1.0
+0 14 1.0
+0 15 1.0
+0 40 1.0
+0 42 1.0
+0 47 1.0
+1 17 1.0
+1 19 1.0
+1 26 1.0
+1 1 1.0
+1 27 1.0
+1 28 1.0
+1 36 1.0
+1 41 1.0
+1 54 1.0
+2 10 1.0
+2 42 1.0
+2 44 1.0
+2 61 1.0
+3 8 1.0
+3 14 1.0
+3 59 1.0
+4 51 1.0
+5 9 1.0
+5 13 1.0
+5 56 1.0
+5 57 1.0
+6 9 1.0
+6 13 1.0
+6 17 1.0
+6 54 1.0
+6 56 1.0
+6 57 1.0
+7 19 1.0
+7 27 1.0
+7 30 1.0
+7 40 1.0
+7 54 1.0
+8 20 1.0
+8 28 1.0
+8 37 1.0
+8 45 1.0
+8 59 1.0
+9 13 1.0
+9 17 1.0
+9 32 1.0
+9 41 1.0
+9 57 1.0
+10 29 1.0
+10 42 1.0
+10 47 1.0
+11 51 1.0
+12 33 1.0
+13 17 1.0
+13 32 1.0
+13 41 1.0
+13 54 1.0
+13 57 1.0
+14 16 1.0
+14 24 1.0
+14 33 1.0
+14 34 1.0
+14 37 1.0
+14 38 1.0
+14 40 1.0
+14 43 1.0
+14 50 1.0
+14 52 1.0
+15 18 1.0
+15 24 1.0
+15 40 1.0
+15 45 1.0
+15 55 1.0
+15 59 1.0
+16 20 1.0
+16 33 1.0
+16 37 1.0
+16 38 1.0
+16 50 1.0
+17 22 1.0
+17 25 1.0
+17 27 1.0
+17 31 1.0
+17 57 1.0
+18 20 1.0
+18 21 1.0
+18 24 1.0
+18 29 1.0
+18 45 1.0
+18 51 1.0
+19 30 1.0
+19 54 1.0
+20 28 1.0
+20 36 1.0
+20 38 1.0
+20 44 1.0
+20 47 1.0
+20 50 1.0
+21 29 1.0
+21 33 1.0
+21 37 1.0
+21 45 1.0
+21 51 1.0
+23 36 1.0
+23 45 1.0
+23 51 1.0
+24 29 1.0
+24 45 1.0
+24 51 1.0
+25 26 1.0
+25 27 1.0
+26 27 1.0
+28 30 1.0
+28 47 1.0
+29 35 1.0
+29 43 1.0
+29 45 1.0
+29 51 1.0
+29 52 1.0
+30 42 1.0
+30 47 1.0
+32 60 1.0
+33 34 1.0
+33 37 1.0
+33 38 1.0
+33 40 1.0
+33 43 1.0
+33 50 1.0
+34 37 1.0
+34 44 1.0
+34 49 1.0
+36 37 1.0
+36 39 1.0
+36 40 1.0
+36 59 1.0
+37 40 1.0
+37 43 1.0
+37 45 1.0
+37 61 1.0
+38 43 1.0
+38 44 1.0
+38 52 1.0
+38 58 1.0
+39 57 1.0
+40 52 1.0
+41 54 1.0
+41 57 1.0
+42 47 1.0
+42 50 1.0
+43 46 1.0
+43 53 1.0
+45 50 1.0
+45 51 1.0
+45 59 1.0
+46 49 1.0
+48 57 1.0
+50 51 1.0
+51 55 1.0
+53 61 1.0
+54 57 1.0
diff --git a/datasets/get_test_data.sh b/datasets/get_test_data.sh
index 3e0b6c55c37..0bd97b55cb5 100755
--- a/datasets/get_test_data.sh
+++ b/datasets/get_test_data.sh
@@ -61,6 +61,12 @@ BENCHMARK_DATASET_DATA="
 https://rapidsai-data.s3.us-east-2.amazonaws.com/cugraph/benchmark/benchmark_csv_data.tgz
 csv
 "
+
+SELF_LOOPS_DATASET_DATA="
+# ~1s download 
+https://rapidsai-data.s3.us-east-2.amazonaws.com/cugraph/benchmark/benchmark_csv_data_self_loops.tgz
+self_loops
+"
 ################################################################################
 # Do not change the script below this line if only adding/updating a dataset
 
@@ -71,7 +77,7 @@ function hasArg {
 }
 
 if hasArg -h || hasArg --help; then
-    echo "$0 [--subset | --benchmark]"
+    echo "$0 [--subset | --benchmark | --self_loops]"
     exit 0
 fi
 
@@ -80,6 +86,8 @@ if hasArg "--benchmark"; then
     DATASET_DATA="${BENCHMARK_DATASET_DATA}"
 elif hasArg "--subset"; then
     DATASET_DATA="${BASE_DATASET_DATA}"
+elif hasArg "--self_loops"; then 
+    DATASET_DATA="${SELF_LOOPS_DATASET_DATA}"
 # Do not include benchmark datasets by default - too big
 else
     DATASET_DATA="${BASE_DATASET_DATA} ${EXTENDED_DATASET_DATA}"
diff --git a/datasets/karate_mod.mtx b/datasets/karate_mod.mtx
new file mode 100644
index 00000000000..3a562406800
--- /dev/null
+++ b/datasets/karate_mod.mtx
@@ -0,0 +1,81 @@
+2 1
+3 1
+4 1
+5 1
+6 1
+7 1
+8 1
+9 1
+11 1
+12 1
+13 1
+14 1
+18 1
+20 1
+22 1
+32 1
+3 2
+4 2
+8 2
+14 2
+18 2
+20 2
+22 2
+31 2
+4 3
+8 3
+9 3
+10 3
+14 3
+28 3
+29 3
+33 3
+8 4
+13 4
+14 4
+7 5
+11 5
+7 6
+11 6
+17 6
+17 7
+31 9 
+33 9
+34 9
+34 10
+34 14
+33 15
+34 15
+33 16
+34 16
+33 19
+34 19
+34 20
+33 21
+34 21
+33 23
+34 23
+26 24
+28 24
+30 24
+33 24
+34 24
+26 25
+28 25
+32 25
+32 26
+30 27
+34 27
+34 28
+32 29
+34 29
+33 30
+34 30
+33 31
+34 31
+33 32
+34 32
+34 33
+35
+36
+37
diff --git a/datasets/karate_multi_edge.csv b/datasets/karate_multi_edge.csv
new file mode 100644
index 00000000000..6f331b77a59
--- /dev/null
+++ b/datasets/karate_multi_edge.csv
@@ -0,0 +1,160 @@
+1 0 1.0
+2 0 1.0
+3 0 1.0
+4 0 1.0
+5 0 1.0
+6 0 1.0
+7 0 1.0
+8 0 1.0
+10 0 1.0
+11 0 1.0
+12 0 1.0
+13 0 1.0
+17 0 1.0
+19 0 1.0
+21 0 1.0
+31 0 1.0
+2 1 1.0
+3 1 1.0
+7 1 1.0
+13 1 1.0
+7 0 1.0
+17 1 1.0
+19 1 1.0
+21 1 1.0
+30 1 1.0
+3 2 1.0
+7 2 1.0
+8 2 1.0
+9 2 1.0
+13 2 1.0
+27 2 1.0
+28 2 1.0
+32 2 1.0
+7 3 1.0
+12 3 1.0
+13 3 1.0
+6 4 1.0
+10 4 1.0
+6 5 1.0
+10 5 1.0
+16 5 1.0
+16 6 1.0
+30 8 1.0
+32 8 1.0
+33 8 1.0
+28 2 1.0
+33 9 1.0
+33 13 1.0
+32 14 1.0
+33 14 1.0
+32 15 1.0
+33 15 1.0
+32 18 1.0
+33 18 1.0
+33 19 1.0
+32 20 1.0
+33 20 1.0
+32 22 1.0
+33 22 1.0
+25 23 1.0
+27 23 1.0
+29 23 1.0
+32 23 1.0
+33 23 1.0
+25 24 1.0
+27 24 1.0
+31 24 1.0
+31 25 1.0
+29 26 1.0
+33 26 1.0
+33 27 1.0
+31 28 1.0
+33 28 1.0
+32 29 1.0
+33 29 1.0
+32 22 1.0
+32 30 1.0
+33 30 1.0
+32 31 1.0
+33 31 1.0
+33 32 1.0
+0 1 1.0
+0 2 1.0
+0 3 1.0
+0 4 1.0
+0 5 1.0
+0 6 1.0
+0 7 1.0
+0 8 1.0
+0 10 1.0
+0 11 1.0
+0 12 1.0
+0 6 1.0
+0 13 1.0
+0 17 1.0
+0 19 1.0
+0 21 1.0
+0 31 1.0
+1 2 1.0
+1 3 1.0
+1 7 1.0
+1 13 1.0
+1 17 1.0
+1 19 1.0
+1 21 1.0
+1 30 1.0
+2 3 1.0
+2 7 1.0
+2 8 1.0
+2 9 1.0
+2 13 1.0
+2 27 1.0
+2 28 1.0
+2 32 1.0
+3 7 1.0
+3 12 1.0
+3 13 1.0
+4 6 1.0
+4 10 1.0
+5 6 1.0
+5 10 1.0
+5 16 1.0
+6 16 1.0
+8 30 1.0
+8 32 1.0
+8 33 1.0
+9 33 1.0
+13 33 1.0
+14 32 1.0
+14 33 1.0
+15 32 1.0
+15 33 1.0
+18 32 1.0
+18 33 1.0
+19 33 1.0
+20 32 1.0
+20 33 1.0
+22 32 1.0
+22 33 1.0
+23 25 1.0
+23 27 1.0
+23 29 1.0
+23 32 1.0
+23 33 1.0
+24 25 1.0
+24 27 1.0
+24 31 1.0
+25 31 1.0
+26 29 1.0
+26 33 1.0
+27 33 1.0
+28 31 1.0
+28 33 1.0
+29 32 1.0
+29 33 1.0
+30 32 1.0
+30 33 1.0
+31 32 1.0
+31 33 1.0
+32 33 1.0
diff --git a/datasets/karate_s_loop.csv b/datasets/karate_s_loop.csv
new file mode 100644
index 00000000000..3959e5f98b3
--- /dev/null
+++ b/datasets/karate_s_loop.csv
@@ -0,0 +1,160 @@
+1 0 1.0
+2 0 1.0
+3 0 1.0
+4 0 1.0
+5 0 1.0
+6 0 1.0
+7 0 1.0
+8 0 1.0
+10 0 1.0
+11 0 1.0
+12 0 1.0
+13 0 1.0
+17 0 1.0
+19 0 1.0
+21 0 1.0
+31 0 1.0
+2 1 1.0
+3 1 1.0
+7 1 1.0
+13 1 1.0
+17 1 1.0
+19 1 1.0
+21 1 1.0
+30 1 1.0
+3 2 1.0
+7 2 1.0
+8 2 1.0
+9 2 1.0
+13 2 1.0
+27 2 1.0
+28 2 1.0
+32 2 1.0
+7 3 1.0
+12 3 1.0
+13 3 1.0
+6 4 1.0
+10 4 1.0
+6 5 1.0
+10 5 1.0
+10 10 1.0
+16 5 1.0
+16 6 1.0
+30 8 1.0
+32 8 1.0
+33 8 1.0
+33 9 1.0
+33 13 1.0
+32 14 1.0
+33 14 1.0
+32 15 1.0
+33 15 1.0
+32 18 1.0
+33 18 1.0
+33 19 1.0
+32 20 1.0
+33 20 1.0
+32 22 1.0
+33 22 1.0
+25 23 1.0
+27 23 1.0
+29 23 1.0
+32 23 1.0
+33 23 1.0
+25 24 1.0
+27 24 1.0
+31 24 1.0
+31 25 1.0
+29 26 1.0
+33 26 1.0
+33 27 1.0
+31 28 1.0
+33 28 1.0
+32 29 1.0
+33 29 1.0
+32 30 1.0
+33 30 1.0
+32 31 1.0
+33 31 1.0
+33 32 1.0
+0 1 1.0
+0 2 1.0
+0 3 1.0
+0 4 1.0
+0 5 1.0
+0 6 1.0
+0 7 1.0
+0 8 1.0
+0 10 1.0
+0 11 1.0
+0 12 1.0
+0 13 1.0
+0 17 1.0
+0 19 1.0
+0 21 1.0
+0 31 1.0
+1 2 1.0
+1 3 1.0
+1 7 1.0
+1 13 1.0
+1 1 1.0
+1 17 1.0
+1 19 1.0
+1 21 1.0
+1 30 1.0
+2 3 1.0
+2 7 1.0
+2 8 1.0
+2 9 1.0
+2 13 1.0
+2 27 1.0
+2 28 1.0
+2 32 1.0
+3 7 1.0
+3 12 1.0
+3 13 1.0
+4 6 1.0
+4 10 1.0
+5 6 1.0
+5 10 1.0
+5 16 1.0
+6 16 1.0
+8 30 1.0
+8 32 1.0
+8 33 1.0
+9 33 1.0
+13 33 1.0
+13 13 1.0
+14 32 1.0
+14 33 1.0
+15 32 1.0
+15 33 1.0
+18 32 1.0
+18 33 1.0
+19 33 1.0
+20 32 1.0
+20 33 1.0
+22 32 1.0
+22 33 1.0
+23 25 1.0
+23 27 1.0
+23 29 1.0
+23 32 1.0
+23 33 1.0
+24 25 1.0
+24 27 1.0
+24 31 1.0
+25 31 1.0
+26 29 1.0
+26 33 1.0
+27 33 1.0
+28 31 1.0
+28 33 1.0
+29 32 1.0
+29 33 1.0
+30 32 1.0
+30 33 1.0
+31 32 1.0
+31 31 1.0
+31 33 1.0
+32 33 1.0
diff --git a/datasets/karate_str.mtx b/datasets/karate_str.mtx
new file mode 100644
index 00000000000..0564d30f91d
--- /dev/null
+++ b/datasets/karate_str.mtx
@@ -0,0 +1,78 @@
+9q a9 1
+ts a9 1
+kt a9 1
+j7 a9 1
+wr a9 1
+n3 a9 1
+2w a9 1
+8a a9 1
+ci a9 1
+cq a9 1
+ca a9 1
+gd a9 1
+y4 a9 1
+kx a9 1 
+u3 a9 1
+id a9 1
+ts 9q 1
+kt 9q 1
+2w 9q 1
+gd 9q 1
+y4 9q 1
+kx 9q 1
+u3 9q 1
+7p 9q 1
+kt ts 1
+2w ts 1
+8a ts 1
+ax ts 1
+gd ts 1
+84 ts 1
+ar ts 1
+05 ts 1
+2w kt 1
+ca kt 1
+gd kt 1
+n3 j7 1
+ci j7 1
+n3 wr 1
+ci wr 1
+27 wr 1
+27 n3 1
+7p 8a 1
+05 8a 1
+ux 8a 1
+ux ax 1
+ux gd 1
+05 r9 1
+ux r9 1
+05 44 1
+ux 44 1
+05 a6 1
+ux a6 1
+ux kx 1
+05 d5 1
+ux d5 1
+05 gk 1
+ux gk 1
+fo em 1
+84 em 1
+wc em 1
+05 em 1
+ux em 1
+fo 1j 1
+84 1j 1
+id 1j 1
+id fo 1
+wc nm 1
+ux nm 1
+ux 84 1
+id ar 1
+ux ar 1
+05 wc 1
+ux wc 1
+05 7p 1
+ux 7p 1
+05 id 1
+ux id 1
+ux 05 1
diff --git a/docs/source/api.rst b/docs/source/api.rst
index dcdf3e6ff33..b02f8f488c5 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -48,6 +48,13 @@ Katz Centrality
     :undoc-members:
 
 
+Katz Centrality (MG)
+--------------------
+
+.. automodule:: cugraph.dask.centrality.katz_centrality
+    :members:
+    :undoc-members:
+
 Community
 =========
 
@@ -86,6 +93,14 @@ Louvain
     :members:
     :undoc-members:
 
+Louvain (MG)
+------------
+
+.. automodule:: cugraph.dask.community.louvain
+    :members:
+    :undoc-members:
+
+
 Spectral Clustering
 -------------------
 
@@ -148,6 +163,17 @@ Force Atlas 2
     :undoc-members:
 
 
+Linear Assignment
+=================
+
+Hungarian
+-------------
+
+.. automodule:: cugraph.linear_assignment.hungarian
+    :members:
+    :undoc-members:
+
+
 Link Analysis
 =============
 
@@ -165,6 +191,13 @@ Pagerank
     :members:
     :undoc-members:
 
+Pagerank (MG)
+-------------
+
+.. automodule:: cugraph.dask.link_analysis.pagerank
+    :members: pagerank
+    :undoc-members: 
+
 
 Link Prediction
 ===============
@@ -202,6 +235,13 @@ Breadth-first-search
     :members:
     :undoc-members:
 
+Breadth-first-search (MG)
+-------------------------
+
+.. automodule:: cugraph.dask.traversal.bfs
+    :members:
+    :undoc-members:
+
 Single-source-shortest-path
 ---------------------------
 
@@ -209,6 +249,13 @@ Single-source-shortest-path
     :members:
     :undoc-members:
 
+Single-source-shortest-path (MG)
+--------------------------------
+
+.. automodule:: cugraph.dask.traversal.sssp
+    :members:
+    :undoc-members:
+
 
 Tree
 =========
@@ -227,3 +274,18 @@ Maximum Spanning Tree
     :members:
     :undoc-members:
 
+
+DASK MG Helper functions 
+===========================
+
+.. automodule:: cugraph.comms.comms
+    :members: initialize
+    :undoc-members:
+
+.. automodule:: cugraph.comms.comms
+    :members: destroy
+    :undoc-members:
+
+.. automodule:: cugraph.dask.common.read_utils
+    :members: get_chunksize
+    :undoc-members:
diff --git a/docs/source/dask-cugraph.rst b/docs/source/dask-cugraph.rst
index b27ad382809..51487bfbf05 100644
--- a/docs/source/dask-cugraph.rst
+++ b/docs/source/dask-cugraph.rst
@@ -13,58 +13,41 @@ With cuGraph and Dask, whether you’re using a single NVIDIA GPU or multiple no
 
 If your graph comfortably fits in memory on a single GPU, you would want to use the single-GPU version of cuGraph. If you want to distribute your workflow across multiple GPUs and have more data than you can fit in memory on a single GPU, you would want to use cuGraph's multi-GPU features.
 
+Example
+========
 
-Distributed Graph Algorithms
-----------------------------
+.. code-block:: python
 
-.. automodule:: cugraph.dask.link_analysis.pagerank
-    :members: pagerank
-    :undoc-members: 
+    from dask.distributed import Client, wait
+    from dask_cuda import LocalCUDACluster
+    import cugraph.comms as Comms
+    import cugraph.dask as dask_cugraph
 
-.. automodule:: cugraph.dask.traversal.bfs
-    :members: bfs
-    :undoc-members: 
+    cluster = LocalCUDACluster()
+    client = Client(cluster)
+    Comms.initialize(p2p=True)
 
+    # Helper function to set the reader chunk size to automatically get one partition per GPU  
+    chunksize = dask_cugraph.get_chunksize(input_data_path)
 
-Helper functions 
-----------------
+    # Multi-GPU CSV reader
+    e_list = dask_cudf.read_csv(input_data_path, 
+            chunksize = chunksize, 
+            delimiter=' ', 
+            names=['src', 'dst'], 
+            dtype=['int32', 'int32'])
 
-.. automodule:: cugraph.comms.comms
-    :members: initialize
-    :undoc-members:
+    G = cugraph.DiGraph()
+    G.from_dask_cudf_edgelist(e_list, source='src', destination='dst')
 
-.. automodule:: cugraph.comms.comms
-    :members: destroy
-    :undoc-members:
+    # now run PageRank
+    pr_df = dask_cugraph.pagerank(G, tol=1e-4)
 
-.. automodule:: cugraph.dask.common.read_utils
-    :members: get_chunksize
-    :undoc-members:
+    # All done, clean up
+    Comms.destroy()
+    client.close()
+    cluster.close()
 
-Consolidation
-=============
 
-cuGraph can transparently interpret the Dask cuDF Dataframe as a regular Dataframe when loading the edge list. This is particularly helpful for workflows extracting a single GPU sized edge list from a distributed dataset. From there any existing single GPU feature will just work on this input.
+|
 
-For instance, consolidation allows leveraging Dask cuDF CSV reader to load file(s) on multiple GPUs and consolidate this input to a single GPU graph. Reading is often the time and memory bottleneck, with this feature users can call the Multi-GPU version of the reader without changing anything else. 
-
-Batch Processing
-================
-
-cuGraph can leverage multi GPUs to increase processing speed for graphs that fit on a single GPU, providing faster analytics on such graphs.
-You will be able to use the Graph the same way as you used to in a Single GPU environment, but analytics that support batch processing will automatically use the GPUs available to the dask client.
-For example, Betweenness Centrality scores can be slow to obtain depending on the number of vertices used in the approximation. Thank to Multi GPUs Batch Processing,
-you can create Single GPU graph as you would regularly do it using cuDF CSV reader, enable Batch analytics on it, and obtain scores much faster as each GPU will handle a sub-set of the sources.
-In order to use Batch Analytics you need to set up a Dask Cluster and Client in addition to the cuGraph communicator, then you can simply call `enable_batch()` on you graph, and algorithms supporting batch processing will use multiple GPUs.
-
-Algorithms supporting Batch Processing
---------------------------------------
-.. automodule:: cugraph.centrality
-    :members: betweenness_centrality
-    :undoc-members:
-    :noindex:
-
-.. automodule:: cugraph.centrality
-    :members: edge_betweenness_centrality
-    :undoc-members:
-    :noindex:
diff --git a/python/cugraph/bsp/traversal/bfs_bsp.py b/python/cugraph/bsp/traversal/bfs_bsp.py
index 28a71631443..9a2fd48e201 100644
--- a/python/cugraph/bsp/traversal/bfs_bsp.py
+++ b/python/cugraph/bsp/traversal/bfs_bsp.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -11,7 +11,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
+import warnings
 import cudf
 from collections import OrderedDict
 
@@ -59,6 +59,12 @@ def bfs_df_pregel(_df, start, src_col='src', dst_col='dst', copy_data=True):
 
     """
 
+    warnings.warn(
+            "This feature is deprecated and will be "
+            "dropped from cuGraph in release 0.20.",
+            FutureWarning,
+        )
+
     # extract the src and dst into a dataframe that can be modified
     if copy_data:
         coo_data = _df[[src_col, dst_col]]
diff --git a/python/cugraph/centrality/betweenness_centrality_wrapper.pyx b/python/cugraph/centrality/betweenness_centrality_wrapper.pyx
index e3d6e04006f..855de3327ba 100644
--- a/python/cugraph/centrality/betweenness_centrality_wrapper.pyx
+++ b/python/cugraph/centrality/betweenness_centrality_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -17,18 +17,12 @@
 # cython: language_level = 3
 
 from cugraph.centrality.betweenness_centrality cimport betweenness_centrality as c_betweenness_centrality
-from cugraph.centrality.betweenness_centrality cimport handle_t
 from cugraph.structure.graph import DiGraph
 from cugraph.structure.graph_primtypes cimport *
 from libc.stdint cimport uintptr_t
 from libcpp cimport bool
 import cudf
 import numpy as np
-import numpy.ctypeslib as ctypeslib
-
-import dask_cudf
-import dask_cuda
-
 import cugraph.comms.comms as Comms
 from cugraph.dask.common.mg_utils import get_client
 import dask.distributed
diff --git a/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx b/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx
index 3c14d590750..136bde1b0e3 100644
--- a/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx
+++ b/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -24,8 +24,6 @@ from libc.stdint cimport uintptr_t
 from libcpp cimport bool
 import cudf
 import numpy as np
-import numpy.ctypeslib as ctypeslib
-
 from cugraph.dask.common.mg_utils import get_client
 import cugraph.comms.comms as Comms
 import dask.distributed
diff --git a/python/cugraph/centrality/katz_centrality.pxd b/python/cugraph/centrality/katz_centrality.pxd
index ebf94c78263..ce9ab5291f6 100644
--- a/python/cugraph/centrality/katz_centrality.pxd
+++ b/python/cugraph/centrality/katz_centrality.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -16,7 +16,7 @@
 # cython: embedsignature = True
 # cython: language_level = 3
 
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 from libcpp cimport bool
 
 cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
diff --git a/python/cugraph/centrality/katz_centrality_wrapper.pyx b/python/cugraph/centrality/katz_centrality_wrapper.pyx
index 088042395fd..d38a0b82824 100644
--- a/python/cugraph/centrality/katz_centrality_wrapper.pyx
+++ b/python/cugraph/centrality/katz_centrality_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -17,13 +17,10 @@
 # cython: language_level = 3
 
 from cugraph.centrality.katz_centrality cimport call_katz_centrality
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 from cugraph.structure import graph_primtypes_wrapper
-from libcpp cimport bool
 from libc.stdint cimport uintptr_t
-
 import cudf
-import rmm
 import numpy as np
 
 
diff --git a/python/cugraph/comms/comms.pxd b/python/cugraph/comms/comms.pxd
index 44f7ee77562..3984ade9a9c 100644
--- a/python/cugraph/comms/comms.pxd
+++ b/python/cugraph/comms/comms.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -16,7 +16,7 @@
 # cython: embedsignature = True
 # cython: language_level = 3
 
-from cugraph.structure.graph_primtypes cimport handle_t
+from cugraph.raft.common.handle cimport *
 
 
 cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
diff --git a/python/cugraph/comms/comms_wrapper.pyx b/python/cugraph/comms/comms_wrapper.pyx
index c1148b4c887..09fa3b1c5c7 100644
--- a/python/cugraph/comms/comms_wrapper.pyx
+++ b/python/cugraph/comms/comms_wrapper.pyx
@@ -1,5 +1,23 @@
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
 
-from cugraph.structure.graph_primtypes cimport handle_t
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+# cython: language_level = 3
+
+
+from cugraph.raft.common.handle cimport *
 from cugraph.comms.comms cimport init_subcomms as c_init_subcomms
 
 
diff --git a/python/cugraph/community/egonet.pxd b/python/cugraph/community/egonet.pxd
index 3ddf929674f..cf1c84fb5f7 100644
--- a/python/cugraph/community/egonet.pxd
+++ b/python/cugraph/community/egonet.pxd
@@ -12,7 +12,7 @@
 # limitations under the License.
 
 
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 
 cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
     cdef unique_ptr[cy_multi_edgelists_t] call_egonet[vertex_t, weight_t](
diff --git a/python/cugraph/community/egonet.py b/python/cugraph/community/egonet.py
index 9ff12158b13..ca3c6149ece 100644
--- a/python/cugraph/community/egonet.py
+++ b/python/cugraph/community/egonet.py
@@ -74,6 +74,17 @@ def ego_graph(G, n, radius=1, center=True, undirected=False, distance=None):
     G_ego : cuGraph.Graph or networkx.Graph
         A graph descriptor with a minimum spanning tree or forest.
         The networkx graph will not have all attributes copied over
+
+    Examples
+    --------
+    >>> M = cudf.read_csv('datasets/karate.csv',
+    ...                   delimiter=' ',
+    ...                   dtype=['int32', 'int32', 'float32'],
+    ...                   header=None)
+    >>> G = cugraph.Graph()
+    >>> G.from_cudf_edgelist(M, source='0', destination='1')
+    >>> ego_graph = cugraph.ego_graph(G, 1, radius=2)
+
     """
 
     (G, input_type) = ensure_cugraph_obj(G, nx_weight_attr="weight")
diff --git a/python/cugraph/community/egonet_wrapper.pyx b/python/cugraph/community/egonet_wrapper.pyx
index 122dedbfabd..ff9f2b8b3de 100644
--- a/python/cugraph/community/egonet_wrapper.pyx
+++ b/python/cugraph/community/egonet_wrapper.pyx
@@ -12,14 +12,12 @@
 # limitations under the License.
 
 from cugraph.community.egonet cimport call_egonet
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 from libcpp cimport bool
 from libc.stdint cimport uintptr_t
 from cugraph.structure import graph_primtypes_wrapper
 import cudf
-import rmm
 import numpy as np
-import numpy.ctypeslib as ctypeslib
 from rmm._lib.device_buffer cimport DeviceBuffer
 from cudf.core.buffer import Buffer
 
@@ -58,9 +56,11 @@ def egonet(input_graph, vertices, radius=1):
     # Pointers for egonet
     cdef uintptr_t c_source_vertex_ptr = vertices.__cuda_array_interface__['data'][0]
     n_subgraphs = vertices.size
-
+    n_streams = 1
+    if n_subgraphs > 1 :
+        n_streams = min(n_subgraphs, 32)
     cdef unique_ptr[handle_t] handle_ptr
-    handle_ptr.reset(new handle_t())
+    handle_ptr.reset(new handle_t(n_streams))
     handle_ = handle_ptr.get();
 
     cdef graph_container_t graph_container
diff --git a/python/cugraph/community/ktruss_subgraph_wrapper.pyx b/python/cugraph/community/ktruss_subgraph_wrapper.pyx
index 9f8138f4d57..9f38b33d774 100644
--- a/python/cugraph/community/ktruss_subgraph_wrapper.pyx
+++ b/python/cugraph/community/ktruss_subgraph_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -19,12 +19,6 @@
 from cugraph.community.ktruss_subgraph cimport *
 from cugraph.structure.graph_primtypes cimport *
 from cugraph.structure import graph_primtypes_wrapper
-from libcpp cimport bool
-from libc.stdint cimport uintptr_t
-from libc.float cimport FLT_MAX_EXP
-
-import cudf
-import rmm
 import numpy as np
 
 
diff --git a/python/cugraph/community/leiden_wrapper.pyx b/python/cugraph/community/leiden_wrapper.pyx
index 70fcfcf701b..1b41134c625 100644
--- a/python/cugraph/community/leiden_wrapper.pyx
+++ b/python/cugraph/community/leiden_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -22,7 +22,6 @@ from cugraph.structure import graph_primtypes_wrapper
 from libc.stdint cimport uintptr_t
 
 import cudf
-import rmm
 import numpy as np
 
 
diff --git a/python/cugraph/community/louvain.pxd b/python/cugraph/community/louvain.pxd
index eca15ba3d20..1f75c13dbaf 100644
--- a/python/cugraph/community/louvain.pxd
+++ b/python/cugraph/community/louvain.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -18,7 +18,7 @@
 
 
 from libcpp.utility cimport pair
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 
 
 cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
diff --git a/python/cugraph/community/louvain_wrapper.pyx b/python/cugraph/community/louvain_wrapper.pyx
index 6b218a0b962..c7ce4e8db66 100644
--- a/python/cugraph/community/louvain_wrapper.pyx
+++ b/python/cugraph/community/louvain_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -17,12 +17,11 @@
 # cython: language_level = 3
 
 from cugraph.community cimport louvain as c_louvain
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 from cugraph.structure import graph_primtypes_wrapper
 from libc.stdint cimport uintptr_t
 
 import cudf
-import rmm
 import numpy as np
 
 
diff --git a/python/cugraph/community/spectral_clustering_wrapper.pyx b/python/cugraph/community/spectral_clustering_wrapper.pyx
index 0593d987c0d..7934a386bb7 100644
--- a/python/cugraph/community/spectral_clustering_wrapper.pyx
+++ b/python/cugraph/community/spectral_clustering_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -23,12 +23,9 @@ from cugraph.community.spectral_clustering cimport analyzeClustering_edge_cut as
 from cugraph.community.spectral_clustering cimport analyzeClustering_ratio_cut as c_analyze_clustering_ratio_cut
 from cugraph.structure.graph_primtypes cimport *
 from cugraph.structure import graph_primtypes_wrapper
-from libcpp cimport bool
 from libc.stdint cimport uintptr_t
-
 import cugraph
 import cudf
-import rmm
 import numpy as np
 
 
diff --git a/python/cugraph/community/subgraph_extraction_wrapper.pyx b/python/cugraph/community/subgraph_extraction_wrapper.pyx
index 35b3c743987..31c5d2372f0 100644
--- a/python/cugraph/community/subgraph_extraction_wrapper.pyx
+++ b/python/cugraph/community/subgraph_extraction_wrapper.pyx
@@ -20,9 +20,7 @@ from cugraph.community.subgraph_extraction cimport extract_subgraph_vertex as c_
 from cugraph.structure.graph_primtypes cimport *
 from cugraph.structure import graph_primtypes_wrapper
 from libc.stdint cimport uintptr_t
-
 import cudf
-import rmm
 import numpy as np
 
 
diff --git a/python/cugraph/community/triangle_count_wrapper.pyx b/python/cugraph/community/triangle_count_wrapper.pyx
index d7cabd4676f..f1e842f9de4 100644
--- a/python/cugraph/community/triangle_count_wrapper.pyx
+++ b/python/cugraph/community/triangle_count_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -21,9 +21,7 @@ from cugraph.structure.graph_primtypes cimport *
 from cugraph.structure import graph_primtypes_wrapper
 from libc.stdint cimport uintptr_t
 import numpy as np
-
 import cudf
-import rmm
 
 
 def triangles(input_graph):
diff --git a/python/cugraph/components/connectivity.py b/python/cugraph/components/connectivity.py
index 7c68afd7ced..72f33ebfcbb 100644
--- a/python/cugraph/components/connectivity.py
+++ b/python/cugraph/components/connectivity.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -378,7 +378,7 @@ def connected_components(G,
                           header=None)
     >>> G = cugraph.Graph()
     >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr=None)
-    >>> df = cugraph.strongly_connected_components(G)
+    >>> df = cugraph.connected_components(G, connection="weak")
     """
     if connection == "weak":
         return weakly_connected_components(G, directed,
diff --git a/python/cugraph/components/connectivity_wrapper.pyx b/python/cugraph/components/connectivity_wrapper.pyx
index 8b678d16ff8..76d279a8116 100644
--- a/python/cugraph/components/connectivity_wrapper.pyx
+++ b/python/cugraph/components/connectivity_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -23,7 +23,6 @@ from cugraph.structure import graph_primtypes_wrapper
 from libc.stdint cimport uintptr_t
 from cugraph.structure.symmetrize import symmetrize
 from cugraph.structure.graph import Graph as type_Graph
-
 import cudf
 import numpy as np
 
diff --git a/python/cugraph/cores/core_number_wrapper.pyx b/python/cugraph/cores/core_number_wrapper.pyx
index 3df1df5f8e9..9fcc3b4746c 100644
--- a/python/cugraph/cores/core_number_wrapper.pyx
+++ b/python/cugraph/cores/core_number_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -20,9 +20,7 @@ cimport cugraph.cores.core_number as c_core
 from cugraph.structure.graph_primtypes cimport *
 from cugraph.structure import graph_primtypes_wrapper
 from libc.stdint cimport uintptr_t
-
 import cudf
-import rmm
 import numpy as np
 
 
diff --git a/python/cugraph/cores/k_core_wrapper.pyx b/python/cugraph/cores/k_core_wrapper.pyx
index 51ecec09dc5..a0ef99a8e8b 100644
--- a/python/cugraph/cores/k_core_wrapper.pyx
+++ b/python/cugraph/cores/k_core_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -19,12 +19,7 @@
 from cugraph.cores.k_core cimport k_core as c_k_core
 from cugraph.structure.graph_primtypes cimport *
 from cugraph.structure import graph_primtypes_wrapper
-from libcpp cimport bool
 from libc.stdint cimport uintptr_t
-from libc.float cimport FLT_MAX_EXP
-
-import cudf
-import rmm
 import numpy as np
 
 
diff --git a/python/cugraph/dask/centrality/katz_centrality.py b/python/cugraph/dask/centrality/katz_centrality.py
index cf6ad95f974..e690e291928 100644
--- a/python/cugraph/dask/centrality/katz_centrality.py
+++ b/python/cugraph/dask/centrality/katz_centrality.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -115,7 +115,8 @@ def katz_centrality(input_graph,
     Examples
     --------
     >>> import cugraph.dask as dcg
-    >>> Comms.initialize(p2p=True)
+    >>> # ... Init a DASK Cluster
+    >>> # see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html
     >>> chunksize = dcg.get_chunksize(input_data_path)
     >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize,
                                  delimiter=' ',
@@ -125,7 +126,6 @@ def katz_centrality(input_graph,
     >>> dg.from_dask_cudf_edgelist(ddf, source='src', destination='dst',
                                    edge_attr='value')
     >>> pr = dcg.katz_centrality(dg)
-    >>> Comms.destroy()
     """
 
     nstart = None
diff --git a/python/cugraph/dask/centrality/mg_katz_centrality.pxd b/python/cugraph/dask/centrality/mg_katz_centrality.pxd
index 345457b1963..fb1730da13b 100644
--- a/python/cugraph/dask/centrality/mg_katz_centrality.pxd
+++ b/python/cugraph/dask/centrality/mg_katz_centrality.pxd
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +14,7 @@
 # limitations under the License.
 #
 
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 from libcpp cimport bool
 
 
diff --git a/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx b/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx
index b8cab4e4286..ccae26fe7e6 100644
--- a/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx
+++ b/python/cugraph/dask/centrality/mg_katz_centrality_wrapper.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 from cugraph.structure.utils_wrapper import *
 from cugraph.dask.centrality cimport mg_katz_centrality as c_katz_centrality
 import cudf
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper
 from libc.stdint cimport uintptr_t
 from cython.operator cimport dereference as deref
diff --git a/python/cugraph/dask/common/part_utils.py b/python/cugraph/dask/common/part_utils.py
index 505272fa563..ac0ff6a9a43 100644
--- a/python/cugraph/dask/common/part_utils.py
+++ b/python/cugraph/dask/common/part_utils.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -83,7 +83,9 @@ async def _extract_partitions(dask_obj, client=None):
     client = default_client() if client is None else client
     # dask.dataframe or dask.array
     if isinstance(dask_obj, (daskDataFrame, daskArray, daskSeries)):
-        parts = persist_distributed_data(dask_obj, client)
+        # Persist the collection via the client, then collect its futures.
+        persisted = client.persist(dask_obj)
+        parts = futures_of(persisted)
     # iterable of dask collections (need to colocate them)
     elif isinstance(dask_obj, collections.Sequence):
         # NOTE: We colocate (X, y) here by zipping delayed
diff --git a/python/cugraph/dask/community/louvain.pxd b/python/cugraph/dask/community/louvain.pxd
index b6b4cd23143..738309dac8a 100644
--- a/python/cugraph/dask/community/louvain.pxd
+++ b/python/cugraph/dask/community/louvain.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -17,7 +17,7 @@
 # cython: language_level = 3
 
 from libcpp.utility cimport pair
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 
 
 cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py
index 11ecb78375f..495061c0f81 100644
--- a/python/cugraph/dask/community/louvain.py
+++ b/python/cugraph/dask/community/louvain.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -55,7 +55,8 @@ def louvain(input_graph, max_iter=100, resolution=1.0):
     Examples
     --------
     >>> import cugraph.dask as dcg
-    >>> Comms.initialize(p2p=True)
+    >>> # ... Init a DASK Cluster
+    >>> # see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html
     >>> chunksize = dcg.get_chunksize(input_data_path)
     >>> ddf = dask_cudf.read_csv('datasets/karate.csv', chunksize=chunksize,
                                  delimiter=' ',
diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx
index c2a12cf81f3..f58630d07aa 100644
--- a/python/cugraph/dask/community/louvain_wrapper.pyx
+++ b/python/cugraph/dask/community/louvain_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -19,8 +19,7 @@
 from libc.stdint cimport uintptr_t
 
 from cugraph.dask.community cimport louvain as c_louvain
-from cugraph.structure.graph_primtypes cimport *
-
+from cugraph.structure.graph_utilities cimport *
 import cudf
 import numpy as np
 
diff --git a/python/cugraph/dask/link_analysis/mg_pagerank.pxd b/python/cugraph/dask/link_analysis/mg_pagerank.pxd
index 91104d9127c..55bbc0dba7e 100644
--- a/python/cugraph/dask/link_analysis/mg_pagerank.pxd
+++ b/python/cugraph/dask/link_analysis/mg_pagerank.pxd
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +14,7 @@
 # limitations under the License.
 #
 
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 from libcpp cimport bool
 
 
@@ -31,4 +31,4 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
         double alpha,
         double tolerance,
         long long max_iter,
-        bool has_guess) except +
\ No newline at end of file
+        bool has_guess) except +
diff --git a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx
index 1cd80397b17..12f2342559b 100644
--- a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx
+++ b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 from cugraph.structure.utils_wrapper import *
 from cugraph.dask.link_analysis cimport mg_pagerank as c_pagerank
 import cudf
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper
 from libc.stdint cimport uintptr_t
 from cython.operator cimport dereference as deref
diff --git a/python/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/dask/link_analysis/pagerank.py
index 1e9d79e0aa6..d8a76f1231e 100644
--- a/python/cugraph/dask/link_analysis/pagerank.py
+++ b/python/cugraph/dask/link_analysis/pagerank.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -105,7 +105,8 @@ def pagerank(input_graph,
     Examples
     --------
     >>> import cugraph.dask as dcg
-    >>> Comms.initialize(p2p=True)
+    >>> # ... Init a DASK Cluster
+    >>> # see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html
     >>> chunksize = dcg.get_chunksize(input_data_path)
     >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize,
                                  delimiter=' ',
@@ -115,7 +116,6 @@ def pagerank(input_graph,
     >>> dg.from_dask_cudf_edgelist(ddf, source='src', destination='dst',
                                    edge_attr='value')
     >>> pr = dcg.pagerank(dg)
-    >>> Comms.destroy()
     """
     from cugraph.structure.graph import null_check
 
diff --git a/python/cugraph/dask/structure/renumber.py b/python/cugraph/dask/structure/renumber.py
deleted file mode 100644
index 606a6bc4dc1..00000000000
--- a/python/cugraph/dask/structure/renumber.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# Copyright (c) 2021, NVIDIA CORPORATION.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-from dask.distributed import wait, default_client
-from cugraph.dask.common.input_utils import get_distributed_data
-from cugraph.dask.structure import renumber_wrapper as renumber_w
-import cugraph.comms.comms as Comms
-import dask_cudf
-
-
-def call_renumber(sID,
-                  data,
-                  num_verts,
-                  num_edges,
-                  is_mnmg):
-    wid = Comms.get_worker_id(sID)
-    handle = Comms.get_handle(sID)
-    return renumber_w.mg_renumber(data[0],
-                                  num_verts,
-                                  num_edges,
-                                  wid,
-                                  handle,
-                                  is_mnmg)
-
-
-def renumber(input_graph):
-
-    client = default_client()
-
-    ddf = input_graph.edgelist.edgelist_df
-
-    num_edges = len(ddf)
-
-    if isinstance(ddf, dask_cudf.DataFrame):
-        is_mnmg = True
-    else:
-        is_mnmg = False
-
-    num_verts = input_graph.number_of_vertices()
-
-    if is_mnmg:
-        data = get_distributed_data(ddf)
-        result = [client.submit(call_renumber,
-                                Comms.get_session_id(),
-                                wf[1],
-                                num_verts,
-                                num_edges,
-                                is_mnmg,
-                                workers=[wf[0]])
-                  for idx, wf in enumerate(data.worker_to_parts.items())]
-        wait(result)
-        ddf = dask_cudf.from_delayed(result)
-    else:
-        call_renumber(Comms.get_session_id(),
-                      ddf,
-                      num_verts,
-                      num_edges,
-                      is_mnmg)
-    return ddf
diff --git a/python/cugraph/dask/traversal/bfs.py b/python/cugraph/dask/traversal/bfs.py
index 7a2c50a3bc0..51e0dc0de5d 100644
--- a/python/cugraph/dask/traversal/bfs.py
+++ b/python/cugraph/dask/traversal/bfs.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -76,7 +76,8 @@ def bfs(graph,
     Examples
     --------
     >>> import cugraph.dask as dcg
-    >>> Comms.initialize(p2p=True)
+    >>> # ... Init a DASK Cluster
+    >>> # see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html
     >>> chunksize = dcg.get_chunksize(input_data_path)
     >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize,
                                  delimiter=' ',
@@ -85,7 +86,6 @@ def bfs(graph,
     >>> dg = cugraph.DiGraph()
     >>> dg.from_dask_cudf_edgelist(ddf, 'src', 'dst')
     >>> df = dcg.bfs(dg, 0)
-    >>> Comms.destroy()
     """
 
     client = default_client()
diff --git a/python/cugraph/dask/traversal/mg_bfs.pxd b/python/cugraph/dask/traversal/mg_bfs.pxd
index 82c6e97d668..afd209158c4 100644
--- a/python/cugraph/dask/traversal/mg_bfs.pxd
+++ b/python/cugraph/dask/traversal/mg_bfs.pxd
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,7 +14,7 @@
 # limitations under the License.
 #
 
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 from libcpp cimport bool
 
 
diff --git a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx
index c92f28eb407..527cb2bcf0a 100644
--- a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx
+++ b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 from cugraph.structure.utils_wrapper import *
 from cugraph.dask.traversal cimport mg_bfs as c_bfs
 import cudf
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper
 from libc.stdint cimport uintptr_t
 
diff --git a/python/cugraph/dask/traversal/mg_sssp.pxd b/python/cugraph/dask/traversal/mg_sssp.pxd
index f846facd269..d56575da567 100644
--- a/python/cugraph/dask/traversal/mg_sssp.pxd
+++ b/python/cugraph/dask/traversal/mg_sssp.pxd
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -13,7 +13,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 from libcpp cimport bool
 
 
diff --git a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx
index b7aec103098..15d956836b4 100644
--- a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx
+++ b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -17,7 +17,7 @@
 from cugraph.structure.utils_wrapper import *
 from cugraph.dask.traversal cimport mg_sssp as c_sssp
 import cudf
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper
 from libc.stdint cimport uintptr_t
 
diff --git a/python/cugraph/dask/traversal/sssp.py b/python/cugraph/dask/traversal/sssp.py
index ce0c7908664..52f2b9b256c 100644
--- a/python/cugraph/dask/traversal/sssp.py
+++ b/python/cugraph/dask/traversal/sssp.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -76,7 +76,8 @@ def sssp(graph,
     Examples
     --------
     >>> import cugraph.dask as dcg
-    >>> Comms.initialize(p2p=True)
+    >>> # ... Init a DASK Cluster
+    >>> # see https://docs.rapids.ai/api/cugraph/stable/dask-cugraph.html
     >>> chunksize = dcg.get_chunksize(input_data_path)
     >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize,
                                  delimiter=' ',
@@ -85,7 +86,6 @@ def sssp(graph,
     >>> dg = cugraph.DiGraph()
     >>> dg.from_dask_cudf_edgelist(ddf, 'src', 'dst')
     >>> df = dcg.sssp(dg, 0)
-    >>> Comms.destroy()
     """
 
     client = default_client()
diff --git a/python/cugraph/layout/force_atlas2_wrapper.pyx b/python/cugraph/layout/force_atlas2_wrapper.pyx
index 785ddda47bd..4515c577f78 100644
--- a/python/cugraph/layout/force_atlas2_wrapper.pyx
+++ b/python/cugraph/layout/force_atlas2_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -19,15 +19,11 @@
 from cugraph.layout.force_atlas2 cimport force_atlas2 as c_force_atlas2
 from cugraph.structure import graph_primtypes_wrapper
 from cugraph.structure.graph_primtypes cimport *
-from cugraph.structure import utils_wrapper
 from libcpp cimport bool
 from libc.stdint cimport uintptr_t
-
 import cudf
-import cudf._lib as libcudf
 from numba import cuda
 import numpy as np
-import numpy.ctypeslib as ctypeslib
 
 cdef extern from "internals.hpp" namespace "cugraph::internals":
     cdef cppclass GraphBasedDimRedCallback
diff --git a/python/cugraph/linear_assignment/lap_wrapper.pyx b/python/cugraph/linear_assignment/lap_wrapper.pyx
index 0769ef42f0f..7cd2124b8d9 100644
--- a/python/cugraph/linear_assignment/lap_wrapper.pyx
+++ b/python/cugraph/linear_assignment/lap_wrapper.pyx
@@ -21,11 +21,10 @@ from cugraph.linear_assignment.lap cimport dense_hungarian as c_dense_hungarian
 from cugraph.structure.graph_primtypes cimport *
 from cugraph.structure import graph_primtypes_wrapper
 from libc.stdint cimport uintptr_t
-from cugraph.structure.graph import Graph as type_Graph
-
 import cudf
 import numpy as np
 
+
 def sparse_hungarian(input_graph, workers):
     """
     Call the hungarian algorithm
diff --git a/python/cugraph/link_analysis/hits_wrapper.pyx b/python/cugraph/link_analysis/hits_wrapper.pyx
index 3e19e38a023..2a2d33dea0b 100644
--- a/python/cugraph/link_analysis/hits_wrapper.pyx
+++ b/python/cugraph/link_analysis/hits_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -18,13 +18,10 @@
 
 from cugraph.link_analysis.hits cimport hits as c_hits
 from cugraph.structure.graph_primtypes cimport *
-from libcpp cimport bool
 from libc.stdint cimport uintptr_t
 from cugraph.structure import graph_primtypes_wrapper
 import cudf
-import rmm
 import numpy as np
-import numpy.ctypeslib as ctypeslib
 
 
 def hits(input_graph, max_iter=100, tol=1.0e-5, nstart=None, normalized=True):
@@ -48,8 +45,6 @@ def hits(input_graph, max_iter=100, tol=1.0e-5, nstart=None, normalized=True):
     df['hubs'] = cudf.Series(np.zeros(num_verts, dtype=np.float32))
     df['authorities'] = cudf.Series(np.zeros(num_verts, dtype=np.float32))
 
-    #cdef bool normalized = <bool> 1
-
     cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0];
     cdef uintptr_t c_hubs = df['hubs'].__cuda_array_interface__['data'][0];
     cdef uintptr_t c_authorities = df['authorities'].__cuda_array_interface__['data'][0];
diff --git a/python/cugraph/link_analysis/pagerank.pxd b/python/cugraph/link_analysis/pagerank.pxd
index 79cb033f74b..2c8bea12016 100644
--- a/python/cugraph/link_analysis/pagerank.pxd
+++ b/python/cugraph/link_analysis/pagerank.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -16,7 +16,7 @@
 # cython: embedsignature = True
 # cython: language_level = 3
 
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 from libcpp cimport bool
 
 
diff --git a/python/cugraph/link_analysis/pagerank_wrapper.pyx b/python/cugraph/link_analysis/pagerank_wrapper.pyx
index fea1939db6a..81a68d42360 100644
--- a/python/cugraph/link_analysis/pagerank_wrapper.pyx
+++ b/python/cugraph/link_analysis/pagerank_wrapper.pyx
@@ -16,16 +16,13 @@
 # cython: embedsignature = True
 # cython: language_level = 3
 
-#cimport cugraph.link_analysis.pagerank as c_pagerank
 from cugraph.link_analysis.pagerank cimport call_pagerank
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 from libcpp cimport bool
 from libc.stdint cimport uintptr_t
 from cugraph.structure import graph_primtypes_wrapper
 import cudf
-import rmm
 import numpy as np
-import numpy.ctypeslib as ctypeslib
 
 
 def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-5, nstart=None):
diff --git a/python/cugraph/link_prediction/jaccard_wrapper.pyx b/python/cugraph/link_prediction/jaccard_wrapper.pyx
index cacd13dec65..8d236c60ee2 100644
--- a/python/cugraph/link_prediction/jaccard_wrapper.pyx
+++ b/python/cugraph/link_prediction/jaccard_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -21,8 +21,6 @@ from cugraph.link_prediction.jaccard cimport jaccard_list as c_jaccard_list
 from cugraph.structure.graph_primtypes cimport *
 from cugraph.structure import graph_primtypes_wrapper
 from libc.stdint cimport uintptr_t
-from cython cimport floating
-
 import cudf
 import numpy as np
 
diff --git a/python/cugraph/link_prediction/overlap_wrapper.pyx b/python/cugraph/link_prediction/overlap_wrapper.pyx
index 9e2f3ba49d7..4cb17aa21a6 100644
--- a/python/cugraph/link_prediction/overlap_wrapper.pyx
+++ b/python/cugraph/link_prediction/overlap_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -21,8 +21,6 @@ from cugraph.link_prediction.overlap cimport overlap_list as c_overlap_list
 from cugraph.structure.graph_primtypes cimport *
 from cugraph.structure import graph_primtypes_wrapper
 from libc.stdint cimport uintptr_t
-from cython cimport floating
-
 import cudf
 import numpy as np
 
diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd
index 07132df2598..1e0d9626727 100644
--- a/python/cugraph/structure/graph_primtypes.pxd
+++ b/python/cugraph/structure/graph_primtypes.pxd
@@ -20,13 +20,9 @@ from libcpp cimport bool
 from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport pair
 from libcpp.vector cimport vector
-
+from cugraph.raft.common.handle cimport *
 from rmm._lib.device_buffer cimport device_buffer
 
-cdef extern from "raft/handle.hpp" namespace "raft":
-    cdef cppclass handle_t:
-        handle_t() except +
-
 cdef extern from "graph.hpp" namespace "cugraph":
 
     ctypedef enum PropType:
@@ -127,7 +123,6 @@ cdef extern from "graph.hpp" namespace "cugraph":
         GraphCSRView[VT,ET,WT] view()
 
 
-
 cdef extern from "algorithms.hpp" namespace "cugraph":
 
     cdef unique_ptr[GraphCOO[VT, ET, WT]] get_two_hop_neighbors[VT,ET,WT](
@@ -144,89 +139,6 @@ cdef extern from "functions.hpp" namespace "cugraph":
         ET *map_size) except +
 
 
-# renumber_edgelist() interface:
-#
-#
-# 1. `cdef extern partition_t`:
-#
-cdef extern from "experimental/graph_view.hpp" namespace "cugraph::experimental":
-
-    cdef cppclass partition_t[vertex_t]:
-        pass
-
-
-# 2. return type for shuffle:
-#
-cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
-
-    cdef cppclass major_minor_weights_t[vertex_t, weight_t]:
-        major_minor_weights_t(const handle_t &handle)
-        pair[unique_ptr[device_buffer], size_t] get_major_wrap()
-        pair[unique_ptr[device_buffer], size_t] get_minor_wrap()
-        pair[unique_ptr[device_buffer], size_t] get_weights_wrap()
-
-
-ctypedef fused shuffled_vertices_t:
-    major_minor_weights_t[int, float]
-    major_minor_weights_t[int, double]
-    major_minor_weights_t[long, float]
-    major_minor_weights_t[long, double]
-    
-# 3. return type for renumber:
-#
-cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
-
-    cdef cppclass renum_quad_t[vertex_t, edge_t]:
-        renum_quad_t(const handle_t &handle)
-        pair[unique_ptr[device_buffer], size_t] get_dv_wrap()
-        vertex_t& get_num_vertices()
-        edge_t& get_num_edges()
-        int get_part_row_size()
-        int get_part_col_size()
-        int get_part_comm_rank()
-        unique_ptr[vector[vertex_t]] get_partition_offsets()
-        pair[vertex_t, vertex_t] get_part_local_vertex_range()
-        vertex_t get_part_local_vertex_first()
-        vertex_t get_part_local_vertex_last()
-        pair[vertex_t, vertex_t] get_part_vertex_partition_range(size_t vertex_partition_idx)
-        vertex_t get_part_vertex_partition_first(size_t vertex_partition_idx)
-        vertex_t get_part_vertex_partition_last(size_t vertex_partition_idx)
-        vertex_t get_part_vertex_partition_size(size_t vertex_partition_idx)
-        size_t get_part_number_of_matrix_partitions()
-        vertex_t get_part_matrix_partition_major_first(size_t partition_idx)
-        vertex_t get_part_matrix_partition_major_last(size_t partition_idx)
-        vertex_t get_part_matrix_partition_major_value_start_offset(size_t partition_idx)
-        pair[vertex_t, vertex_t] get_part_matrix_partition_minor_range()
-        vertex_t get_part_matrix_partition_minor_first()
-        vertex_t get_part_matrix_partition_minor_last()        
-
-# 4. `groupby_gpuid_and_shuffle_values()` wrapper:
-#
-cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
-
-    cdef unique_ptr[major_minor_weights_t[vertex_t, weight_t]] call_shuffle[vertex_t, edge_t, weight_t](
-        const handle_t &handle,
-        vertex_t *edgelist_major_vertices,
-        vertex_t *edgelist_minor_vertices,
-        weight_t* edgelist_weights,
-        edge_t num_edges,
-        bool is_hyper_partitioned) except +
-
-
-# 5. `renumber_edgelist()` wrapper
-#
-cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
-
-    cdef unique_ptr[renum_quad_t[vertex_t, edge_t]] call_renumber[vertex_t, edge_t](
-        const handle_t &handle,
-        vertex_t *edgelist_major_vertices,
-        vertex_t *edgelist_minor_vertices,
-        edge_t num_edges,
-        bool is_hyper_partitioned,
-        bool do_check,
-        bool multi_gpu) except +
-
-
 cdef extern from "<utility>" namespace "std" nogil:
     cdef unique_ptr[GraphCOO[int,int,float]] move(unique_ptr[GraphCOO[int,int,float]])
     cdef unique_ptr[GraphCOO[int,int,double]] move(unique_ptr[GraphCOO[int,int,double]])
@@ -275,67 +187,3 @@ ctypedef fused GraphViewType:
 cdef coo_to_df(GraphCOOPtrType graph)
 cdef csr_to_series(GraphCSRPtrType graph)
 cdef GraphViewType get_graph_view(input_graph, bool weightless=*, GraphViewType* dummy=*)
-
-
-# C++ utilities specifically for Cython
-cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
-
-    ctypedef enum numberTypeEnum:
-        int32Type "cugraph::cython::numberTypeEnum::int32Type"
-        int64Type "cugraph::cython::numberTypeEnum::int64Type"
-        floatType "cugraph::cython::numberTypeEnum::floatType"
-        doubleType "cugraph::cython::numberTypeEnum::doubleType"
-
-    cdef cppclass graph_container_t:
-       pass
-
-    cdef void populate_graph_container(
-        graph_container_t &graph_container,
-        handle_t &handle,
-        void *src_vertices,
-        void *dst_vertices,
-        void *weights,
-        void *vertex_partition_offsets,
-        numberTypeEnum vertexType,
-        numberTypeEnum edgeType,
-        numberTypeEnum weightType,
-        size_t num_partition_edges,
-        size_t num_global_vertices,
-        size_t num_global_edges,
-        bool sorted_by_degree,
-        bool transposed,
-        bool multi_gpu) except +
-
-    ctypedef enum graphTypeEnum:
-        LegacyCSR "cugraph::cython::graphTypeEnum::LegacyCSR"
-        LegacyCSC "cugraph::cython::graphTypeEnum::LegacyCSC"
-        LegacyCOO "cugraph::cython::graphTypeEnum::LegacyCOO"
-
-    cdef void populate_graph_container_legacy(
-        graph_container_t &graph_container,
-        graphTypeEnum legacyType,
-        const handle_t &handle,
-        void *offsets,
-        void *indices,
-        void *weights,
-        numberTypeEnum offsetType,
-        numberTypeEnum indexType,
-        numberTypeEnum weightType,
-        size_t num_global_vertices,
-        size_t num_global_edges,
-        int *local_vertices,
-        int *local_edges,
-        int *local_offsets) except +
-
-    cdef cppclass cy_multi_edgelists_t:
-        size_t number_of_vertices
-        size_t number_of_edges
-        size_t number_of_subgraph
-        unique_ptr[device_buffer] src_indices
-        unique_ptr[device_buffer] dst_indices
-        unique_ptr[device_buffer] edge_data
-        unique_ptr[device_buffer] subgraph_offsets
-
-cdef extern from "<utility>" namespace "std" nogil:
-    cdef cy_multi_edgelists_t move(cy_multi_edgelists_t)
-    cdef unique_ptr[cy_multi_edgelists_t] move(unique_ptr[cy_multi_edgelists_t])
diff --git a/python/cugraph/structure/graph_utilities.pxd b/python/cugraph/structure/graph_utilities.pxd
new file mode 100644
index 00000000000..10c90f44cb8
--- /dev/null
+++ b/python/cugraph/structure/graph_utilities.pxd
@@ -0,0 +1,173 @@
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# cython: profile=False
+# distutils: language = c++
+# cython: embedsignature = True
+# cython: language_level = 3
+
+
+from cugraph.raft.common.handle cimport *
+from libcpp cimport bool
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport pair
+from libcpp.vector cimport vector
+from rmm._lib.device_buffer cimport device_buffer
+
+# C++ graph utilities
+cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
+
+    ctypedef enum numberTypeEnum:
+        int32Type "cugraph::cython::numberTypeEnum::int32Type"
+        int64Type "cugraph::cython::numberTypeEnum::int64Type"
+        floatType "cugraph::cython::numberTypeEnum::floatType"
+        doubleType "cugraph::cython::numberTypeEnum::doubleType"
+
+    cdef cppclass graph_container_t:
+       pass
+
+    cdef void populate_graph_container(
+        graph_container_t &graph_container,
+        handle_t &handle,
+        void *src_vertices,
+        void *dst_vertices,
+        void *weights,
+        void *vertex_partition_offsets,
+        numberTypeEnum vertexType,
+        numberTypeEnum edgeType,
+        numberTypeEnum weightType,
+        size_t num_partition_edges,
+        size_t num_global_vertices,
+        size_t num_global_edges,
+        bool sorted_by_degree,
+        bool transposed,
+        bool multi_gpu) except +
+
+    ctypedef enum graphTypeEnum:
+        LegacyCSR "cugraph::cython::graphTypeEnum::LegacyCSR"
+        LegacyCSC "cugraph::cython::graphTypeEnum::LegacyCSC"
+        LegacyCOO "cugraph::cython::graphTypeEnum::LegacyCOO"
+
+    cdef void populate_graph_container_legacy(
+        graph_container_t &graph_container,
+        graphTypeEnum legacyType,
+        const handle_t &handle,
+        void *offsets,
+        void *indices,
+        void *weights,
+        numberTypeEnum offsetType,
+        numberTypeEnum indexType,
+        numberTypeEnum weightType,
+        size_t num_global_vertices,
+        size_t num_global_edges,
+        int *local_vertices,
+        int *local_edges,
+        int *local_offsets) except +
+
+    cdef cppclass cy_multi_edgelists_t:
+        size_t number_of_vertices
+        size_t number_of_edges
+        size_t number_of_subgraph
+        unique_ptr[device_buffer] src_indices
+        unique_ptr[device_buffer] dst_indices
+        unique_ptr[device_buffer] edge_data
+        unique_ptr[device_buffer] subgraph_offsets
+
+cdef extern from "<utility>" namespace "std" nogil:
+    cdef device_buffer move(device_buffer)
+    cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer]) 
+    cdef cy_multi_edgelists_t move(cy_multi_edgelists_t)
+    cdef unique_ptr[cy_multi_edgelists_t] move(unique_ptr[cy_multi_edgelists_t])
+    #cdef device_buffer move(device_buffer)
+    #cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer])
+
+# renumber_edgelist() interface utilities:
+#
+#
+# 1. `cdef extern partition_t`:
+#
+cdef extern from "experimental/graph_view.hpp" namespace "cugraph::experimental":
+
+    cdef cppclass partition_t[vertex_t]:
+        pass
+
+
+# 2. return type for shuffle:
+#
+cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
+
+    cdef cppclass major_minor_weights_t[vertex_t, weight_t]:
+        major_minor_weights_t(const handle_t &handle)
+        pair[unique_ptr[device_buffer], size_t] get_major_wrap()
+        pair[unique_ptr[device_buffer], size_t] get_minor_wrap()
+        pair[unique_ptr[device_buffer], size_t] get_weights_wrap()
+
+
+ctypedef fused shuffled_vertices_t:
+    major_minor_weights_t[int, float]
+    major_minor_weights_t[int, double]
+    major_minor_weights_t[long, float]
+    major_minor_weights_t[long, double]
+    
+# 3. return type for renumber:
+#
+cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
+
+    cdef cppclass renum_quad_t[vertex_t, edge_t]:
+        renum_quad_t(const handle_t &handle)
+        pair[unique_ptr[device_buffer], size_t] get_dv_wrap()
+        vertex_t& get_num_vertices()
+        edge_t& get_num_edges()
+        int get_part_row_size()
+        int get_part_col_size()
+        int get_part_comm_rank()
+        unique_ptr[vector[vertex_t]] get_partition_offsets()
+        pair[vertex_t, vertex_t] get_part_local_vertex_range()
+        vertex_t get_part_local_vertex_first()
+        vertex_t get_part_local_vertex_last()
+        pair[vertex_t, vertex_t] get_part_vertex_partition_range(size_t vertex_partition_idx)
+        vertex_t get_part_vertex_partition_first(size_t vertex_partition_idx)
+        vertex_t get_part_vertex_partition_last(size_t vertex_partition_idx)
+        vertex_t get_part_vertex_partition_size(size_t vertex_partition_idx)
+        size_t get_part_number_of_matrix_partitions()
+        vertex_t get_part_matrix_partition_major_first(size_t partition_idx)
+        vertex_t get_part_matrix_partition_major_last(size_t partition_idx)
+        vertex_t get_part_matrix_partition_major_value_start_offset(size_t partition_idx)
+        pair[vertex_t, vertex_t] get_part_matrix_partition_minor_range()
+        vertex_t get_part_matrix_partition_minor_first()
+        vertex_t get_part_matrix_partition_minor_last()        
+
+# 4. `sort_and_shuffle_values()` wrapper:
+#
+cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
+
+    cdef unique_ptr[major_minor_weights_t[vertex_t, weight_t]] call_shuffle[vertex_t, edge_t, weight_t](
+        const handle_t &handle,
+        vertex_t *edgelist_major_vertices,
+        vertex_t *edgelist_minor_vertices,
+        weight_t* edgelist_weights,
+        edge_t num_edges,
+        bool is_hyper_partitioned) except +
+
+# 5. `renumber_edgelist()` wrapper
+#
+cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
+
+    cdef unique_ptr[renum_quad_t[vertex_t, edge_t]] call_renumber[vertex_t, edge_t](
+        const handle_t &handle,
+        vertex_t *edgelist_major_vertices,
+        vertex_t *edgelist_minor_vertices,
+        edge_t num_edges,
+        bool is_hyper_partitioned,
+        bool do_check,
+        bool multi_gpu) except +
diff --git a/python/cugraph/structure/new_number_map.py b/python/cugraph/structure/new_number_map.py
new file mode 100644
index 00000000000..f8a2164d2c4
--- /dev/null
+++ b/python/cugraph/structure/new_number_map.py
@@ -0,0 +1,317 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from dask.distributed import wait, default_client
+from cugraph.dask.common.input_utils import get_distributed_data
+from cugraph.structure import renumber_wrapper as c_renumber
+import cugraph.comms as Comms
+import dask_cudf
+import numpy as np
+import cudf
+import cugraph.structure.number_map as legacy_number_map
+
+
+def call_renumber(sID, data, num_edges, is_mnmg, store_transposed):
+    """
+    Renumber the partition held by one worker.
+
+    Looks up the worker id and handle registered for session `sID`, then
+    forwards `data[0]` and the remaining arguments to the renumber wrapper.
+    """
+    worker_id = Comms.get_worker_id(sID)
+    worker_handle = Comms.get_handle(sID)
+    return c_renumber.renumber(
+        data[0], num_edges, worker_id, worker_handle, is_mnmg,
+        store_transposed
+    )
+
+
+class NumberMap:
+
+    class SingleGPU:
+        def __init__(self, df, src_col_names, dst_col_names, id_type,
+                     store_transposed):
+            self.col_names = NumberMap.compute_vals(src_col_names)
+            self.df = cudf.DataFrame()
+            self.id_type = id_type
+            self.store_transposed = store_transposed
+            self.numbered = False
+
+        def to_internal_vertex_id(self, df, col_names):
+            tmp_df = df[col_names].rename(
+                columns=dict(zip(col_names, self.col_names)), copy=False
+            )
+            index_name = NumberMap.generate_unused_column_name(df.columns)
+            tmp_df[index_name] = tmp_df.index
+            return (
+                self.df.merge(tmp_df, on=self.col_names, how="right")
+                .sort_values(index_name)
+                .drop(columns=[index_name])
+                .reset_index()["id"]
+            )
+
+        def from_internal_vertex_id(
+            self, df, internal_column_name, external_column_names
+        ):
+            tmp_df = self.df.merge(
+                df,
+                right_on=internal_column_name,
+                left_on="id",
+                how="right",
+            )
+            if internal_column_name != "id":
+                tmp_df = tmp_df.drop(columns=["id"])
+            if external_column_names is None:
+                return tmp_df
+            else:
+                return tmp_df.rename(
+                    columns=dict(zip(self.col_names, external_column_names)),
+                    copy=False,
+                )
+
+    class MultiGPU:
+        def __init__(
+            self, ddf, src_col_names, dst_col_names, id_type, store_transposed
+        ):
+            self.col_names = NumberMap.compute_vals(src_col_names)
+            self.val_types = NumberMap.compute_vals_types(ddf, src_col_names)
+            self.val_types["count"] = np.int32
+            self.id_type = id_type
+            self.store_transposed = store_transposed
+            self.numbered = False
+
+        def to_internal_vertex_id(self, ddf, col_names):
+            return self.ddf.merge(
+                ddf,
+                right_on=col_names,
+                left_on=self.col_names,
+                how="right",
+            )["global_id"]
+
+        def from_internal_vertex_id(
+            self, df, internal_column_name, external_column_names
+        ):
+            tmp_df = self.ddf.merge(
+                df,
+                right_on=internal_column_name,
+                left_on="global_id",
+                how="right"
+            ).map_partitions(lambda df: df.drop(columns="global_id"))
+
+            if external_column_names is None:
+                return tmp_df
+            else:
+                return tmp_df.map_partitions(
+                    lambda df:
+                    df.rename(
+                        columns=dict(
+                            zip(self.col_names, external_column_names)
+                        ),
+                        copy=False
+                    )
+                )
+
+    def __init__(self, id_type=np.int32):
+        self.implementation = None
+        self.id_type = id_type
+
+    def compute_vals_types(df, column_names):
+        """
+        Map each internal column name ("0", "1", ...) to the dtype of the
+        external column at the same position in `column_names`.
+        """
+        external = enumerate(column_names)
+        return {str(pos): df[name].dtype for pos, name in external}
+
+    def generate_unused_column_name(column_names):
+        """
+        Return the shortest string of repeated "x" characters that is
+        not already present in `column_names`.
+        """
+        candidate = "x"
+        while candidate in column_names:
+            candidate += "x"
+        return candidate
+
+    def compute_vals(column_names):
+        """
+        Return the internal column names "0" .. "n-1", one for each of the
+        n external column names supplied.
+        """
+        return list(map(str, range(len(column_names))))
+
+    def renumber(df, src_col_names, dst_col_names, preserve_order=False,
+                 store_transposed=False):
+
+        if isinstance(src_col_names, list):
+            renumber_type = 'legacy'
+        # elif isinstance(df[src_col_names].dtype, string):
+        #    renumber_type = 'legacy'
+        else:
+            renumber_type = 'experimental'
+
+        if renumber_type == 'legacy':
+            renumber_map, renumbered_df = legacy_number_map.renumber(
+                                              df,
+                                              src_col_names,
+                                              dst_col_names,
+                                              preserve_order,
+                                              store_transposed)
+            # Add shuffling once algorithms are switched to new renumber
+            # (ddf,
+            # num_verts,
+            # partition_row_size,
+            # partition_col_size,
+            # vertex_partition_offsets) = shuffle(input_graph, transposed=True)
+            return renumber_map, renumbered_df
+
+        renumber_map = NumberMap()
+        if not isinstance(src_col_names, list):
+            src_col_names = [src_col_names]
+            dst_col_names = [dst_col_names]
+        if type(df) is cudf.DataFrame:
+            renumber_map.implementation = NumberMap.SingleGPU(
+                df, src_col_names, dst_col_names, renumber_map.id_type,
+                store_transposed
+            )
+        elif type(df) is dask_cudf.DataFrame:
+            renumber_map.implementation = NumberMap.MultiGPU(
+                df, src_col_names, dst_col_names, renumber_map.id_type,
+                store_transposed
+            )
+        else:
+            raise Exception("df must be cudf.DataFrame or dask_cudf.DataFrame")
+
+        num_edges = len(df)
+
+        if isinstance(df, dask_cudf.DataFrame):
+            is_mnmg = True
+        else:
+            is_mnmg = False
+
+        if is_mnmg:
+            client = default_client()
+            data = get_distributed_data(df)
+            result = [(client.submit(call_renumber,
+                                     Comms.get_session_id(),
+                                     wf[1],
+                                     num_edges,
+                                     is_mnmg,
+                                     store_transposed,
+                                     workers=[wf[0]]), wf[0])
+                      for idx, wf in enumerate(data.worker_to_parts.items())]
+            wait(result)
+
+            def get_renumber_map(data):
+                return data[0]
+
+            def get_renumbered_df(data):
+                return data[1]
+
+            renumbering_map = dask_cudf.from_delayed(
+                                 [client.submit(get_renumber_map,
+                                                data,
+                                                workers=[wf])
+                                     for (data, wf) in result])
+            renumbered_df = dask_cudf.from_delayed(
+                               [client.submit(get_renumbered_df,
+                                              data,
+                                              workers=[wf])
+                                   for (data, wf) in result])
+
+            renumber_map.implementation.ddf = renumbering_map
+            renumber_map.implementation.numbered = True
+
+            return renumbered_df, renumber_map
+        else:
+            renumbering_map, renumbered_df = c_renumber.renumber(
+                                             df,
+                                             num_edges,
+                                             0,
+                                             Comms.get_default_handle(),
+                                             is_mnmg,
+                                             store_transposed)
+            renumber_map.implementation.df = renumbering_map
+            renumber_map.implementation.numbered = True
+            return renumbered_df, renumber_map
+
+    def unrenumber(self, df, column_name, preserve_order=False):
+        """
+        Given a DataFrame containing internal vertex ids in the identified
+        column, replace this with external vertex ids.  If the renumbering
+        is from a single column, the output dataframe will use the same
+        name for the external vertex identifiers.  If the renumbering is from
+        a multi-column input, the output columns will be labeled 0 through
+        n-1 with a suffix of _column_name.
+        Note that this function does not guarantee order or partitioning in
+        multi-GPU mode.
+        Parameters
+        ----------
+        df: cudf.DataFrame or dask_cudf.DataFrame
+            A DataFrame containing internal vertex identifiers that will be
+            converted into external vertex identifiers.
+        column_name: string
+            Name of the column containing the internal vertex id.
+        preserve_order: (optional) bool
+            If True, preserve the order of the rows in the output
+            DataFrame to match the input DataFrame
+        Returns
+        ---------
+        df : cudf.DataFrame or dask_cudf.DataFrame
+            The original DataFrame columns exist unmodified.  The external
+            vertex identifiers are added to the DataFrame, the internal
+            vertex identifier column is removed from the dataframe.
+        Examples
+        --------
+        >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ',
+        >>>                   dtype=['int32', 'int32', 'float32'], header=None)
+        >>>
+        >>> df, number_map = NumberMap.renumber(M, '0', '1')
+        >>>
+        >>> G = cugraph.Graph()
+        >>> G.from_cudf_edgelist(df, 'src', 'dst')
+        >>>
+        >>> pr = cugraph.pagerank(G, alpha = 0.85, max_iter = 500,
+        >>>                       tol = 1.0e-05)
+        >>>
+        >>> pr = number_map.unrenumber(pr, 'vertex')
+        >>>
+        """
+        impl = self.implementation
+        if len(impl.col_names) == 1:
+            # Output will be renamed to match input
+            mapping = {"0": column_name}
+        else:
+            # Output will be renamed to ${i}_${column_name}
+            mapping = {nm: nm + "_" + column_name for nm in impl.col_names}
+
+        if preserve_order:
+            index_name = NumberMap.generate_unused_column_name(df.columns)
+            df[index_name] = df.index
+
+        # Lookup lives on the implementation, which keeps the internal column
+        df = impl.from_internal_vertex_id(df, column_name, None)
+        df = df.drop(columns=[column_name])
+
+        if preserve_order:
+            df = df.sort_values(
+                index_name
+            ).drop(columns=index_name).reset_index(drop=True)
+
+        if type(df) is dask_cudf.DataFrame:
+            return df.map_partitions(
+                lambda df: df.rename(columns=mapping, copy=False)
+            )
+        return df.rename(columns=mapping, copy=False)
diff --git a/python/cugraph/dask/structure/renumber_wrapper.pyx b/python/cugraph/structure/renumber_wrapper.pyx
similarity index 73%
rename from python/cugraph/dask/structure/renumber_wrapper.pyx
rename to python/cugraph/structure/renumber_wrapper.pyx
index 40dd80aeb67..302fcfe583b 100644
--- a/python/cugraph/dask/structure/renumber_wrapper.pyx
+++ b/python/cugraph/structure/renumber_wrapper.pyx
@@ -16,7 +16,7 @@
 
 from cugraph.structure.utils_wrapper import *
 import cudf
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper
 from libc.stdint cimport uintptr_t
 from cython.operator cimport dereference as deref
@@ -25,41 +25,45 @@ import numpy as np
 from libcpp.utility cimport move
 from rmm._lib.device_buffer cimport device_buffer, DeviceBuffer
 
-cdef renumber_helper(shuffled_vertices_t* ptr_maj_min_w):
+cdef renumber_helper(shuffled_vertices_t* ptr_maj_min_w, vertex_t, weights):
     # extract shuffled result:
     #
     cdef pair[unique_ptr[device_buffer], size_t] pair_s_major   = deref(ptr_maj_min_w).get_major_wrap()
     cdef pair[unique_ptr[device_buffer], size_t] pair_s_minor   = deref(ptr_maj_min_w).get_minor_wrap()
     cdef pair[unique_ptr[device_buffer], size_t] pair_s_weights = deref(ptr_maj_min_w).get_weights_wrap()
     
-    shufled_major_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_major.first))
-    shufled_major_buffer = Buffer(shufled_major_buffer)
+    shuffled_major_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_major.first))
+    shuffled_major_buffer = Buffer(shuffled_major_buffer)
     
-    shufled_major_series = cudf.Series(data=shufled_major_buffer, dtype=vertex_t)
+    shuffled_major_series = cudf.Series(data=shuffled_major_buffer, dtype=vertex_t)
     
-    shufled_minor_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_minor.first))
-    shufled_minor_buffer = Buffer(shufled_minor_buffer)
-    
-    shufled_minor_series = cudf.Series(data=shufled_minor_buffer, dtype=vertex_t)
-    
-    shufled_weights_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_weights.first))
-    shufled_weights_buffer = Buffer(shufled_weights_buffer)
-    
-    shufled_weights_series = cudf.Series(data=shufled_weights_buffer, dtype=weight_t)
+    shuffled_minor_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_minor.first))
+    shuffled_minor_buffer = Buffer(shuffled_minor_buffer)
     
+    shuffled_minor_series = cudf.Series(data=shuffled_minor_buffer, dtype=vertex_t)
+
     shuffled_df = cudf.DataFrame()
     shuffled_df['src']=shuffled_major_series
     shuffled_df['dst']=shuffled_minor_series
-    shuffled_df['weights']= shuffled_weights_series
+
+    if weights is not None:
+        weight_t = weights.dtype
+        shuffled_weights_buffer = DeviceBuffer.c_from_unique_ptr(move(pair_s_weights.first))
+        shuffled_weights_buffer = Buffer(shuffled_weights_buffer)
+    
+        shuffled_weights_series = cudf.Series(data=shuffled_weights_buffer, dtype=weight_t)
+    
+        shuffled_df['weights']= shuffled_weights_series
     
     return shuffled_df
 
-def mg_renumber(input_df,           # maybe use cpdef ?
-                num_global_verts,
-                num_global_edges,    
-                rank,
-                handle,
-                is_multi_gpu):
+
+def renumber(input_df,           # maybe use cpdef ?
+             num_global_edges,    
+             rank,
+             handle,
+             is_multi_gpu,
+             transposed):
     """
     Call MNMG renumber
     """
@@ -67,11 +71,16 @@ def mg_renumber(input_df,           # maybe use cpdef ?
     # TODO: get handle_t out of handle...
     handle_ptr = <handle_t*>handle_size_t
 
-    src = input_df['src']
-    dst = input_df['dst']
+    if not transposed:
+        major_vertices = input_df['src']
+        minor_vertices = input_df['dst']
+    else:
+        major_vertices = input_df['dst']
+        minor_vertices = input_df['src']
+
     cdef uintptr_t c_edge_weights = <uintptr_t>NULL # set below...
     
-    vertex_t = src.dtype
+    vertex_t = major_vertices.dtype
     if num_global_edges > (2**31 - 1):
         edge_t = np.dtype("int64")
     else:
@@ -81,6 +90,7 @@ def mg_renumber(input_df,           # maybe use cpdef ?
         weight_t = weights.dtype
         c_edge_weights = weights.__cuda_array_interface__['data'][0]
     else:
+        weights = None
         weight_t = np.dtype("float32")
         
     if (vertex_t != np.dtype("int32") and vertex_t != np.dtype("int64")):
@@ -93,10 +103,10 @@ def mg_renumber(input_df,           # maybe use cpdef ?
         raise Exception("Incompatible vertex_t and edge_t types.")
 
     # FIXME: needs to be edge_t type not int
-    cdef int num_partition_edges = len(src)
+    cdef int num_partition_edges = len(major_vertices)
 
-    cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0]
-    cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0]
+    cdef uintptr_t c_major_vertices = major_vertices.__cuda_array_interface__['data'][0]
+    cdef uintptr_t c_minor_vertices = minor_vertices.__cuda_array_interface__['data'][0]
 
     cdef bool is_hyper_partitioned = False # for now
 
@@ -132,27 +142,29 @@ def mg_renumber(input_df,           # maybe use cpdef ?
     if (vertex_t == np.dtype("int32")):
         if ( edge_t == np.dtype("int32")):
             if( weight_t == np.dtype("float32")):
-                ptr_shuffled_32_32.reset(call_shuffle[int, int, float](deref(handle_ptr),
-                                                                       <int*>c_src_vertices,
-                                                                       <int*>c_dst_vertices,
-                                                                       <float*>c_edge_weights,
-                                                                       num_partition_edges,
-                                                                       is_hyper_partitioned).release())
-                
-                shuffled_df = renumber_helper(ptr_shuffled_32_32.get())
-                
-                shuffled_src = shufled_df['src']
-                shuffled_dst = shufled_df['dst']
-                        
+                if(is_multi_gpu):
+                    ptr_shuffled_32_32.reset(call_shuffle[int, int, float](deref(handle_ptr),
+                                                                           <int*>c_major_vertices,
+                                                                           <int*>c_minor_vertices,
+                                                                           <float*>c_edge_weights,
+                                                                           num_partition_edges,
+                                                                           is_hyper_partitioned).release())
+                    shuffled_df = renumber_helper(ptr_shuffled_32_32.get(), vertex_t, weights)
+                else:
+                    shuffled_df = input_df  
+ 
+                shuffled_src = shuffled_df['src']
+                shuffled_dst = shuffled_df['dst']
+                num_partition_edges = len(shuffled_df)
+                       
                 shuffled_major = shuffled_src.__cuda_array_interface__['data'][0]
                 shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0]
-                
                 ptr_renum_quad_32_32.reset(call_renumber[int, int](deref(handle_ptr),
                                                                    <int*>shuffled_major,
                                                                    <int*>shuffled_minor,
                                                                    num_partition_edges,
                                                                    is_hyper_partitioned,
-                                                                   do_check,
+                                                                   1,
                                                                    mg_flag).release())
                 
                 pair_original = ptr_renum_quad_32_32.get().get_dv_wrap() # original vertices: see helper
@@ -174,8 +186,9 @@ def mg_renumber(input_df,           # maybe use cpdef ?
                                                        uniq_partition_vector_32.get()[0].at(rank_indx+1)),
                                              dtype=vertex_t)
                 else:
-                    new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t)
-                
+                    new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0),
+                                                       uniq_partition_vector_32.get()[0].at(1)),
+                                             dtype=vertex_t)                
                 # create new cudf df
                 #
                 # and add the previous series to it:
@@ -185,18 +198,23 @@ def mg_renumber(input_df,           # maybe use cpdef ?
                 renumbered_map['new_ids'] = new_series
 
                 return renumbered_map, shuffled_df
+
             elif( weight_t == np.dtype("float64")):
-                ptr_shuffled_32_64.reset(call_shuffle[int, int, double](deref(handle_ptr),
-                                                                        <int*>c_src_vertices,
-                                                                        <int*>c_dst_vertices,
-                                                                        <double*>c_edge_weights,
-                                                                        num_partition_edges,
-                                                                        is_hyper_partitioned).release())
-                
-                shuffled_df = renumber_helper(ptr_shuffled_32_64.get())
-                
-                shuffled_src = shufled_df['src']
-                shuffled_dst = shufled_df['dst']
+                if(is_multi_gpu):
+                    ptr_shuffled_32_64.reset(call_shuffle[int, int, double](deref(handle_ptr),
+                                                                            <int*>c_major_vertices,
+                                                                            <int*>c_minor_vertices,
+                                                                            <double*>c_edge_weights,
+                                                                            num_partition_edges,
+                                                                            is_hyper_partitioned).release())
+                
+                    shuffled_df = renumber_helper(ptr_shuffled_32_64.get(), vertex_t, weights)
+                else:
+                    shuffled_df = input_df
+
+                shuffled_src = shuffled_df['src']
+                shuffled_dst = shuffled_df['dst']
+                num_partition_edges = len(shuffled_df)
                         
                 shuffled_major = shuffled_src.__cuda_array_interface__['data'][0]
                 shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0]
@@ -228,7 +246,9 @@ def mg_renumber(input_df,           # maybe use cpdef ?
                                                        uniq_partition_vector_32.get()[0].at(rank_indx+1)),
                                              dtype=vertex_t)
                 else:
-                    new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t)
+                    new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0),
+                                                       uniq_partition_vector_32.get()[0].at(1)),
+                                             dtype=vertex_t)
                 
                 # create new cudf df
                 #
@@ -241,17 +261,21 @@ def mg_renumber(input_df,           # maybe use cpdef ?
                 return renumbered_map, shuffled_df
         elif ( edge_t == np.dtype("int64")):
             if( weight_t == np.dtype("float32")):
-                ptr_shuffled_32_32.reset(call_shuffle[int, long, float](deref(handle_ptr),
-                                                                        <int*>c_src_vertices,
-                                                                        <int*>c_dst_vertices,
-                                                                        <float*>c_edge_weights,
-                                                                        num_partition_edges,
-                                                                        is_hyper_partitioned).release())
-                
-                shuffled_df = renumber_helper(ptr_shuffled_32_32.get())
-                
-                shuffled_src = shufled_df['src']
-                shuffled_dst = shufled_df['dst']
+                if(is_multi_gpu):
+                    ptr_shuffled_32_32.reset(call_shuffle[int, long, float](deref(handle_ptr),
+                                                                            <int*>c_major_vertices,
+                                                                            <int*>c_minor_vertices,
+                                                                            <float*>c_edge_weights,
+                                                                            num_partition_edges,
+                                                                            is_hyper_partitioned).release())
+                
+                    shuffled_df = renumber_helper(ptr_shuffled_32_32.get(), vertex_t, weights)
+                else:
+                    shuffled_df = input_df
+
+                shuffled_src = shuffled_df['src']
+                shuffled_dst = shuffled_df['dst']
+                num_partition_edges = len(shuffled_df)
                         
                 shuffled_major = shuffled_src.__cuda_array_interface__['data'][0]
                 shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0]
@@ -283,8 +307,10 @@ def mg_renumber(input_df,           # maybe use cpdef ?
                                                        uniq_partition_vector_32.get()[0].at(rank_indx+1)),
                                              dtype=vertex_t)
                 else:
-                    new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t)
-                
+                    new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0),
+                                                       uniq_partition_vector_32.get()[0].at(1)),
+                                             dtype=vertex_t)
+               
                 # create new cudf df
                 #
                 # and add the previous series to it:
@@ -295,17 +321,21 @@ def mg_renumber(input_df,           # maybe use cpdef ?
 
                 return renumbered_map, shuffled_df
             elif( weight_t == np.dtype("float64")):
-                ptr_shuffled_32_64.reset(call_shuffle[int, long, double](deref(handle_ptr),
-                                                                         <int*>c_src_vertices,
-                                                                         <int*>c_dst_vertices,
-                                                                         <double*>c_edge_weights,
-                                                                         num_partition_edges,
-                                                                         is_hyper_partitioned).release())
-                
-                shuffled_df = renumber_helper(ptr_shuffled_32_64.get())
+                if(is_multi_gpu):
+                    ptr_shuffled_32_64.reset(call_shuffle[int, long, double](deref(handle_ptr),
+                                                                             <int*>c_major_vertices,
+                                                                             <int*>c_minor_vertices,
+                                                                             <double*>c_edge_weights,
+                                                                             num_partition_edges,
+                                                                             is_hyper_partitioned).release())
+                
+                    shuffled_df = renumber_helper(ptr_shuffled_32_64.get(), vertex_t, weights)
+                else:
+                    shuffled_df = input_df
                 
-                shuffled_src = shufled_df['src']
-                shuffled_dst = shufled_df['dst']
+                shuffled_src = shuffled_df['src']
+                shuffled_dst = shuffled_df['dst']
+                num_partition_edges = len(shuffled_df)
                         
                 shuffled_major = shuffled_src.__cuda_array_interface__['data'][0]
                 shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0]
@@ -337,8 +367,9 @@ def mg_renumber(input_df,           # maybe use cpdef ?
                                                        uniq_partition_vector_32.get()[0].at(rank_indx+1)),
                                              dtype=vertex_t)
                 else:
-                    new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t)
-                
+                    new_series = cudf.Series(np.arange(uniq_partition_vector_32.get()[0].at(0),
+                                                       uniq_partition_vector_32.get()[0].at(1)),
+                                             dtype=vertex_t)                
                 # create new cudf df
                 #
                 # and add the previous series to it:
@@ -351,17 +382,21 @@ def mg_renumber(input_df,           # maybe use cpdef ?
     elif (vertex_t == np.dtype("int64")):
         if ( edge_t == np.dtype("int64")):
             if( weight_t == np.dtype("float32")):
-                ptr_shuffled_64_32.reset(call_shuffle[long, long, float](deref(handle_ptr),
-                                                                        <long*>c_src_vertices,
-                                                                        <long*>c_dst_vertices,
-                                                                        <float*>c_edge_weights,
-                                                                        num_partition_edges,
-                                                                        is_hyper_partitioned).release())
-                
-                shuffled_df = renumber_helper(ptr_shuffled_64_32.get())
+                if(is_multi_gpu):
+                    ptr_shuffled_64_32.reset(call_shuffle[long, long, float](deref(handle_ptr),
+                                                                            <long*>c_major_vertices,
+                                                                            <long*>c_minor_vertices,
+                                                                            <float*>c_edge_weights,
+                                                                            num_partition_edges,
+                                                                            is_hyper_partitioned).release())
+                
+                    shuffled_df = renumber_helper(ptr_shuffled_64_32.get(), vertex_t, weights)
+                else:
+                    shuffled_df = input_df
                 
-                shuffled_src = shufled_df['src']
-                shuffled_dst = shufled_df['dst']
+                shuffled_src = shuffled_df['src']
+                shuffled_dst = shuffled_df['dst']
+                num_partition_edges = len(shuffled_df)
                         
                 shuffled_major = shuffled_src.__cuda_array_interface__['data'][0]
                 shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0]
@@ -393,7 +428,9 @@ def mg_renumber(input_df,           # maybe use cpdef ?
                                                        uniq_partition_vector_64.get()[0].at(rank_indx+1)),
                                              dtype=vertex_t)
                 else:
-                    new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t)
+                    new_series = cudf.Series(np.arange(uniq_partition_vector_64.get()[0].at(0),
+                                                       uniq_partition_vector_64.get()[0].at(1)),
+                                             dtype=vertex_t)  # int64 vertices: read the _64 partition vector, as the branch above does
                 
                 # create new cudf df
                 #
@@ -405,17 +442,21 @@ def mg_renumber(input_df,           # maybe use cpdef ?
 
                 return renumbered_map, shuffled_df
             elif( weight_t == np.dtype("float64")):
-                ptr_shuffled_64_64.reset(call_shuffle[long, long, double](deref(handle_ptr),
-                                                                          <long*>c_src_vertices,
-                                                                          <long*>c_dst_vertices,
-                                                                          <double*>c_edge_weights,
-                                                                          num_partition_edges,
-                                                                          is_hyper_partitioned).release())
-                
-                shuffled_df = renumber_helper(ptr_shuffled_64_64.get())
+                if(is_multi_gpu):
+                    ptr_shuffled_64_64.reset(call_shuffle[long, long, double](deref(handle_ptr),
+                                                                              <long*>c_major_vertices,
+                                                                              <long*>c_minor_vertices,
+                                                                              <double*>c_edge_weights,
+                                                                              num_partition_edges,
+                                                                              is_hyper_partitioned).release())
+                
+                    shuffled_df = renumber_helper(ptr_shuffled_64_64.get(), vertex_t, weights)
+                else:
+                    shuffled_df = input_df
                 
-                shuffled_src = shufled_df['src']
-                shuffled_dst = shufled_df['dst']
+                shuffled_src = shuffled_df['src']
+                shuffled_dst = shuffled_df['dst']
+                num_partition_edges = len(shuffled_df)
                         
                 shuffled_major = shuffled_src.__cuda_array_interface__['data'][0]
                 shuffled_minor = shuffled_dst.__cuda_array_interface__['data'][0]
@@ -447,7 +488,9 @@ def mg_renumber(input_df,           # maybe use cpdef ?
                                                        uniq_partition_vector_64.get()[0].at(rank_indx+1)),
                                              dtype=vertex_t)
                 else:
-                    new_series = cudf.Series(np.arange(0, num_global_verts), dtype=vertex_t)
+                    new_series = cudf.Series(np.arange(uniq_partition_vector_64.get()[0].at(0),
+                                                       uniq_partition_vector_64.get()[0].at(1)),
+                                             dtype=vertex_t)  # int64 vertices: read the _64 partition vector, as the branch above does
                 
                 # create new cudf df
                 #
diff --git a/python/cugraph/structure/utils.pxd b/python/cugraph/structure/utils.pxd
index 0ec9c914347..c22e64841af 100644
--- a/python/cugraph/structure/utils.pxd
+++ b/python/cugraph/structure/utils.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -19,9 +19,6 @@
 from cugraph.structure.graph_primtypes cimport *
 from libcpp.memory cimport unique_ptr
 
-cdef extern from "raft/handle.hpp" namespace "raft":
-    cdef cppclass handle_t:
-        handle_t() except +
 
 cdef extern from "functions.hpp" namespace "cugraph":
 
diff --git a/python/cugraph/structure/utils_wrapper.pyx b/python/cugraph/structure/utils_wrapper.pyx
index 00af5813056..65c1ca09750 100644
--- a/python/cugraph/structure/utils_wrapper.pyx
+++ b/python/cugraph/structure/utils_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -22,7 +22,6 @@ from cugraph.structure.graph_primtypes cimport *
 from libc.stdint cimport uintptr_t
 
 import cudf
-import rmm
 import numpy as np
 from rmm._lib.device_buffer cimport DeviceBuffer
 from cudf.core.buffer import Buffer
diff --git a/python/cugraph/tests/utils.py b/python/cugraph/tests/utils.py
index c2c14e0c02d..56e90b1f6bb 100755
--- a/python/cugraph/tests/utils.py
+++ b/python/cugraph/tests/utils.py
@@ -61,6 +61,21 @@
     "netscience.csv"]
 ]
 
+DATASETS_MULTI_EDGES = [PurePath(RAPIDS_DATASET_ROOT_DIR)/f for f in [
+    "karate_multi_edge.csv",
+    "dolphins_multi_edge.csv"]
+]
+
+DATASETS_STR_ISLT_V = [PurePath(RAPIDS_DATASET_ROOT_DIR)/f for f in [
+    "karate_mod.mtx",
+    "karate_str.mtx"]
+]
+
+DATASETS_SELF_LOOPS = [PurePath(RAPIDS_DATASET_ROOT_DIR)/f for f in [
+    "karate_s_loop.csv",
+    "dolphins_s_loop.csv"]
+]
+
 
 #            '../datasets/email-Eu-core.csv']
 
diff --git a/python/cugraph/traversal/bfs.pxd b/python/cugraph/traversal/bfs.pxd
index 5b73d23045c..0467bf05090 100644
--- a/python/cugraph/traversal/bfs.pxd
+++ b/python/cugraph/traversal/bfs.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -16,7 +16,7 @@
 # cython: embedsignature = True
 # cython: language_level = 3
 
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 from libcpp cimport bool
 
 
diff --git a/python/cugraph/traversal/bfs_wrapper.pyx b/python/cugraph/traversal/bfs_wrapper.pyx
index ae346aea953..f475842a7bf 100644
--- a/python/cugraph/traversal/bfs_wrapper.pyx
+++ b/python/cugraph/traversal/bfs_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -17,14 +17,11 @@
 # cython: language_level = 3
 
 cimport cugraph.traversal.bfs as c_bfs
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 from cugraph.structure import graph_primtypes_wrapper
 from libcpp cimport bool
 from libc.stdint cimport uintptr_t
-from libc.float cimport FLT_MAX_EXP
-
 import cudf
-import rmm
 import numpy as np
 
 def bfs(input_graph, start, directed=True,
diff --git a/python/cugraph/traversal/sssp.pxd b/python/cugraph/traversal/sssp.pxd
index e4b709cb879..59253a5f1e4 100644
--- a/python/cugraph/traversal/sssp.pxd
+++ b/python/cugraph/traversal/sssp.pxd
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -16,7 +16,7 @@
 # cython: embedsignature = True
 # cython: language_level = 3
 
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 
 cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
 
diff --git a/python/cugraph/traversal/sssp_wrapper.pyx b/python/cugraph/traversal/sssp_wrapper.pyx
index 730fe0db94e..36e4797e0c8 100644
--- a/python/cugraph/traversal/sssp_wrapper.pyx
+++ b/python/cugraph/traversal/sssp_wrapper.pyx
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -18,17 +18,14 @@
 
 cimport cugraph.traversal.sssp as c_sssp
 cimport cugraph.traversal.bfs as c_bfs
-from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
 from cugraph.structure import graph_primtypes_wrapper
-
 from libcpp cimport bool
 from libc.stdint cimport uintptr_t
-from libc.float cimport FLT_MAX_EXP
-
 import cudf
-import rmm
 import numpy as np
 
+
 def sssp(input_graph, source):
     """
     Call sssp
diff --git a/python/cugraph/traversal/traveling_salesperson_wrapper.pyx b/python/cugraph/traversal/traveling_salesperson_wrapper.pyx
index 5f87c42a638..6eccce57a37 100644
--- a/python/cugraph/traversal/traveling_salesperson_wrapper.pyx
+++ b/python/cugraph/traversal/traveling_salesperson_wrapper.pyx
@@ -22,7 +22,6 @@ from cugraph.structure.graph_primtypes cimport *
 from libcpp cimport bool
 from libc.stdint cimport uintptr_t
 from numba import cuda
-
 import cudf
 import numpy as np
 
diff --git a/python/cugraph/tree/minimum_spanning_tree.py b/python/cugraph/tree/minimum_spanning_tree.py
index 25a365665df..45e996aa083 100644
--- a/python/cugraph/tree/minimum_spanning_tree.py
+++ b/python/cugraph/tree/minimum_spanning_tree.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -17,7 +17,7 @@
 from cugraph.utilities import cugraph_to_nx
 
 
-def minimum_spanning_tree_subgraph(G):
+def _minimum_spanning_tree_subgraph(G):
     mst_subgraph = Graph()
     if type(G) is not Graph:
         raise Exception("input graph must be undirected")
@@ -32,7 +32,7 @@ def minimum_spanning_tree_subgraph(G):
     return mst_subgraph
 
 
-def maximum_spanning_tree_subgraph(G):
+def _maximum_spanning_tree_subgraph(G):
     mst_subgraph = Graph()
     if type(G) is not Graph:
         raise Exception("input graph must be undirected")
@@ -68,28 +68,33 @@ def minimum_spanning_tree(
     ----------
     G : cuGraph.Graph or networkx.Graph
         cuGraph graph descriptor with connectivity information.
+
     weight : string
         default to the weights in the graph, if the graph edges do not have a
         weight attribute a default weight of 1 will be used.
+
     algorithm : string
         Default to 'boruvka'. The parallel algorithm to use when finding a
         minimum spanning tree.
+
     ignore_nan : bool
         Default to False
+
     Returns
     -------
     G_mst : cuGraph.Graph or networkx.Graph
         A graph descriptor with a minimum spanning tree or forest.
         The networkx graph will not have all attributes copied over
+
     """
 
     G, isNx = check_nx_graph(G)
 
     if isNx is True:
-        mst = minimum_spanning_tree_subgraph(G)
+        mst = _minimum_spanning_tree_subgraph(G)
         return cugraph_to_nx(mst)
     else:
-        return minimum_spanning_tree_subgraph(G)
+        return _minimum_spanning_tree_subgraph(G)
 
 
 def maximum_spanning_tree(
@@ -103,25 +108,30 @@ def maximum_spanning_tree(
     ----------
     G : cuGraph.Graph or networkx.Graph
         cuGraph graph descriptor with connectivity information.
+
     weight : string
         default to the weights in the graph, if the graph edges do not have a
         weight attribute a default weight of 1 will be used.
+
     algorithm : string
         Default to 'boruvka'. The parallel algorithm to use when finding a
         maximum spanning tree.
+
     ignore_nan : bool
         Default to False
+
     Returns
     -------
     G_mst : cuGraph.Graph or networkx.Graph
         A graph descriptor with a maximum spanning tree or forest.
         The networkx graph will not have all attributes copied over
+
     """
 
     G, isNx = check_nx_graph(G)
 
     if isNx is True:
-        mst = maximum_spanning_tree_subgraph(G)
+        mst = _maximum_spanning_tree_subgraph(G)
         return cugraph_to_nx(mst)
     else:
-        return maximum_spanning_tree_subgraph(G)
+        return _maximum_spanning_tree_subgraph(G)
diff --git a/python/setuputils.py b/python/setuputils.py
index 47eaf74d4b6..d93051d05ef 100644
--- a/python/setuputils.py
+++ b/python/setuputils.py
@@ -152,7 +152,7 @@ def clone_repo_if_needed(name, cpp_build_path=None,
         repo_path = (
             _get_repo_path() + '/python/_external_repositories/' + name + '/')
     else:
-        repo_path = os.path.join(cpp_build_path, name + '/src/' + name + '/')
+        repo_path = os.path.join(cpp_build_path, '_deps', name + '-src')
 
     return repo_path, repo_cloned