From 5c05a88b7cf9e007d9d3e4f9519d115dbf781478 Mon Sep 17 00:00:00 2001
From: Raymond Douglass <ray@raydouglass.com>
Date: Thu, 25 Mar 2021 14:19:09 -0400
Subject: [PATCH 1/6] DOC v0.20 Updates

---
 CHANGELOG.md                                |  4 ++++
 conda/environments/cugraph_dev_cuda10.1.yml | 16 ++++++++--------
 conda/environments/cugraph_dev_cuda10.2.yml | 16 ++++++++--------
 conda/environments/cugraph_dev_cuda11.0.yml | 16 ++++++++--------
 cpp/CMakeLists.txt                          |  2 +-
 docs/source/conf.py                         |  4 ++--
 6 files changed, 31 insertions(+), 27 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0011b99fbf3..a7b34d3e0fe 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+# cuGraph 0.20.0 (Date TBD)
+
+Please see https://github.com/rapidsai//releases/tag/v0.20.0a for the latest changes to this development branch.
+
 # cuGraph 0.19.0 (Date TBD)
 
 Please see https://github.com/rapidsai/cugraph/releases/tag/v0.19.0a for the latest changes to this development branch.
diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml
index f26c3dd45d9..cc2b0538fb1 100644
--- a/conda/environments/cugraph_dev_cuda10.1.yml
+++ b/conda/environments/cugraph_dev_cuda10.1.yml
@@ -5,17 +5,17 @@ channels:
 - rapidsai-nightly
 - conda-forge
 dependencies:
-- cudf=0.19.*
-- libcudf=0.19.*
-- rmm=0.19.*
-- cuxfilter=0.19.*
-- librmm=0.19.*
+- cudf=0.20.*
+- libcudf=0.20.*
+- rmm=0.20.*
+- cuxfilter=0.20.*
+- librmm=0.20.*
 - dask>=2.12.0
 - distributed>=2.12.0
-- dask-cuda=0.19*
-- dask-cudf=0.19*
+- dask-cuda=0.20*
+- dask-cudf=0.20*
 - nccl>=2.8.4
-- ucx-py=0.19*
+- ucx-py=0.20*
 - ucx-proc=*=gpu
 - scipy
 - networkx
diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml
index 2848cc49dc7..06cd917db9d 100644
--- a/conda/environments/cugraph_dev_cuda10.2.yml
+++ b/conda/environments/cugraph_dev_cuda10.2.yml
@@ -5,17 +5,17 @@ channels:
 - rapidsai-nightly
 - conda-forge
 dependencies:
-- cudf=0.19.*
-- libcudf=0.19.*
-- rmm=0.19.*
-- cuxfilter=0.19.*
-- librmm=0.19.*
+- cudf=0.20.*
+- libcudf=0.20.*
+- rmm=0.20.*
+- cuxfilter=0.20.*
+- librmm=0.20.*
 - dask>=2.12.0
 - distributed>=2.12.0
-- dask-cuda=0.19*
-- dask-cudf=0.19*
+- dask-cuda=0.20*
+- dask-cudf=0.20*
 - nccl>=2.8.4
-- ucx-py=0.19*
+- ucx-py=0.20*
 - ucx-proc=*=gpu
 - scipy
 - networkx
diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml
index 82e8b409d13..00f202a6025 100644
--- a/conda/environments/cugraph_dev_cuda11.0.yml
+++ b/conda/environments/cugraph_dev_cuda11.0.yml
@@ -5,17 +5,17 @@ channels:
 - rapidsai-nightly
 - conda-forge
 dependencies:
-- cudf=0.19.*
-- libcudf=0.19.*
-- rmm=0.19.*
-- cuxfilter=0.19.*
-- librmm=0.19.*
+- cudf=0.20.*
+- libcudf=0.20.*
+- rmm=0.20.*
+- cuxfilter=0.20.*
+- librmm=0.20.*
 - dask>=2.12.0
 - distributed>=2.12.0
-- dask-cuda=0.19*
-- dask-cudf=0.19*
+- dask-cuda=0.20*
+- dask-cudf=0.20*
 - nccl>=2.8.4
-- ucx-py=0.19*
+- ucx-py=0.20*
 - ucx-proc=*=gpu
 - scipy
 - networkx
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 34ea935e31d..7593a5cb89e 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -16,7 +16,7 @@
 
 cmake_minimum_required(VERSION 3.18...3.18 FATAL_ERROR)
 
-project(CUGRAPH VERSION 0.19.0 LANGUAGES C CXX CUDA)
+project(CUGRAPH VERSION 0.20.0 LANGUAGES C CXX CUDA)
 
 # Write the version header
 include(cmake/Modules/Version.cmake)
diff --git a/docs/source/conf.py b/docs/source/conf.py
index eb4745a61f0..77053a3468a 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -80,9 +80,9 @@
 # built documents.
 #
 # The short X.Y version.
-version = '0.19'
+version = '0.20'
 # The full version, including alpha/beta/rc tags.
-release = '0.19.0'
+release = '0.20.0'
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.

From 26a203dcb770be7e00e2422f8c78c08346a3cad6 Mon Sep 17 00:00:00 2001
From: Ray Douglass <3107146+raydouglass@users.noreply.github.com>
Date: Thu, 25 Mar 2021 14:28:47 -0400
Subject: [PATCH 2/6] Update CHANGELOG.md

---
 CHANGELOG.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index a7b34d3e0fe..bd5b313e550 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,6 @@
 # cuGraph 0.20.0 (Date TBD)
 
-Please see https://github.com/rapidsai//releases/tag/v0.20.0a for the latest changes to this development branch.
+Please see https://github.com/rapidsai/cugraph/releases/tag/v0.20.0a for the latest changes to this development branch.
 
 # cuGraph 0.19.0 (Date TBD)
 

From 63e69fcf32742fdee7e14267ba6accd94fd19c4c Mon Sep 17 00:00:00 2001
From: Joseph Nke <76006812+jnke2016@users.noreply.github.com>
Date: Wed, 7 Apr 2021 18:51:32 -0500
Subject: [PATCH 3/6] Random Walks - Python Bindings (#1516)

Python bindings for random walks
closes #1488
Check the rendering after the PR is merged to make sure everything renders as expected.

Authors:
  - Joseph Nke (https://github.com/jnke2016)

Approvers:
  - Brad Rees (https://github.com/BradReesWork)
  - Andrei Schaffer (https://github.com/aschaffer)
  - Alex Fender (https://github.com/afender)

URL: https://github.com/rapidsai/cugraph/pull/1516
---
 README.md                                     |   1 +
 docs/source/api.rst                           |  11 ++
 python/cugraph/__init__.py                    |   2 +
 python/cugraph/sampling/__init__.py           |  14 ++
 python/cugraph/sampling/random_walks.pxd      |  22 +++
 python/cugraph/sampling/random_walks.py       |  95 +++++++++++
 .../cugraph/sampling/random_walks_wrapper.pyx | 116 +++++++++++++
 python/cugraph/structure/graph_utilities.pxd  |   9 +
 python/cugraph/tests/test_random_walks.py     | 154 ++++++++++++++++++
 9 files changed, 424 insertions(+)
 create mode 100644 python/cugraph/sampling/__init__.py
 create mode 100644 python/cugraph/sampling/random_walks.pxd
 create mode 100644 python/cugraph/sampling/random_walks.py
 create mode 100644 python/cugraph/sampling/random_walks_wrapper.pyx
 create mode 100644 python/cugraph/tests/test_random_walks.py

diff --git a/README.md b/README.md
index 4bdbcd00280..ccc91bfe225 100644
--- a/README.md
+++ b/README.md
@@ -82,6 +82,7 @@ As of Release 0.18 - including 0.18 nightly
 |              | Breadth First Search (BFS)             | Multi-GPU    | with cutoff support <br/> [C++ README](cpp/src/traversal/README.md#BFS) |
 |              | Single Source Shortest Path (SSSP)     | Multi-GPU    | [C++ README](cpp/src/traversal/README.md#SSSP) |
 |              | Traveling Salesperson Problem (TSP)    | Single-GPU   |                     |
+| Sampling     | Random Walks (RW)                      | Single-GPU   |                     |
 | Structure    |                                        |              |                     |
 |              | Renumbering                            | Single-GPU   | multiple columns, any data type  |
 |              | Symmetrize                             | Multi-GPU    |                     |
diff --git a/docs/source/api.rst b/docs/source/api.rst
index b02f8f488c5..b9b8ea4859c 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -225,6 +225,17 @@ Overlap Coefficient
     :undoc-members:
 
 
+Sampling
+========
+
+Random Walks
+------------
+
+.. automodule:: cugraph.sampling.random_walks
+    :members:
+    :undoc-members:
+
+
 Traversal
 =========
 
diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py
index 11ba2d6ef96..d4632708591 100644
--- a/python/cugraph/__init__.py
+++ b/python/cugraph/__init__.py
@@ -101,6 +101,8 @@
 from cugraph.raft import raft_include_test
 from cugraph.comms import comms
 
+from cugraph.sampling import random_walks
+
 # Versioneer
 from ._version import get_versions
 
diff --git a/python/cugraph/sampling/__init__.py b/python/cugraph/sampling/__init__.py
new file mode 100644
index 00000000000..fd9d072d4f8
--- /dev/null
+++ b/python/cugraph/sampling/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from cugraph.sampling.random_walks import random_walks
diff --git a/python/cugraph/sampling/random_walks.pxd b/python/cugraph/sampling/random_walks.pxd
new file mode 100644
index 00000000000..3e0e24b4e98
--- /dev/null
+++ b/python/cugraph/sampling/random_walks.pxd
@@ -0,0 +1,22 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
+
+cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
+    cdef unique_ptr[random_walk_ret_t] call_random_walks[vertex_t, edge_t](
+      const handle_t &handle,
+      const graph_container_t &g,
+      const vertex_t *ptr_d_start,  # start vertices; wrapper passes a device pointer
+      edge_t num_paths,  # wrapper passes start_vertices.size
+      edge_t max_depth) except +  # C++ exceptions surface as Python exceptions
diff --git a/python/cugraph/sampling/random_walks.py b/python/cugraph/sampling/random_walks.py
new file mode 100644
index 00000000000..7ab3191a07c
--- /dev/null
+++ b/python/cugraph/sampling/random_walks.py
@@ -0,0 +1,95 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import cudf
+from cugraph.sampling import random_walks_wrapper
+import cugraph
+from collections import defaultdict
+
+# FIXME might be more efficient to return either (df + offset) or 3 cudf.Series
+
+
+def random_walks(
+    G,
+    start_vertices,
+    max_depth=None
+):
+    """
+    Compute random walks for each node in 'start_vertices'.
+
+    Parameters
+    ----------
+    G : cuGraph.Graph or networkx.Graph
+        The graph can be either directed (DiGraph) or undirected (Graph).
+        Weights in the graph are ignored.
+        Use weight parameter if weights need to be considered
+        (currently not supported)
+
+    start_vertices : int or list or cudf.Series
+        A single node or a list or a cudf.Series of nodes from which to run
+        the random walks
+
+    max_depth : int
+        The maximum depth of the random walks. Required: leaving it as
+        None raises TypeError.
+
+    Returns
+    -------
+    random_walks_edge_lists : cudf.DataFrame
+        GPU data frame containing all random walks sources identifiers,
+        destination identifiers, edge weights
+
+    seeds_offsets: cudf.Series
+        Series containing the starting offset in the returned edge list
+        for each vertex in start_vertices.
+    """
+    if max_depth is None:
+        raise TypeError("must specify a 'max_depth'")
+
+    G, _ = cugraph.utilities.check_nx_graph(G)
+
+    # A bare int is a single seed; wrap it so cudf.Series() gets a sequence.
+    if isinstance(start_vertices, int):
+        start_vertices = [start_vertices]
+    if not isinstance(start_vertices, cudf.Series):
+        start_vertices = cudf.Series(start_vertices)
+
+    if G.renumbered is True:
+        start_vertices = G.lookup_internal_vertex_id(start_vertices)
+    vertex_set, edge_set, sizes = random_walks_wrapper.random_walks(
+        G, start_vertices, max_depth)
+
+    if G.renumbered:
+        df_ = cudf.DataFrame()
+        df_['vertex_set'] = vertex_set
+        df_ = G.unrenumber(df_, 'vertex_set', preserve_order=True)
+        vertex_set = cudf.Series(df_['vertex_set'])
+
+    edge_list = defaultdict(list)
+    next_path_idx = 0
+    offsets = [0]
+    host_vertices = vertex_set.values_host  # single host copy, reused below
+    df = cudf.DataFrame()
+    for s in sizes.values_host:
+        for i in range(next_path_idx, s+next_path_idx-1):
+            edge_list['src'].append(host_vertices[i])
+            edge_list['dst'].append(host_vertices[i+1])
+        next_path_idx += s
+        df = df.append(edge_list, ignore_index=True)
+        offsets.append(df.index[-1]+1)
+        edge_list['src'].clear()
+        edge_list['dst'].clear()
+    df['weight'] = edge_set
+    offsets = cudf.Series(offsets)
+
+    return df, offsets
diff --git a/python/cugraph/sampling/random_walks_wrapper.pyx b/python/cugraph/sampling/random_walks_wrapper.pyx
new file mode 100644
index 00000000000..7b16ff14018
--- /dev/null
+++ b/python/cugraph/sampling/random_walks_wrapper.pyx
@@ -0,0 +1,116 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from cugraph.sampling.random_walks cimport call_random_walks
+#from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
+from libcpp cimport bool
+from libcpp.utility cimport move
+from libc.stdint cimport uintptr_t
+from cugraph.structure import graph_primtypes_wrapper
+import cudf
+import rmm
+import numpy as np
+import numpy.ctypeslib as ctypeslib
+from rmm._lib.device_buffer cimport DeviceBuffer
+from cudf.core.buffer import Buffer
+from cython.operator cimport dereference as deref
+def random_walks(input_graph, start_vertices, max_depth):
+    """
+    Call C++ call_random_walks; return (vertices, weights, sizes) cudf.Series.
+    """
+    # FIXME: Offsets and indices are currently hardcoded to int, but this may
+    #        not be acceptable in the future.
+    numberTypeMap = {np.dtype("int32") : <int>numberTypeEnum.int32Type,
+                     np.dtype("int64") : <int>numberTypeEnum.int64Type,
+                     np.dtype("float32") : <int>numberTypeEnum.floatType,
+                     np.dtype("double") : <int>numberTypeEnum.doubleType}
+    [src, dst] = [input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']]
+    vertex_t = src.dtype
+    edge_t = np.dtype("int32")
+    weights = None
+    if input_graph.edgelist.weights:
+        weights = input_graph.edgelist.edgelist_df['weights']
+    num_verts = input_graph.number_of_vertices()
+    num_edges = input_graph.number_of_edges(directed_edges=True)
+    num_partition_edges = num_edges
+    # Switch to 64-bit edge ids once the edge count no longer fits in int32.
+    if num_edges > (2**31 - 1):
+        edge_t = np.dtype("int64")
+    cdef unique_ptr[random_walk_ret_t] rw_ret_ptr
+
+    cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0]
+    cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0]
+    cdef uintptr_t c_edge_weights = <uintptr_t>NULL
+    if weights is not None:
+        c_edge_weights = weights.__cuda_array_interface__['data'][0]
+        weight_t = weights.dtype
+        is_weighted = True
+    else:
+        weight_t = np.dtype("float32")
+        is_weighted = False
+    # Seeds must be vertex_t: their buffer is passed as vertex_t* below.
+    start_vertices = start_vertices.astype(vertex_t)
+    cdef uintptr_t c_start_vertex_ptr = start_vertices.__cuda_array_interface__['data'][0]
+    num_paths = start_vertices.size
+    cdef unique_ptr[handle_t] handle_ptr
+    handle_ptr.reset(new handle_t())
+    handle_ = handle_ptr.get()
+    cdef graph_container_t graph_container
+    populate_graph_container(graph_container,
+                             handle_[0],
+                             <void*>c_src_vertices, <void*>c_dst_vertices, <void*>c_edge_weights,
+                             <void*>NULL,
+                             <numberTypeEnum>(<int>(numberTypeMap[vertex_t])),
+                             <numberTypeEnum>(<int>(numberTypeMap[edge_t])),
+                             <numberTypeEnum>(<int>(numberTypeMap[weight_t])),
+                             num_partition_edges,
+                             num_verts,
+                             num_edges,
+                             False,
+                             is_weighted,
+                             False, False)
+    if(vertex_t == np.dtype("int32")):
+        if(edge_t == np.dtype("int32")):
+            rw_ret_ptr = move(call_random_walks[int, int]( deref(handle_),
+                                                           graph_container,
+                                                           <int*> c_start_vertex_ptr,
+                                                           <int> num_paths,
+                                                           <int> max_depth))
+        else: # (edge_t == np.dtype("int64")):
+            rw_ret_ptr = move(call_random_walks[int, long]( deref(handle_),
+                                                           graph_container,
+                                                           <int*> c_start_vertex_ptr,
+                                                           <long> num_paths,
+                                                           <long> max_depth))
+    else: # (vertex_t == edge_t == np.dtype("int64")):
+        rw_ret_ptr = move(call_random_walks[long, long]( deref(handle_),
+                                                           graph_container,
+                                                           <long*> c_start_vertex_ptr,
+                                                           <long> num_paths,
+                                                           <long> max_depth))
+
+    # Wrap the returned device buffers as cudf Series.
+    rw_ret= move(rw_ret_ptr.get()[0])
+    vertex_set = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_coalesced_v_))
+    edge_set = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_coalesced_w_))
+    sizes = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_sizes_))
+    vertex_set = Buffer(vertex_set)
+    edge_set = Buffer(edge_set)
+    sizes = Buffer(sizes)
+
+    set_vertex = cudf.Series(data=vertex_set, dtype=vertex_t)
+    set_edge = cudf.Series(data=edge_set, dtype=weight_t)
+    set_sizes = cudf.Series(data=sizes, dtype=edge_t)
+
+    return set_vertex, set_edge, set_sizes
+    
\ No newline at end of file
diff --git a/python/cugraph/structure/graph_utilities.pxd b/python/cugraph/structure/graph_utilities.pxd
index b169e42ccf8..c9cf1748bfe 100644
--- a/python/cugraph/structure/graph_utilities.pxd
+++ b/python/cugraph/structure/graph_utilities.pxd
@@ -83,6 +83,15 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
         unique_ptr[device_buffer] dst_indices
         unique_ptr[device_buffer] edge_data
         unique_ptr[device_buffer] subgraph_offsets
+    
+    cdef cppclass random_walk_ret_t:  # result type of call_random_walks
+        size_t coalesced_sz_v_  # presumably element count of d_coalesced_v_ - TODO confirm
+        size_t coalesced_sz_w_  # presumably element count of d_coalesced_w_ - TODO confirm
+        size_t num_paths_
+        size_t max_depth_
+        unique_ptr[device_buffer] d_coalesced_v_  # wrapper exposes this as the vertex Series
+        unique_ptr[device_buffer] d_coalesced_w_  # wrapper exposes this as the weight Series
+        unique_ptr[device_buffer] d_sizes_  # wrapper exposes this as the sizes Series
 
 cdef extern from "<utility>" namespace "std" nogil:
     cdef device_buffer move(device_buffer)
diff --git a/python/cugraph/tests/test_random_walks.py b/python/cugraph/tests/test_random_walks.py
new file mode 100644
index 00000000000..9767e81ba1f
--- /dev/null
+++ b/python/cugraph/tests/test_random_walks.py
@@ -0,0 +1,154 @@
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+
+import pytest
+
+from cugraph.tests import utils
+import cugraph
+import random
+
+
+# =============================================================================
+# Parameters
+# =============================================================================
+DIRECTED_GRAPH_OPTIONS = [False, True]
+WEIGHTED_GRAPH_OPTIONS = [False, True]
+DATASETS = [pytest.param(d) for d in utils.DATASETS]
+DATASETS_SMALL = [pytest.param(d) for d in utils.DATASETS_SMALL]
+
+
+def calc_random_walks(
+    graph_file,
+    directed=False,
+    max_depth=None
+):
+    """
+    Build a graph from 'graph_file' and run cugraph.random_walks on it
+    from a randomly chosen set of start vertices.
+
+    Parameters
+    ----------
+    graph_file
+        Dataset file handed to utils.generate_cugraph_graph_from_file
+        (called with edgevals=True).
+
+    directed : bool
+        Forwarded as the 'directed' argument of
+        utils.generate_cugraph_graph_from_file.
+
+    max_depth : int
+        Forwarded to cugraph.random_walks as its third argument.
+
+    Returns
+    -------
+    df : cudf.DataFrame
+        First value returned by cugraph.random_walks.
+
+    offsets : cudf.Series
+        Second value returned by cugraph.random_walks.
+
+    start_vertices : list of int
+        The k (1 <= k <= 10) distinct values sampled from
+        range(G.number_of_vertices()) and used as the walk seeds.
+    """
+    G = utils.generate_cugraph_graph_from_file(
+        graph_file, directed=directed, edgevals=True)
+    assert G is not None
+
+    k = random.randint(1, 10)
+    start_vertices = random.sample(range(G.number_of_vertices()), k)
+    df, offsets = cugraph.random_walks(G, start_vertices, max_depth)
+
+    return df, offsets, start_vertices
+
+
+def check_random_walks(df, offsets, seeds, df_G=None):
+    """Assert each walk in df starts at its seed and that every (src, dst,
+    weight) row of df also appears in df_G (required despite the default)."""
+    invalid_edge = 0
+    invalid_seeds = 0
+    invalid_weight = 0
+    offsets_idx = 0
+    for i in range(len(df.index)):
+        src, dst, weight = df.iloc[i].to_array()
+        if i == offsets[offsets_idx]:
+            if df['src'].iloc[i] != seeds[offsets_idx]:
+                invalid_seeds += 1
+                print("[ERR] Invalid seed: src {} != seed {}".format(
+                    df['src'].iloc[i], seeds[offsets_idx]))
+            offsets_idx += 1
+
+        edge = df.loc[(df['src'] == (src)) & (df['dst'] == (dst))].reset_index(
+            drop=True)
+        exp_edge = df_G.loc[
+            (df_G['src'] == (src)) & (
+                df_G['dst'] == (dst))].reset_index(drop=True)
+
+        if not exp_edge.equals(edge[:1]):
+            print("[ERR] Invalid edge: There is no edge src {} dst {} "
+                  "weight {}".format(src, dst, weight))
+            # No (src, dst) row in df_G: bad edge; else the row differs.
+            if len(exp_edge) == 0:
+                invalid_edge += 1
+            else:
+                invalid_weight += 1
+
+    assert invalid_edge == 0
+    assert invalid_seeds == 0
+    assert invalid_weight == 0
+
+# =============================================================================
+# Pytest Setup / Teardown - called for each test function
+# =============================================================================
+
+
+def prepare_test():
+    gc.collect()  # run a full garbage collection before the test body
+
+
+@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL)
+@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
+@pytest.mark.parametrize("max_depth", [None])
+def test_random_walks_invalid_max_dept(
+    graph_file,
+    directed,
+    max_depth
+):
+    """random_walks must raise TypeError when max_depth is None."""
+    prepare_test()
+    with pytest.raises(TypeError):
+        df, offsets, seeds = calc_random_walks(
+            graph_file,
+            directed=directed,
+            max_depth=max_depth
+        )
+
+
+@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL)
+@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
+def test_random_walks(
+    graph_file,
+    directed
+):
+    max_depth = random.randint(2, 10)  # not seeded in this file
+    df_G = utils.read_csv_file(graph_file)  # expected edge list for the check
+    df_G.rename(
+        columns={"0": "src", "1": "dst", "2": "weight"}, inplace=True)
+    df, offsets, seeds = calc_random_walks(
+        graph_file,
+        directed,
+        max_depth=max_depth
+    )
+    check_random_walks(df, offsets, seeds, df_G)  # asserts on bad seed/edge

From 9fd4f3c92135108f67f986b3f8d8633f4de47f0f Mon Sep 17 00:00:00 2001
From: Ayush Dattagupta <ayushdg95@gmail.com>
Date: Thu, 8 Apr 2021 09:53:50 -0700
Subject: [PATCH 4/6] Update docs and remove all warnings (#1521)

This PR fixes the following:
- Add traveling salesperson problem to the docs
- Update docs to address all build warnings


To remove some warnings, the use of `NOTE:` was updated in cases like the one shown below.

| Old  | New |
| ------------- | ------------- |
| ![image](https://user-images.githubusercontent.com/19949207/113936070-283a2380-97ac-11eb-9705-9f261c965fa9.png) | ![image](https://user-images.githubusercontent.com/19949207/113935703-b06bf900-97ab-11eb-93a4-7df2f711c1aa.png)  |

Authors:
  - Ayush Dattagupta (https://github.com/ayushdg)

Approvers:
  - Brad Rees (https://github.com/BradReesWork)
  - Rick Ratzel (https://github.com/rlratzel)
  - AJ Schmidt (https://github.com/ajschmidt8)

URL: https://github.com/rapidsai/cugraph/pull/1521
---
 docs/source/api.rst                           | 29 ++++++++-------
 python/cugraph/centrality/katz_centrality.py  | 18 +++++-----
 python/cugraph/components/connectivity.py     | 36 ++++++++++++-------
 .../dask/centrality/katz_centrality.py        | 19 +++++-----
 python/cugraph/dask/link_analysis/pagerank.py |  2 ++
 python/cugraph/link_analysis/pagerank.py      |  1 -
 python/cugraph/structure/symmetrize.py        |  1 +
 python/cugraph/traversal/bfs.py               |  6 ++--
 .../traversal/traveling_salesperson.py        |  1 +
 9 files changed, 70 insertions(+), 43 deletions(-)

diff --git a/docs/source/api.rst b/docs/source/api.rst
index b9b8ea4859c..e2c2c19cf02 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -192,7 +192,7 @@ Pagerank
     :undoc-members:
 
 Pagerank (MG)
----------
+-------------
 
 .. automodule:: cugraph.dask.link_analysis.pagerank
     :members: pagerank
@@ -247,7 +247,7 @@ Breadth-first-search
     :undoc-members:
 
 Breadth-first-search (MG)
---------------------
+-------------------------
 
 .. automodule:: cugraph.dask.traversal.bfs
     :members:
@@ -261,12 +261,19 @@ Single-source-shortest-path
     :undoc-members:
 
 Single-source-shortest-path (MG)
----------------------------
+--------------------------------
 
 .. automodule:: cugraph.dask.traversal.sssp
     :members:
     :undoc-members:
 
+Traveling-salesperson-problem
+-----------------------------
+
+.. automodule:: cugraph.traversal.traveling_salesperson
+    :members:
+    :undoc-members:
+
 
 Tree
 =========
@@ -275,27 +282,25 @@ Minimum Spanning Tree
 ---------------------
 
 .. automodule:: cugraph.tree.minimum_spanning_tree
-    :members:
+    :members: minimum_spanning_tree
     :undoc-members:
 
 Maximum Spanning Tree
 ---------------------
 
-.. automodule:: cugraph.tree.maximum_spanning_tree
-    :members:
+.. automodule:: cugraph.tree.minimum_spanning_tree
+    :members: maximum_spanning_tree
     :undoc-members:
+    :noindex:
 
 
-DASK MG Helper functions 
+DASK MG Helper functions
 ===========================
 
 .. automodule:: cugraph.comms.comms
-    :members: initialize
-    :undoc-members:
-
-.. automodule:: cugraph.comms.comms
-    :members: destroy
+    :members: initialize, destroy
     :undoc-members:
+    :member-order: bysource
 
 .. automodule:: cugraph.dask.common.read_utils
     :members: get_chunksize
diff --git a/python/cugraph/centrality/katz_centrality.py b/python/cugraph/centrality/katz_centrality.py
index 3e2680a196f..ce52d15f5db 100644
--- a/python/cugraph/centrality/katz_centrality.py
+++ b/python/cugraph/centrality/katz_centrality.py
@@ -39,14 +39,16 @@ def katz_centrality(
         Attenuation factor defaulted to None. If alpha is not specified then
         it is internally calculated as 1/(degree_max) where degree_max is the
         maximum out degree.
-        NOTE : The maximum acceptable value of alpha for convergence
-        alpha_max = 1/(lambda_max) where lambda_max is the largest eigenvalue
-        of the graph.
-        Since lambda_max is always lesser than or equal to degree_max for a
-        graph, alpha_max will always be greater than or equal to
-        (1/degree_max). Therefore, setting alpha to (1/degree_max) will
-        guarantee that it will never exceed alpha_max thus in turn fulfilling
-        the requirement for convergence.
+
+        NOTE
+            The maximum acceptable value of alpha for convergence is
+            alpha_max = 1/(lambda_max), where lambda_max is the largest
+            eigenvalue of the graph.
+            Since lambda_max is always less than or equal to degree_max
+            for a graph, alpha_max will always be greater than or equal to
+            (1/degree_max). Therefore, setting alpha to (1/degree_max) will
+            guarantee that it never exceeds alpha_max, thus fulfilling the
+            requirement for convergence.
     beta : None
         A weight scalar - currently Not Supported
     max_iter : int
diff --git a/python/cugraph/components/connectivity.py b/python/cugraph/components/connectivity.py
index 72f33ebfcbb..df33f8b8e03 100644
--- a/python/cugraph/components/connectivity.py
+++ b/python/cugraph/components/connectivity.py
@@ -138,8 +138,10 @@ def weakly_connected_components(G,
 
     directed : bool, optional
 
-        NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises
-              TypeError if used with a Graph object.
+        NOTE
+            For non-Graph-type (eg. sparse matrix) values of G only.
+            Raises TypeError if used with a Graph object.
+
         If True (default), then convert the input matrix to a cugraph.DiGraph
         and only move from point i to point j along paths csgraph[i, j]. If
         False, then find the shortest path on an undirected graph: the
@@ -154,8 +156,10 @@ def weakly_connected_components(G,
 
     return_labels : bool, optional
 
-        NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises
-              TypeError if used with a Graph object.
+        NOTE
+            For non-Graph-type (eg. sparse matrix) values of G only. Raises
+            TypeError if used with a Graph object.
+
         If True (default), then return the labels for each of the connected
         components.
 
@@ -231,8 +235,10 @@ def strongly_connected_components(G,
 
     directed : bool, optional
 
-        NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises
-              TypeError if used with a Graph object.
+        NOTE
+            For non-Graph-type (eg. sparse matrix) values of G only.
+            Raises TypeError if used with a Graph object.
+
         If True (default), then convert the input matrix to a cugraph.DiGraph
         and only move from point i to point j along paths csgraph[i, j]. If
         False, then find the shortest path on an undirected graph: the
@@ -247,8 +253,10 @@ def strongly_connected_components(G,
 
     return_labels : bool, optional
 
-        NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises
-              TypeError if used with a Graph object.
+        NOTE
+            For non-Graph-type (eg. sparse matrix) values of G only. Raises
+            TypeError if used with a Graph object.
+
         If True (default), then return the labels for each of the connected
         components.
 
@@ -325,8 +333,10 @@ def connected_components(G,
 
     directed : bool, optional
 
-        NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises
-              TypeError if used with a Graph object.
+        NOTE
+            For non-Graph-type (eg. sparse matrix) values of G only. Raises
+            TypeError if used with a Graph object.
+
         If True (default), then convert the input matrix to a cugraph.DiGraph
         and only move from point i to point j along paths csgraph[i, j]. If
         False, then find the shortest path on an undirected graph: the
@@ -340,8 +350,10 @@ def connected_components(G,
 
     return_labels : bool, optional
 
-        NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises
-              TypeError if used with a Graph object.
+        NOTE
+            For non-Graph-type (eg. sparse matrix) values of G only. Raises
+            TypeError if used with a Graph object.
+
         If True (default), then return the labels for each of the connected
         components.
 
diff --git a/python/cugraph/dask/centrality/katz_centrality.py b/python/cugraph/dask/centrality/katz_centrality.py
index a2f83a0b2a8..45deda8b7ae 100644
--- a/python/cugraph/dask/centrality/katz_centrality.py
+++ b/python/cugraph/dask/centrality/katz_centrality.py
@@ -68,14 +68,16 @@ def katz_centrality(input_graph,
         Attenuation factor defaulted to None. If alpha is not specified then
         it is internally calculated as 1/(degree_max) where degree_max is the
         maximum out degree.
-        NOTE : The maximum acceptable value of alpha for convergence
-        alpha_max = 1/(lambda_max) where lambda_max is the largest eigenvalue
-        of the graph.
-        Since lambda_max is always lesser than or equal to degree_max for a
-        graph, alpha_max will always be greater than or equal to
-        (1/degree_max). Therefore, setting alpha to (1/degree_max) will
-        guarantee that it will never exceed alpha_max thus in turn fulfilling
-        the requirement for convergence.
+
+        NOTE
+            The maximum acceptable value of alpha for convergence
+            alpha_max = 1/(lambda_max) where lambda_max is the largest eigenvalue
+            of the graph.
+            Since lambda_max is always less than or equal to degree_max for a
+            graph, alpha_max will always be greater than or equal to
+            (1/degree_max). Therefore, setting alpha to (1/degree_max) will
+            guarantee that it will never exceed alpha_max thus in turn fulfilling
+            the requirement for convergence.
     beta : None
         A weight scalar - currently Not Supported
     max_iter : int
@@ -94,6 +96,7 @@ def katz_centrality(input_graph,
         acceptable.
     nstart : dask_cudf.Dataframe
         GPU Dataframe containing the initial guess for katz centrality
+
         nstart['vertex'] : dask_cudf.Series
             Contains the vertex identifiers
         nstart['values'] : dask_cudf.Series
diff --git a/python/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/dask/link_analysis/pagerank.py
index bfaada85a6f..fb9f4ad3a25 100644
--- a/python/cugraph/dask/link_analysis/pagerank.py
+++ b/python/cugraph/dask/link_analysis/pagerank.py
@@ -73,6 +73,7 @@ def pagerank(input_graph,
     personalization : cudf.Dataframe
         GPU Dataframe containing the personalization information.
         Currently not supported.
+
         personalization['vertex'] : cudf.Series
             Subset of vertices of graph for personalization
         personalization['values'] : cudf.Series
@@ -91,6 +92,7 @@ def pagerank(input_graph,
         acceptable.
     nstart : not supported
         initial guess for pagerank
+
     Returns
     -------
     PageRank : dask_cudf.DataFrame
diff --git a/python/cugraph/link_analysis/pagerank.py b/python/cugraph/link_analysis/pagerank.py
index 0bb89195e01..8a03ee077f6 100644
--- a/python/cugraph/link_analysis/pagerank.py
+++ b/python/cugraph/link_analysis/pagerank.py
@@ -46,7 +46,6 @@ def pagerank(
             Subset of vertices of graph for personalization
         personalization['values'] : cudf.Series
             Personalization values for vertices
-
     max_iter : int
         The maximum number of iterations before an answer is returned. This can
         be used to limit the execution time and do an early exit before the
diff --git a/python/cugraph/structure/symmetrize.py b/python/cugraph/structure/symmetrize.py
index 0f4ca90a97c..8720f7ad343 100644
--- a/python/cugraph/structure/symmetrize.py
+++ b/python/cugraph/structure/symmetrize.py
@@ -32,6 +32,7 @@ def symmetrize_df(df, src_name, dst_name, multi=False, symmetrize=True):
     != data2 then this code will arbitrarily pick the smaller data
     element to keep, if this is not desired then the caller should
     should correct the data prior to calling symmetrize.
+
     Parameters
     ----------
     df : cudf.DataFrame
diff --git a/python/cugraph/traversal/bfs.py b/python/cugraph/traversal/bfs.py
index efbae095676..a483b96850b 100644
--- a/python/cugraph/traversal/bfs.py
+++ b/python/cugraph/traversal/bfs.py
@@ -136,8 +136,10 @@ def bfs(G,
         can be set, not both.
 
     directed : bool, optional
-        NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises
-              TypeError if used with a Graph object.
+        NOTE
+            For non-Graph-type (eg. sparse matrix) values of G only. Raises
+            TypeError if used with a Graph object.
+
         If True (default), then convert the input matrix to a cugraph.DiGraph,
         otherwise a cugraph.Graph object will be used.
 
diff --git a/python/cugraph/traversal/traveling_salesperson.py b/python/cugraph/traversal/traveling_salesperson.py
index ae17555e4ea..7aea7ae603f 100644
--- a/python/cugraph/traversal/traveling_salesperson.py
+++ b/python/cugraph/traversal/traveling_salesperson.py
@@ -29,6 +29,7 @@ def traveling_salesperson(pos_list,
     optimization.
 
     The current implementation does not support a weighted graph.
+
     Parameters
     ----------
     pos_list: cudf.DataFrame

From e9d09eeb11414c2e12c46b4a188186e1ceee032d Mon Sep 17 00:00:00 2001
From: Iroy30 <41401566+Iroy30@users.noreply.github.com>
Date: Thu, 8 Apr 2021 21:26:13 -0500
Subject: [PATCH 5/6] fix mg_renumber non-deterministic errors (#1523)

* @Iroy30 added missing dask `persist()` call to ensure deterministic indirection map state prior to merging renumbering results.
* @rlratzel updated MG renumbering test for latest API changes, removed redundant test, and updated test IDs to include the dataset name.

Authors:
  - https://github.com/Iroy30
  - Rick Ratzel (https://github.com/rlratzel)

Approvers:
  - Brad Rees (https://github.com/BradReesWork)
  - Joseph Nke (https://github.com/jnke2016)

URL: https://github.com/rapidsai/cugraph/pull/1523
---
 python/cugraph/structure/number_map.py        |  8 +-
 .../test_mg_batch_betweenness_centrality.py   |  3 +-
 ...st_mg_batch_edge_betweenness_centrality.py |  5 +-
 python/cugraph/tests/dask/test_mg_bfs.py      |  5 +-
 python/cugraph/tests/dask/test_mg_comms.py    |  6 +-
 python/cugraph/tests/dask/test_mg_degree.py   |  5 +-
 .../tests/dask/test_mg_katz_centrality.py     |  5 +-
 python/cugraph/tests/dask/test_mg_louvain.py  |  7 +-
 python/cugraph/tests/dask/test_mg_pagerank.py |  5 +-
 python/cugraph/tests/dask/test_mg_renumber.py | 93 ++++++-------------
 .../cugraph/tests/dask/test_mg_replication.py | 46 ++++++---
 python/cugraph/tests/dask/test_mg_sssp.py     |  5 +-
 python/cugraph/tests/dask/test_mg_utility.py  |  5 +-
 13 files changed, 107 insertions(+), 91 deletions(-)

diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py
index e45a50d6dbe..cd24dfc0434 100644
--- a/python/cugraph/structure/number_map.py
+++ b/python/cugraph/structure/number_map.py
@@ -263,7 +263,6 @@ def indirection_map(self, ddf, src_col_names, dst_col_names):
                             to_frame(name=newname)
                     else:
                         tmp_df[newname] = tmp[newname].append(tmp_dst[oldname])
-                    print(tmp_df.columns)
             else:
                 for newname in self.col_names:
                     tmp_df[newname] = tmp[newname]
@@ -273,7 +272,7 @@ def indirection_map(self, ddf, src_col_names, dst_col_names):
             tmp_ddf = tmp_ddf.assign(idx=1)
             tmp_ddf['global_id'] = tmp_ddf.idx.cumsum() - 1
             tmp_ddf = tmp_ddf.drop(columns='idx')
-
+            tmp_ddf = tmp_ddf.persist()
             self.ddf = tmp_ddf
             return tmp_ddf
 
@@ -481,8 +480,6 @@ def renumber(df, src_col_names, dst_col_names, preserve_order=False,
             renumber_type = 'legacy'
         else:
             renumber_type = 'experimental'
-            df = df.rename(columns={src_col_names: "src",
-                                    dst_col_names: "dst"})
 
         renumber_map = NumberMap()
         if not isinstance(src_col_names, list):
@@ -514,6 +511,9 @@ def renumber(df, src_col_names, dst_col_names, preserve_order=False,
                 df, "dst", dst_col_names, drop=True,
                 preserve_order=preserve_order
             )
+        else:
+            df = df.rename(columns={src_col_names[0]: "src",
+                                    dst_col_names[0]: "dst"})
 
         num_edges = len(df)
 
diff --git a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py
index 6e1e5ea380a..02696f589e3 100644
--- a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py
+++ b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py
@@ -51,7 +51,8 @@
 @pytest.mark.skipif(
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
-@pytest.mark.parametrize("graph_file", DATASETS)
+@pytest.mark.parametrize("graph_file", DATASETS,
+                         ids=[f"dataset={d.as_posix()}" for d in DATASETS])
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
 @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
 @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
diff --git a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py
index 54b58c340aa..89844797807 100644
--- a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py
+++ b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -48,7 +48,8 @@
 @pytest.mark.skipif(
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
-@pytest.mark.parametrize("graph_file", DATASETS)
+@pytest.mark.parametrize("graph_file", DATASETS,
+                         ids=[f"dataset={d}" for d in DATASETS])
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
 @pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
 @pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
diff --git a/python/cugraph/tests/dask/test_mg_bfs.py b/python/cugraph/tests/dask/test_mg_bfs.py
index 63580461b17..36d1f436b52 100644
--- a/python/cugraph/tests/dask/test_mg_bfs.py
+++ b/python/cugraph/tests/dask/test_mg_bfs.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -35,7 +35,10 @@ def client_connection():
 def test_dask_bfs(client_connection):
     gc.collect()
 
+    # FIXME: update this to allow dataset to be parameterized and have dataset
+    # part of test param id (see other tests)
     input_data_path = r"../datasets/netscience.csv"
+    print(f"dataset={input_data_path}")
     chunksize = dcg.get_chunksize(input_data_path)
 
     ddf = dask_cudf.read_csv(
diff --git a/python/cugraph/tests/dask/test_mg_comms.py b/python/cugraph/tests/dask/test_mg_comms.py
index 61a4944b5f1..03a0a5d73d2 100644
--- a/python/cugraph/tests/dask/test_mg_comms.py
+++ b/python/cugraph/tests/dask/test_mg_comms.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -38,10 +38,14 @@ def test_dask_pagerank(client_connection):
     # Initialize and run pagerank on two distributed graphs
     # with same communicator
 
+    # FIXME: update this to allow dataset to be parameterized and have dataset
+    # part of test param id (see other tests)
     input_data_path1 = r"../datasets/karate.csv"
+    print(f"dataset1={input_data_path1}")
     chunksize1 = dcg.get_chunksize(input_data_path1)
 
     input_data_path2 = r"../datasets/dolphins.csv"
+    print(f"dataset2={input_data_path2}")
     chunksize2 = dcg.get_chunksize(input_data_path2)
 
     ddf1 = dask_cudf.read_csv(
diff --git a/python/cugraph/tests/dask/test_mg_degree.py b/python/cugraph/tests/dask/test_mg_degree.py
index 9f4c0d94319..93e8a365dea 100644
--- a/python/cugraph/tests/dask/test_mg_degree.py
+++ b/python/cugraph/tests/dask/test_mg_degree.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -34,7 +34,10 @@ def client_connection():
 def test_dask_mg_degree(client_connection):
     gc.collect()
 
+    # FIXME: update this to allow dataset to be parameterized and have dataset
+    # part of test param id (see other tests)
     input_data_path = r"../datasets/karate.csv"
+    print(f"dataset={input_data_path}")
 
     chunksize = cugraph.dask.get_chunksize(input_data_path)
 
diff --git a/python/cugraph/tests/dask/test_mg_katz_centrality.py b/python/cugraph/tests/dask/test_mg_katz_centrality.py
index 631457f7558..eadf0f662d4 100644
--- a/python/cugraph/tests/dask/test_mg_katz_centrality.py
+++ b/python/cugraph/tests/dask/test_mg_katz_centrality.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -36,7 +36,10 @@ def client_connection():
 def test_dask_katz_centrality(client_connection):
     gc.collect()
 
+    # FIXME: update this to allow dataset to be parameterized and have dataset
+    # part of test param id (see other tests)
     input_data_path = r"../datasets/karate.csv"
+    print(f"dataset={input_data_path}")
     chunksize = dcg.get_chunksize(input_data_path)
 
     ddf = dask_cudf.read_csv(
diff --git a/python/cugraph/tests/dask/test_mg_louvain.py b/python/cugraph/tests/dask/test_mg_louvain.py
index a07eede8cb9..bd7374fb75e 100644
--- a/python/cugraph/tests/dask/test_mg_louvain.py
+++ b/python/cugraph/tests/dask/test_mg_louvain.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -52,7 +52,10 @@ def client_connection():
 @pytest.mark.skipif(
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
-@pytest.fixture(scope="module", params=utils.DATASETS_UNDIRECTED)
+@pytest.fixture(scope="module",
+                params=utils.DATASETS_UNDIRECTED,
+                ids=[f"dataset={d.as_posix()}"
+                     for d in utils.DATASETS_UNDIRECTED])
 def daskGraphFromDataset(request, client_connection):
     """
     Returns a new dask dataframe created from the dataset file param.
diff --git a/python/cugraph/tests/dask/test_mg_pagerank.py b/python/cugraph/tests/dask/test_mg_pagerank.py
index 4f0b45242dd..9cb00010311 100644
--- a/python/cugraph/tests/dask/test_mg_pagerank.py
+++ b/python/cugraph/tests/dask/test_mg_pagerank.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -65,7 +65,10 @@ def client_connection():
 def test_dask_pagerank(client_connection, personalization_perc):
     gc.collect()
 
+    # FIXME: update this to allow dataset to be parameterized and have dataset
+    # part of test param id (see other tests)
     input_data_path = r"../datasets/karate.csv"
+    print(f"dataset={input_data_path}")
     chunksize = dcg.get_chunksize(input_data_path)
 
     ddf = dask_cudf.read_csv(
diff --git a/python/cugraph/tests/dask/test_mg_renumber.py b/python/cugraph/tests/dask/test_mg_renumber.py
index 7f5cf6f08bc..68ec3de35f8 100644
--- a/python/cugraph/tests/dask/test_mg_renumber.py
+++ b/python/cugraph/tests/dask/test_mg_renumber.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -38,11 +38,12 @@ def client_connection():
     teardown_local_dask_cluster(cluster, client)
 
 
-# Test all combinations of default/managed and pooled/non-pooled allocation
 @pytest.mark.skipif(
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
-@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED)
+@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED,
+                         ids=[f"dataset={d.as_posix()}"
+                              for d in utils.DATASETS_UNRENUMBERED])
 def test_mg_renumber(graph_file, client_connection):
     gc.collect()
 
@@ -60,71 +61,37 @@ def test_mg_renumber(graph_file, client_connection):
 
     ddf = dask.dataframe.from_pandas(gdf, npartitions=2)
 
-    numbering = NumberMap()
-    numbering.from_dataframe(ddf, ["src", "src_old"], ["dst", "dst_old"])
-    renumbered_df = numbering.add_internal_vertex_id(
-        numbering.add_internal_vertex_id(ddf, "src_id", ["src", "src_old"]),
-        "dst_id",
-        ["dst", "dst_old"],
-    )
-
-    check_src = numbering.from_internal_vertex_id(
-        renumbered_df, "src_id"
-    ).compute()
-    check_dst = numbering.from_internal_vertex_id(
-        renumbered_df, "dst_id"
-    ).compute()
-
-    assert check_src["0"].to_pandas().equals(check_src["src"].to_pandas())
-    assert check_src["1"].to_pandas().equals(check_src["src_old"].to_pandas())
-    assert check_dst["0"].to_pandas().equals(check_dst["dst"].to_pandas())
-    assert check_dst["1"].to_pandas().equals(check_dst["dst_old"].to_pandas())
-
-
-# Test all combinations of default/managed and pooled/non-pooled allocation
-@pytest.mark.skipif(
-    is_single_gpu(), reason="skipping MG testing on Single GPU system"
-)
-@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED)
-def test_mg_renumber2(graph_file, client_connection):
-    gc.collect()
-
-    M = utils.read_csv_for_nx(graph_file)
-    sources = cudf.Series(M["0"])
-    destinations = cudf.Series(M["1"])
-
-    translate = 1000
-
-    gdf = cudf.DataFrame()
-    gdf["src_old"] = sources
-    gdf["dst_old"] = destinations
-    gdf["src"] = sources + translate
-    gdf["dst"] = destinations + translate
-    gdf["weight"] = gdf.index.astype(np.float)
-
-    ddf = dask.dataframe.from_pandas(gdf, npartitions=2)
-
-    ren2, num2 = NumberMap.renumber(
-        ddf, ["src", "src_old"], ["dst", "dst_old"]
-    )
-
-    check_src = num2.from_internal_vertex_id(ren2, "src").compute()
-    check_src = check_src.sort_values("weight").reset_index(drop=True)
-    check_dst = num2.from_internal_vertex_id(ren2, "dst").compute()
-    check_dst = check_dst.sort_values("weight").reset_index(drop=True)
-
-    assert check_src["0"].to_pandas().equals(gdf["src"].to_pandas())
-    assert check_src["1"].to_pandas().equals(gdf["src_old"].to_pandas())
-    assert check_dst["0"].to_pandas().equals(gdf["dst"].to_pandas())
-    assert check_dst["1"].to_pandas().equals(gdf["dst_old"].to_pandas())
+    # preserve_order is not supported for MG
+    renumbered_df, renumber_map = NumberMap.renumber(ddf,
+                                                     ["src", "src_old"],
+                                                     ["dst", "dst_old"],
+                                                     preserve_order=False)
+    unrenumbered_df = renumber_map.unrenumber(renumbered_df, "src",
+                                              preserve_order=False)
+    unrenumbered_df = renumber_map.unrenumber(unrenumbered_df, "dst",
+                                              preserve_order=False)
+
+    # sort needed only for comparisons, since preserve_order is False
+    gdf = gdf.sort_values(by=["src", "src_old", "dst", "dst_old"])
+    gdf = gdf.reset_index()
+    unrenumbered_df = unrenumbered_df.compute()
+    unrenumbered_df = unrenumbered_df.sort_values(by=["0_src", "1_src",
+                                                      "0_dst", "1_dst"])
+    unrenumbered_df = unrenumbered_df.reset_index()
+
+    assert gdf["src"].equals(unrenumbered_df["0_src"])
+    assert gdf["src_old"].equals(unrenumbered_df["1_src"])
+    assert gdf["dst"].equals(unrenumbered_df["0_dst"])
+    assert gdf["dst_old"].equals(unrenumbered_df["1_dst"])
 
 
-# Test all combinations of default/managed and pooled/non-pooled allocation
 @pytest.mark.skipif(
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
-@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED)
-def test_mg_renumber3(graph_file, client_connection):
+@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED,
+                         ids=[f"dataset={d.as_posix()}"
+                              for d in utils.DATASETS_UNRENUMBERED])
+def test_mg_renumber_add_internal_vertex_id(graph_file, client_connection):
     gc.collect()
 
     M = utils.read_csv_for_nx(graph_file)
diff --git a/python/cugraph/tests/dask/test_mg_replication.py b/python/cugraph/tests/dask/test_mg_replication.py
index bb43d6c0f7a..3974cf9ed82 100644
--- a/python/cugraph/tests/dask/test_mg_replication.py
+++ b/python/cugraph/tests/dask/test_mg_replication.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -34,7 +34,9 @@
 @pytest.mark.skipif(
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
-@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS)
+@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS,
+                         ids=[f"dataset={d.as_posix()}"
+                              for d in DATASETS_OPTIONS])
 @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
 def test_replicate_cudf_dataframe_with_weights(
     input_data_path, mg_device_count
@@ -60,7 +62,9 @@ def test_replicate_cudf_dataframe_with_weights(
 @pytest.mark.skipif(
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
-@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS)
+@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS,
+                         ids=[f"dataset={d.as_posix()}"
+                              for d in DATASETS_OPTIONS])
 @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
 def test_replicate_cudf_dataframe_no_weights(input_data_path, mg_device_count):
     gc.collect()
@@ -84,7 +88,9 @@ def test_replicate_cudf_dataframe_no_weights(input_data_path, mg_device_count):
 @pytest.mark.skipif(
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
-@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS)
+@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS,
+                         ids=[f"dataset={d.as_posix()}"
+                              for d in DATASETS_OPTIONS])
 @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
 def test_replicate_cudf_series(input_data_path, mg_device_count):
     gc.collect()
@@ -114,7 +120,9 @@ def test_replicate_cudf_series(input_data_path, mg_device_count):
 @pytest.mark.skipif(
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
-@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS)
+@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS,
+                         ids=[f"dataset={d.as_posix()}"
+                              for d in DATASETS_OPTIONS])
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
 @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
 def test_enable_batch_no_context(graph_file, directed, mg_device_count):
@@ -129,7 +137,9 @@ def test_enable_batch_no_context(graph_file, directed, mg_device_count):
 @pytest.mark.skipif(
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
-@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS)
+@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS,
+                         ids=[f"dataset={d.as_posix()}"
+                              for d in DATASETS_OPTIONS])
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
 @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
 def test_enable_batch_no_context_view_adj(
@@ -145,7 +155,9 @@ def test_enable_batch_no_context_view_adj(
 @pytest.mark.skipif(
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
-@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS)
+@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS,
+                         ids=[f"dataset={d.as_posix()}"
+                              for d in DATASETS_OPTIONS])
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
 @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
 def test_enable_batch_context_then_views(
@@ -174,7 +186,9 @@ def test_enable_batch_context_then_views(
 @pytest.mark.skipif(
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
-@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS)
+@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS,
+                         ids=[f"dataset={d.as_posix()}"
+                              for d in DATASETS_OPTIONS])
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
 @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
 def test_enable_batch_view_then_context(graph_file, directed, mg_device_count):
@@ -205,7 +219,9 @@ def test_enable_batch_view_then_context(graph_file, directed, mg_device_count):
 @pytest.mark.skipif(
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
-@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS)
+@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS,
+                         ids=[f"dataset={d.as_posix()}"
+                              for d in DATASETS_OPTIONS])
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
 @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
 def test_enable_batch_context_no_context_views(
@@ -230,7 +246,9 @@ def test_enable_batch_context_no_context_views(
 @pytest.mark.skipif(
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
-@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS)
+@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS,
+                         ids=[f"dataset={d.as_posix()}"
+                              for d in DATASETS_OPTIONS])
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
 @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
 def test_enable_batch_edgelist_replication(
@@ -251,7 +269,9 @@ def test_enable_batch_edgelist_replication(
 @pytest.mark.skipif(
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
-@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS)
+@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS,
+                         ids=[f"dataset={d.as_posix()}"
+                              for d in DATASETS_OPTIONS])
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
 @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
 def test_enable_batch_adjlist_replication_weights(
@@ -293,7 +313,9 @@ def test_enable_batch_adjlist_replication_weights(
 @pytest.mark.skipif(
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
-@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS)
+@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS,
+                         ids=[f"dataset={d.as_posix()}"
+                              for d in DATASETS_OPTIONS])
 @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
 @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
 def test_enable_batch_adjlist_replication_no_weights(
diff --git a/python/cugraph/tests/dask/test_mg_sssp.py b/python/cugraph/tests/dask/test_mg_sssp.py
index d75d76d7fd4..9e1fd1ec82f 100644
--- a/python/cugraph/tests/dask/test_mg_sssp.py
+++ b/python/cugraph/tests/dask/test_mg_sssp.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -35,7 +35,10 @@ def client_connection():
 def test_dask_sssp(client_connection):
     gc.collect()
 
+    # FIXME: update this to allow dataset to be parameterized and have dataset
+    # part of test param id (see other tests)
     input_data_path = r"../datasets/netscience.csv"
+    print(f"dataset={input_data_path}")
     chunksize = dcg.get_chunksize(input_data_path)
 
     ddf = dask_cudf.read_csv(
diff --git a/python/cugraph/tests/dask/test_mg_utility.py b/python/cugraph/tests/dask/test_mg_utility.py
index 3217c1bef1a..150fa0137f5 100644
--- a/python/cugraph/tests/dask/test_mg_utility.py
+++ b/python/cugraph/tests/dask/test_mg_utility.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -46,7 +46,10 @@ def client_connection():
     is_single_gpu(), reason="skipping MG testing on Single GPU system"
 )
 def test_from_edgelist(client_connection):
+    # FIXME: update this to allow dataset to be parameterized and have dataset
+    # part of test param id (see other tests)
     input_data_path = r"../datasets/karate.csv"
+    print(f"dataset={input_data_path}")
     chunksize = dcg.get_chunksize(input_data_path)
     ddf = dask_cudf.read_csv(
         input_data_path,

From 62c1c6824ab9f4249ed227cb4954076d282d3b57 Mon Sep 17 00:00:00 2001
From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com>
Date: Mon, 12 Apr 2021 08:39:03 -0500
Subject: [PATCH 6/6] Fixed copyright date and format. (#1526)

Update copyright date and format

Authors:
  - Rick Ratzel (https://github.com/rlratzel)

Approvers:
  - Brad Rees (https://github.com/BradReesWork)

URL: https://github.com/rapidsai/cugraph/pull/1526
---
 docs/source/conf.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/source/conf.py b/docs/source/conf.py
index eb4745a61f0..3422428c96b 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python3
 # -*- coding: utf-8 -*-
 #
-# Copyright (c) 2018-2020 NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
 #
 # pygdf documentation build configuration file, created by
 # sphinx-quickstart on Wed May  3 10:59:22 2017.