diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0011b99fbf3..bd5b313e550 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,7 @@
+# cuGraph 0.20.0 (Date TBD)
+
+Please see https://github.com/rapidsai/cugraph/releases/tag/v0.20.0a for the latest changes to this development branch.
+
# cuGraph 0.19.0 (Date TBD)
Please see https://github.com/rapidsai/cugraph/releases/tag/v0.19.0a for the latest changes to this development branch.
diff --git a/README.md b/README.md
index 4bdbcd00280..ccc91bfe225 100644
--- a/README.md
+++ b/README.md
@@ -82,6 +82,7 @@ As of Release 0.18 - including 0.18 nightly
| | Breadth First Search (BFS) | Multi-GPU | with cutoff support
[C++ README](cpp/src/traversal/README.md#BFS) |
| | Single Source Shortest Path (SSSP) | Multi-GPU | [C++ README](cpp/src/traversal/README.md#SSSP) |
| | Traveling Salesperson Problem (TSP) | Single-GPU | |
+| Sampling | Random Walks (RW) | Single-GPU | |
| Structure | | | |
| | Renumbering | Single-GPU | multiple columns, any data type |
| | Symmetrize | Multi-GPU | |
diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml
index a138f5e80df..8d717c205c7 100644
--- a/conda/environments/cugraph_dev_cuda10.1.yml
+++ b/conda/environments/cugraph_dev_cuda10.1.yml
@@ -5,17 +5,17 @@ channels:
- rapidsai-nightly
- conda-forge
dependencies:
-- cudf=0.19.*
-- libcudf=0.19.*
-- rmm=0.19.*
-- cuxfilter=0.19.*
-- librmm=0.19.*
+- cudf=0.20.*
+- libcudf=0.20.*
+- rmm=0.20.*
+- cuxfilter=0.20.*
+- librmm=0.20.*
- dask>=2.12.0
- distributed>=2.12.0
-- dask-cuda=0.19*
-- dask-cudf=0.19*
+- dask-cuda=0.20*
+- dask-cudf=0.20*
- nccl>=2.8.4
-- ucx-py=0.19*
+- ucx-py=0.20*
- ucx-proc=*=gpu
- scipy
- networkx>=2.5.1
diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml
index d53fefc086a..771f6141a68 100644
--- a/conda/environments/cugraph_dev_cuda10.2.yml
+++ b/conda/environments/cugraph_dev_cuda10.2.yml
@@ -5,17 +5,17 @@ channels:
- rapidsai-nightly
- conda-forge
dependencies:
-- cudf=0.19.*
-- libcudf=0.19.*
-- rmm=0.19.*
-- cuxfilter=0.19.*
-- librmm=0.19.*
+- cudf=0.20.*
+- libcudf=0.20.*
+- rmm=0.20.*
+- cuxfilter=0.20.*
+- librmm=0.20.*
- dask>=2.12.0
- distributed>=2.12.0
-- dask-cuda=0.19*
-- dask-cudf=0.19*
+- dask-cuda=0.20*
+- dask-cudf=0.20*
- nccl>=2.8.4
-- ucx-py=0.19*
+- ucx-py=0.20*
- ucx-proc=*=gpu
- scipy
- networkx>=2.5.1
diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml
index 771b175aa92..84c07524a00 100644
--- a/conda/environments/cugraph_dev_cuda11.0.yml
+++ b/conda/environments/cugraph_dev_cuda11.0.yml
@@ -5,17 +5,17 @@ channels:
- rapidsai-nightly
- conda-forge
dependencies:
-- cudf=0.19.*
-- libcudf=0.19.*
-- rmm=0.19.*
-- cuxfilter=0.19.*
-- librmm=0.19.*
+- cudf=0.20.*
+- libcudf=0.20.*
+- rmm=0.20.*
+- cuxfilter=0.20.*
+- librmm=0.20.*
- dask>=2.12.0
- distributed>=2.12.0
-- dask-cuda=0.19*
-- dask-cudf=0.19*
+- dask-cuda=0.20*
+- dask-cudf=0.20*
- nccl>=2.8.4
-- ucx-py=0.19*
+- ucx-py=0.20*
- ucx-proc=*=gpu
- scipy
- networkx>=2.5.1
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 0388a76d729..9394f7b38d1 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -16,7 +16,7 @@
cmake_minimum_required(VERSION 3.18...3.18 FATAL_ERROR)
-project(CUGRAPH VERSION 0.19.0 LANGUAGES C CXX CUDA)
+project(CUGRAPH VERSION 0.20.0 LANGUAGES C CXX CUDA)
# Write the version header
include(cmake/Modules/Version.cmake)
diff --git a/docs/source/api.rst b/docs/source/api.rst
index b02f8f488c5..e2c2c19cf02 100644
--- a/docs/source/api.rst
+++ b/docs/source/api.rst
@@ -192,7 +192,7 @@ Pagerank
:undoc-members:
Pagerank (MG)
----------
+-------------
.. automodule:: cugraph.dask.link_analysis.pagerank
:members: pagerank
@@ -225,6 +225,17 @@ Overlap Coefficient
:undoc-members:
+Sampling
+========
+
+Random Walks
+------------
+
+.. automodule:: cugraph.sampling.random_walks
+ :members:
+ :undoc-members:
+
+
Traversal
=========
@@ -236,7 +247,7 @@ Breadth-first-search
:undoc-members:
Breadth-first-search (MG)
---------------------
+-------------------------
.. automodule:: cugraph.dask.traversal.bfs
:members:
@@ -250,12 +261,19 @@ Single-source-shortest-path
:undoc-members:
Single-source-shortest-path (MG)
----------------------------
+--------------------------------
.. automodule:: cugraph.dask.traversal.sssp
:members:
:undoc-members:
+Traveling-salesperson-problem
+-----------------------------
+
+.. automodule:: cugraph.traversal.traveling_salesperson
+ :members:
+ :undoc-members:
+
Tree
=========
@@ -264,27 +282,25 @@ Minimum Spanning Tree
---------------------
.. automodule:: cugraph.tree.minimum_spanning_tree
- :members:
+ :members: minimum_spanning_tree
:undoc-members:
Maximum Spanning Tree
---------------------
-.. automodule:: cugraph.tree.maximum_spanning_tree
- :members:
+.. automodule:: cugraph.tree.minimum_spanning_tree
+ :members: maximum_spanning_tree
:undoc-members:
+ :noindex:
-DASK MG Helper functions
+DASK MG Helper functions
===========================
.. automodule:: cugraph.comms.comms
- :members: initialize
- :undoc-members:
-
-.. automodule:: cugraph.comms.comms
- :members: destroy
+ :members: initialize, destroy
:undoc-members:
+ :member-order: bysource
.. automodule:: cugraph.dask.common.read_utils
:members: get_chunksize
diff --git a/docs/source/conf.py b/docs/source/conf.py
index eb4745a61f0..5e87622bd09 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -1,7 +1,7 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
-# Copyright (c) 2018-2020 NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
#
# pygdf documentation build configuration file, created by
# sphinx-quickstart on Wed May 3 10:59:22 2017.
@@ -80,9 +80,9 @@
# built documents.
#
# The short X.Y version.
-version = '0.19'
+version = '0.20'
# The full version, including alpha/beta/rc tags.
-release = '0.19.0'
+release = '0.20.0'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py
index 11ba2d6ef96..d4632708591 100644
--- a/python/cugraph/__init__.py
+++ b/python/cugraph/__init__.py
@@ -101,6 +101,8 @@
from cugraph.raft import raft_include_test
from cugraph.comms import comms
+from cugraph.sampling import random_walks
+
# Versioneer
from ._version import get_versions
diff --git a/python/cugraph/centrality/katz_centrality.py b/python/cugraph/centrality/katz_centrality.py
index 3e2680a196f..ce52d15f5db 100644
--- a/python/cugraph/centrality/katz_centrality.py
+++ b/python/cugraph/centrality/katz_centrality.py
@@ -39,14 +39,16 @@ def katz_centrality(
Attenuation factor defaulted to None. If alpha is not specified then
it is internally calculated as 1/(degree_max) where degree_max is the
maximum out degree.
- NOTE : The maximum acceptable value of alpha for convergence
- alpha_max = 1/(lambda_max) where lambda_max is the largest eigenvalue
- of the graph.
- Since lambda_max is always lesser than or equal to degree_max for a
- graph, alpha_max will always be greater than or equal to
- (1/degree_max). Therefore, setting alpha to (1/degree_max) will
- guarantee that it will never exceed alpha_max thus in turn fulfilling
- the requirement for convergence.
+
+ NOTE
+ The maximum acceptable value of alpha for convergence
+ alpha_max = 1/(lambda_max) where lambda_max is the largest eigenvalue
+ of the graph.
+ Since lambda_max is always less than or equal to degree_max for a
+ graph, alpha_max will always be greater than or equal to
+ (1/degree_max). Therefore, setting alpha to (1/degree_max) will
+ guarantee that it will never exceed alpha_max thus in turn fulfilling
+ the requirement for convergence.
beta : None
A weight scalar - currently Not Supported
max_iter : int
diff --git a/python/cugraph/components/connectivity.py b/python/cugraph/components/connectivity.py
index 72f33ebfcbb..df33f8b8e03 100644
--- a/python/cugraph/components/connectivity.py
+++ b/python/cugraph/components/connectivity.py
@@ -138,8 +138,10 @@ def weakly_connected_components(G,
directed : bool, optional
- NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises
- TypeError if used with a Graph object.
+ NOTE
+ For non-Graph-type (eg. sparse matrix) values of G only.
+ Raises TypeError if used with a Graph object.
+
If True (default), then convert the input matrix to a cugraph.DiGraph
and only move from point i to point j along paths csgraph[i, j]. If
False, then find the shortest path on an undirected graph: the
@@ -154,8 +156,10 @@ def weakly_connected_components(G,
return_labels : bool, optional
- NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises
- TypeError if used with a Graph object.
+ NOTE
+ For non-Graph-type (eg. sparse matrix) values of G only. Raises
+ TypeError if used with a Graph object.
+
If True (default), then return the labels for each of the connected
components.
@@ -231,8 +235,10 @@ def strongly_connected_components(G,
directed : bool, optional
- NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises
- TypeError if used with a Graph object.
+ NOTE
+ For non-Graph-type (eg. sparse matrix) values of G only.
+ Raises TypeError if used with a Graph object.
+
If True (default), then convert the input matrix to a cugraph.DiGraph
and only move from point i to point j along paths csgraph[i, j]. If
False, then find the shortest path on an undirected graph: the
@@ -247,8 +253,10 @@ def strongly_connected_components(G,
return_labels : bool, optional
- NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises
- TypeError if used with a Graph object.
+ NOTE
+ For non-Graph-type (eg. sparse matrix) values of G only. Raises
+ TypeError if used with a Graph object.
+
If True (default), then return the labels for each of the connected
components.
@@ -325,8 +333,10 @@ def connected_components(G,
directed : bool, optional
- NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises
- TypeError if used with a Graph object.
+ NOTE
+ For non-Graph-type (eg. sparse matrix) values of G only. Raises
+ TypeError if used with a Graph object.
+
If True (default), then convert the input matrix to a cugraph.DiGraph
and only move from point i to point j along paths csgraph[i, j]. If
False, then find the shortest path on an undirected graph: the
@@ -340,8 +350,10 @@ def connected_components(G,
return_labels : bool, optional
- NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises
- TypeError if used with a Graph object.
+ NOTE
+ For non-Graph-type (eg. sparse matrix) values of G only. Raises
+ TypeError if used with a Graph object.
+
If True (default), then return the labels for each of the connected
components.
diff --git a/python/cugraph/dask/centrality/katz_centrality.py b/python/cugraph/dask/centrality/katz_centrality.py
index a2f83a0b2a8..45deda8b7ae 100644
--- a/python/cugraph/dask/centrality/katz_centrality.py
+++ b/python/cugraph/dask/centrality/katz_centrality.py
@@ -68,14 +68,16 @@ def katz_centrality(input_graph,
Attenuation factor defaulted to None. If alpha is not specified then
it is internally calculated as 1/(degree_max) where degree_max is the
maximum out degree.
- NOTE : The maximum acceptable value of alpha for convergence
- alpha_max = 1/(lambda_max) where lambda_max is the largest eigenvalue
- of the graph.
- Since lambda_max is always lesser than or equal to degree_max for a
- graph, alpha_max will always be greater than or equal to
- (1/degree_max). Therefore, setting alpha to (1/degree_max) will
- guarantee that it will never exceed alpha_max thus in turn fulfilling
- the requirement for convergence.
+
+ NOTE
+ The maximum acceptable value of alpha for convergence
+ alpha_max = 1/(lambda_max) where lambda_max is the largest eigenvalue
+ of the graph.
+ Since lambda_max is always less than or equal to degree_max for a
+ graph, alpha_max will always be greater than or equal to
+ (1/degree_max). Therefore, setting alpha to (1/degree_max) will
+ guarantee that it will never exceed alpha_max thus in turn fulfilling
+ the requirement for convergence.
beta : None
A weight scalar - currently Not Supported
max_iter : int
@@ -94,6 +96,7 @@ def katz_centrality(input_graph,
acceptable.
nstart : dask_cudf.Dataframe
GPU Dataframe containing the initial guess for katz centrality
+
nstart['vertex'] : dask_cudf.Series
Contains the vertex identifiers
nstart['values'] : dask_cudf.Series
diff --git a/python/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/dask/link_analysis/pagerank.py
index bfaada85a6f..fb9f4ad3a25 100644
--- a/python/cugraph/dask/link_analysis/pagerank.py
+++ b/python/cugraph/dask/link_analysis/pagerank.py
@@ -73,6 +73,7 @@ def pagerank(input_graph,
personalization : cudf.Dataframe
GPU Dataframe containing the personalization information.
Currently not supported.
+
personalization['vertex'] : cudf.Series
Subset of vertices of graph for personalization
personalization['values'] : cudf.Series
@@ -91,6 +92,7 @@ def pagerank(input_graph,
acceptable.
nstart : not supported
initial guess for pagerank
+
Returns
-------
PageRank : dask_cudf.DataFrame
diff --git a/python/cugraph/link_analysis/pagerank.py b/python/cugraph/link_analysis/pagerank.py
index 0bb89195e01..8a03ee077f6 100644
--- a/python/cugraph/link_analysis/pagerank.py
+++ b/python/cugraph/link_analysis/pagerank.py
@@ -46,7 +46,6 @@ def pagerank(
Subset of vertices of graph for personalization
personalization['values'] : cudf.Series
Personalization values for vertices
-
max_iter : int
The maximum number of iterations before an answer is returned. This can
be used to limit the execution time and do an early exit before the
diff --git a/python/cugraph/sampling/__init__.py b/python/cugraph/sampling/__init__.py
new file mode 100644
index 00000000000..fd9d072d4f8
--- /dev/null
+++ b/python/cugraph/sampling/__init__.py
@@ -0,0 +1,14 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from cugraph.sampling.random_walks import random_walks
diff --git a/python/cugraph/sampling/random_walks.pxd b/python/cugraph/sampling/random_walks.pxd
new file mode 100644
index 00000000000..3e0e24b4e98
--- /dev/null
+++ b/python/cugraph/sampling/random_walks.pxd
@@ -0,0 +1,22 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
+
+cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
+ cdef unique_ptr[random_walk_ret_t] call_random_walks[vertex_t, edge_t](  # templated on vertex and edge index types
+ const handle_t &handle,
+ const graph_container_t &g,
+ const vertex_t *ptr_d_start,  # start vertices; the wrapper passes a __cuda_array_interface__ data pointer
+ edge_t num_paths,  # the wrapper passes start_vertices.size
+ edge_t max_depth) except +
diff --git a/python/cugraph/sampling/random_walks.py b/python/cugraph/sampling/random_walks.py
new file mode 100644
index 00000000000..7ab3191a07c
--- /dev/null
+++ b/python/cugraph/sampling/random_walks.py
@@ -0,0 +1,95 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import cudf
+from cugraph.sampling import random_walks_wrapper
+import cugraph
+from collections import defaultdict
+
+# FIXME might be more efficient to return either (df + offset) or 3 cudf.Series
+
+
+def random_walks(
+ G,
+ start_vertices,
+ max_depth=None
+):
+ """
+ compute random walks for each nodes in 'start_vertices'
+
+ parameters
+ ----------
+ G : cuGraph.Graph or networkx.Graph
+ The graph can be either directed (DiGraph) or undirected (Graph).
+ Weights in the graph are ignored.
+ Use weight parameter if weights need to be considered
+ (currently not supported)
+
+ start_vertices : int or list or cudf.Series
+ A single node or a list or a cudf.Series of nodes from which to run
+ the random walks
+
+ max_depth : int
+ The maximum depth of the random walks
+
+
+ Returns
+ -------
+ random_walks_edge_lists : cudf.DataFrame
+ GPU data frame containing all random walks sources identifiers,
+ destination identifiers, edge weights
+
+ seeds_offsets: cudf.Series
+ Series containing the starting offset in the returned edge list
+ for each vertex in start_vertices.
+ """
+ if max_depth is None:
+ raise TypeError("must specify a 'max_depth'")
+
+ G, _ = cugraph.utilities.check_nx_graph(G)
+
+ if start_vertices is int:
+ start_vertices = [start_vertices]
+
+ if not isinstance(start_vertices, cudf.Series):
+ start_vertices = cudf.Series(start_vertices)
+
+ if G.renumbered is True:
+ start_vertices = G.lookup_internal_vertex_id(start_vertices)
+ vertex_set, edge_set, sizes = random_walks_wrapper.random_walks(
+ G, start_vertices, max_depth)
+
+ if G.renumbered:
+ df_ = cudf.DataFrame()
+ df_['vertex_set'] = vertex_set
+ df_ = G.unrenumber(df_, 'vertex_set', preserve_order=True)
+ vertex_set = cudf.Series(df_['vertex_set'])
+
+ edge_list = defaultdict(list)
+ next_path_idx = 0
+ offsets = [0]
+
+ df = cudf.DataFrame()
+ for s in sizes.values_host:
+ for i in range(next_path_idx, s+next_path_idx-1):
+ edge_list['src'].append(vertex_set.values_host[i])
+ edge_list['dst'].append(vertex_set.values_host[i+1])
+ next_path_idx += s
+ df = df.append(edge_list, ignore_index=True)
+ offsets.append(df.index[-1]+1)
+ edge_list['src'].clear()
+ edge_list['dst'].clear()
+ df['weight'] = edge_set
+ offsets = cudf.Series(offsets)
+
+ return df, offsets
diff --git a/python/cugraph/sampling/random_walks_wrapper.pyx b/python/cugraph/sampling/random_walks_wrapper.pyx
new file mode 100644
index 00000000000..7b16ff14018
--- /dev/null
+++ b/python/cugraph/sampling/random_walks_wrapper.pyx
@@ -0,0 +1,116 @@
+# Copyright (c) 2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from cugraph.sampling.random_walks cimport call_random_walks
+#from cugraph.structure.graph_primtypes cimport *
+from cugraph.structure.graph_utilities cimport *
+from libcpp cimport bool
+from libcpp.utility cimport move
+from libc.stdint cimport uintptr_t
+from cugraph.structure import graph_primtypes_wrapper
+import cudf
+import rmm
+import numpy as np
+import numpy.ctypeslib as ctypeslib
+from rmm._lib.device_buffer cimport DeviceBuffer
+from cudf.core.buffer import Buffer
+from cython.operator cimport dereference as deref
+def random_walks(input_graph, start_vertices, max_depth):
+ """
+ Call call_random_walks on input_graph's edge list and return the vertex, edge-weight and path-size buffers as three cudf.Series.
+ """
+ # FIXME: Offsets and indices are currently hardcoded to int, but this may
+ # not be acceptable in the future.
+ numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type,
+ np.dtype("int64") : numberTypeEnum.int64Type,
+ np.dtype("float32") : numberTypeEnum.floatType,
+ np.dtype("double") : numberTypeEnum.doubleType}
+ [src, dst] = [input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']]
+ vertex_t = src.dtype  # vertex type follows the dtype of the edge list's 'src' column
+ edge_t = np.dtype("int32")  # switched to int64 below when num_edges exceeds 2**31 - 1
+ weights = None
+ if input_graph.edgelist.weights:
+ weights = input_graph.edgelist.edgelist_df['weights']
+ num_verts = input_graph.number_of_vertices()
+ num_edges = input_graph.number_of_edges(directed_edges=True)
+ num_partition_edges = num_edges  # same value as num_edges; both are passed to populate_graph_container
+
+ if num_edges > (2**31 - 1):
+ edge_t = np.dtype("int64")
+ cdef unique_ptr[random_walk_ret_t] rw_ret_ptr
+
+ cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0]
+ cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0]
+ cdef uintptr_t c_edge_weights = NULL
+ if weights is not None:
+ c_edge_weights = weights.__cuda_array_interface__['data'][0]
+ weight_t = weights.dtype
+ is_weighted = True
+ else:
+ weight_t = np.dtype("float32")  # dtype used when the graph carries no weights
+ is_weighted = False
+ # Pointers for random_walks
+ start_vertices = start_vertices.astype('int32')  # NOTE(review): always int32, even when vertex_t is int64 - confirm
+ cdef uintptr_t c_start_vertex_ptr = start_vertices.__cuda_array_interface__['data'][0]
+ num_paths = start_vertices.size  # one path per start vertex
+ cdef unique_ptr[handle_t] handle_ptr
+ handle_ptr.reset(new handle_t())
+ handle_ = handle_ptr.get()
+ cdef graph_container_t graph_container
+ populate_graph_container(graph_container,
+ handle_[0],
+ c_src_vertices, c_dst_vertices, c_edge_weights,
+ NULL,
+ ((numberTypeMap[vertex_t])),
+ ((numberTypeMap[edge_t])),
+ ((numberTypeMap[weight_t])),
+ num_partition_edges,
+ num_verts,
+ num_edges,
+ False,
+ is_weighted,
+ False, False)
+ if(vertex_t == np.dtype("int32")):  # pick the template instantiation matching vertex_t / edge_t
+ if(edge_t == np.dtype("int32")):
+ rw_ret_ptr = move(call_random_walks[int, int]( deref(handle_),
+ graph_container,
+ c_start_vertex_ptr,
+ num_paths,
+ max_depth))
+ else: # (edge_t == np.dtype("int64")):
+ rw_ret_ptr = move(call_random_walks[int, long]( deref(handle_),
+ graph_container,
+ c_start_vertex_ptr,
+ num_paths,
+ max_depth))
+ else: # (vertex_t == edge_t == np.dtype("int64")):
+ rw_ret_ptr = move(call_random_walks[long, long]( deref(handle_),
+ graph_container,
+ c_start_vertex_ptr,
+ num_paths,
+ max_depth))
+
+
+ rw_ret= move(rw_ret_ptr.get()[0])  # move the result struct out of the unique_ptr
+ vertex_set = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_coalesced_v_))
+ edge_set = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_coalesced_w_))
+ sizes = DeviceBuffer.c_from_unique_ptr(move(rw_ret.d_sizes_))
+ vertex_set = Buffer(vertex_set)  # wrap each DeviceBuffer in a cudf Buffer for the Series below
+ edge_set = Buffer(edge_set)
+ sizes = Buffer(sizes)
+
+ set_vertex = cudf.Series(data=vertex_set, dtype=vertex_t)
+ set_edge = cudf.Series(data=edge_set, dtype=weight_t)
+ set_sizes = cudf.Series(data=sizes, dtype=edge_t)  # the Python caller reads one size per path from this
+
+ return set_vertex, set_edge, set_sizes
+
\ No newline at end of file
diff --git a/python/cugraph/structure/graph_utilities.pxd b/python/cugraph/structure/graph_utilities.pxd
index b169e42ccf8..c9cf1748bfe 100644
--- a/python/cugraph/structure/graph_utilities.pxd
+++ b/python/cugraph/structure/graph_utilities.pxd
@@ -83,6 +83,15 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython":
unique_ptr[device_buffer] dst_indices
unique_ptr[device_buffer] edge_data
unique_ptr[device_buffer] subgraph_offsets
+
+ cdef cppclass random_walk_ret_t:  # result type returned (via unique_ptr) by call_random_walks
+ size_t coalesced_sz_v_  # presumably the element count of d_coalesced_v_ - confirm against cython.hpp
+ size_t coalesced_sz_w_  # presumably the element count of d_coalesced_w_ - confirm against cython.hpp
+ size_t num_paths_
+ size_t max_depth_
+ unique_ptr[device_buffer] d_coalesced_v_  # wrapped as the vertex Series by random_walks_wrapper
+ unique_ptr[device_buffer] d_coalesced_w_  # wrapped as the edge-weight Series by random_walks_wrapper
+ unique_ptr[device_buffer] d_sizes_  # wrapped as the per-path sizes Series by random_walks_wrapper
cdef extern from "" namespace "std" nogil:
cdef device_buffer move(device_buffer)
diff --git a/python/cugraph/structure/number_map.py b/python/cugraph/structure/number_map.py
index e45a50d6dbe..cd24dfc0434 100644
--- a/python/cugraph/structure/number_map.py
+++ b/python/cugraph/structure/number_map.py
@@ -263,7 +263,6 @@ def indirection_map(self, ddf, src_col_names, dst_col_names):
to_frame(name=newname)
else:
tmp_df[newname] = tmp[newname].append(tmp_dst[oldname])
- print(tmp_df.columns)
else:
for newname in self.col_names:
tmp_df[newname] = tmp[newname]
@@ -273,7 +272,7 @@ def indirection_map(self, ddf, src_col_names, dst_col_names):
tmp_ddf = tmp_ddf.assign(idx=1)
tmp_ddf['global_id'] = tmp_ddf.idx.cumsum() - 1
tmp_ddf = tmp_ddf.drop(columns='idx')
-
+ tmp_ddf = tmp_ddf.persist()
self.ddf = tmp_ddf
return tmp_ddf
@@ -481,8 +480,6 @@ def renumber(df, src_col_names, dst_col_names, preserve_order=False,
renumber_type = 'legacy'
else:
renumber_type = 'experimental'
- df = df.rename(columns={src_col_names: "src",
- dst_col_names: "dst"})
renumber_map = NumberMap()
if not isinstance(src_col_names, list):
@@ -514,6 +511,9 @@ def renumber(df, src_col_names, dst_col_names, preserve_order=False,
df, "dst", dst_col_names, drop=True,
preserve_order=preserve_order
)
+ else:
+ df = df.rename(columns={src_col_names[0]: "src",
+ dst_col_names[0]: "dst"})
num_edges = len(df)
diff --git a/python/cugraph/structure/symmetrize.py b/python/cugraph/structure/symmetrize.py
index 0f4ca90a97c..8720f7ad343 100644
--- a/python/cugraph/structure/symmetrize.py
+++ b/python/cugraph/structure/symmetrize.py
@@ -32,6 +32,7 @@ def symmetrize_df(df, src_name, dst_name, multi=False, symmetrize=True):
!= data2 then this code will arbitrarily pick the smaller data
element to keep, if this is not desired then the caller should
should correct the data prior to calling symmetrize.
+
Parameters
----------
df : cudf.DataFrame
diff --git a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py
index 6e1e5ea380a..02696f589e3 100644
--- a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py
+++ b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py
@@ -51,7 +51,8 @@
@pytest.mark.skipif(
is_single_gpu(), reason="skipping MG testing on Single GPU system"
)
-@pytest.mark.parametrize("graph_file", DATASETS)
+@pytest.mark.parametrize("graph_file", DATASETS,
+ ids=[f"dataset={d.as_posix()}" for d in DATASETS])
@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
diff --git a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py
index 54b58c340aa..89844797807 100644
--- a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py
+++ b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -48,7 +48,8 @@
@pytest.mark.skipif(
is_single_gpu(), reason="skipping MG testing on Single GPU system"
)
-@pytest.mark.parametrize("graph_file", DATASETS)
+@pytest.mark.parametrize("graph_file", DATASETS,
+ ids=[f"dataset={d}" for d in DATASETS])
@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS)
@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS)
diff --git a/python/cugraph/tests/dask/test_mg_bfs.py b/python/cugraph/tests/dask/test_mg_bfs.py
index 63580461b17..36d1f436b52 100644
--- a/python/cugraph/tests/dask/test_mg_bfs.py
+++ b/python/cugraph/tests/dask/test_mg_bfs.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -35,7 +35,10 @@ def client_connection():
def test_dask_bfs(client_connection):
gc.collect()
+ # FIXME: update this to allow dataset to be parameterized and have dataset
+ # part of test param id (see other tests)
input_data_path = r"../datasets/netscience.csv"
+ print(f"dataset={input_data_path}")
chunksize = dcg.get_chunksize(input_data_path)
ddf = dask_cudf.read_csv(
diff --git a/python/cugraph/tests/dask/test_mg_comms.py b/python/cugraph/tests/dask/test_mg_comms.py
index 61a4944b5f1..03a0a5d73d2 100644
--- a/python/cugraph/tests/dask/test_mg_comms.py
+++ b/python/cugraph/tests/dask/test_mg_comms.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -38,10 +38,14 @@ def test_dask_pagerank(client_connection):
# Initialize and run pagerank on two distributed graphs
# with same communicator
+ # FIXME: update this to allow dataset to be parameterized and have dataset
+ # part of test param id (see other tests)
input_data_path1 = r"../datasets/karate.csv"
+ print(f"dataset1={input_data_path1}")
chunksize1 = dcg.get_chunksize(input_data_path1)
input_data_path2 = r"../datasets/dolphins.csv"
+ print(f"dataset2={input_data_path2}")
chunksize2 = dcg.get_chunksize(input_data_path2)
ddf1 = dask_cudf.read_csv(
diff --git a/python/cugraph/tests/dask/test_mg_degree.py b/python/cugraph/tests/dask/test_mg_degree.py
index 9f4c0d94319..93e8a365dea 100644
--- a/python/cugraph/tests/dask/test_mg_degree.py
+++ b/python/cugraph/tests/dask/test_mg_degree.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -34,7 +34,10 @@ def client_connection():
def test_dask_mg_degree(client_connection):
gc.collect()
+ # FIXME: update this to allow dataset to be parameterized and have dataset
+ # part of test param id (see other tests)
input_data_path = r"../datasets/karate.csv"
+ print(f"dataset={input_data_path}")
chunksize = cugraph.dask.get_chunksize(input_data_path)
diff --git a/python/cugraph/tests/dask/test_mg_katz_centrality.py b/python/cugraph/tests/dask/test_mg_katz_centrality.py
index 631457f7558..eadf0f662d4 100644
--- a/python/cugraph/tests/dask/test_mg_katz_centrality.py
+++ b/python/cugraph/tests/dask/test_mg_katz_centrality.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -36,7 +36,10 @@ def client_connection():
def test_dask_katz_centrality(client_connection):
gc.collect()
+ # FIXME: update this to allow dataset to be parameterized and have dataset
+ # part of test param id (see other tests)
input_data_path = r"../datasets/karate.csv"
+ print(f"dataset={input_data_path}")
chunksize = dcg.get_chunksize(input_data_path)
ddf = dask_cudf.read_csv(
diff --git a/python/cugraph/tests/dask/test_mg_louvain.py b/python/cugraph/tests/dask/test_mg_louvain.py
index a07eede8cb9..bd7374fb75e 100644
--- a/python/cugraph/tests/dask/test_mg_louvain.py
+++ b/python/cugraph/tests/dask/test_mg_louvain.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -52,7 +52,10 @@ def client_connection():
@pytest.mark.skipif(
is_single_gpu(), reason="skipping MG testing on Single GPU system"
)
-@pytest.fixture(scope="module", params=utils.DATASETS_UNDIRECTED)
+@pytest.fixture(scope="module",
+ params=utils.DATASETS_UNDIRECTED,
+ ids=[f"dataset={d.as_posix()}"
+ for d in utils.DATASETS_UNDIRECTED])
def daskGraphFromDataset(request, client_connection):
"""
Returns a new dask dataframe created from the dataset file param.
diff --git a/python/cugraph/tests/dask/test_mg_pagerank.py b/python/cugraph/tests/dask/test_mg_pagerank.py
index 4f0b45242dd..9cb00010311 100644
--- a/python/cugraph/tests/dask/test_mg_pagerank.py
+++ b/python/cugraph/tests/dask/test_mg_pagerank.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -65,7 +65,10 @@ def client_connection():
def test_dask_pagerank(client_connection, personalization_perc):
gc.collect()
+ # FIXME: update this to allow dataset to be parameterized and have dataset
+ # part of test param id (see other tests)
input_data_path = r"../datasets/karate.csv"
+ print(f"dataset={input_data_path}")
chunksize = dcg.get_chunksize(input_data_path)
ddf = dask_cudf.read_csv(
diff --git a/python/cugraph/tests/dask/test_mg_renumber.py b/python/cugraph/tests/dask/test_mg_renumber.py
index 7f5cf6f08bc..68ec3de35f8 100644
--- a/python/cugraph/tests/dask/test_mg_renumber.py
+++ b/python/cugraph/tests/dask/test_mg_renumber.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2019-2020, NVIDIA CORPORATION.
+# Copyright (c) 2019-2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -38,11 +38,12 @@ def client_connection():
teardown_local_dask_cluster(cluster, client)
-# Test all combinations of default/managed and pooled/non-pooled allocation
@pytest.mark.skipif(
is_single_gpu(), reason="skipping MG testing on Single GPU system"
)
-@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED)
+@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED,
+ ids=[f"dataset={d.as_posix()}"
+ for d in utils.DATASETS_UNRENUMBERED])
def test_mg_renumber(graph_file, client_connection):
gc.collect()
@@ -60,71 +61,37 @@ def test_mg_renumber(graph_file, client_connection):
ddf = dask.dataframe.from_pandas(gdf, npartitions=2)
- numbering = NumberMap()
- numbering.from_dataframe(ddf, ["src", "src_old"], ["dst", "dst_old"])
- renumbered_df = numbering.add_internal_vertex_id(
- numbering.add_internal_vertex_id(ddf, "src_id", ["src", "src_old"]),
- "dst_id",
- ["dst", "dst_old"],
- )
-
- check_src = numbering.from_internal_vertex_id(
- renumbered_df, "src_id"
- ).compute()
- check_dst = numbering.from_internal_vertex_id(
- renumbered_df, "dst_id"
- ).compute()
-
- assert check_src["0"].to_pandas().equals(check_src["src"].to_pandas())
- assert check_src["1"].to_pandas().equals(check_src["src_old"].to_pandas())
- assert check_dst["0"].to_pandas().equals(check_dst["dst"].to_pandas())
- assert check_dst["1"].to_pandas().equals(check_dst["dst_old"].to_pandas())
-
-
-# Test all combinations of default/managed and pooled/non-pooled allocation
-@pytest.mark.skipif(
- is_single_gpu(), reason="skipping MG testing on Single GPU system"
-)
-@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED)
-def test_mg_renumber2(graph_file, client_connection):
- gc.collect()
-
- M = utils.read_csv_for_nx(graph_file)
- sources = cudf.Series(M["0"])
- destinations = cudf.Series(M["1"])
-
- translate = 1000
-
- gdf = cudf.DataFrame()
- gdf["src_old"] = sources
- gdf["dst_old"] = destinations
- gdf["src"] = sources + translate
- gdf["dst"] = destinations + translate
- gdf["weight"] = gdf.index.astype(np.float)
-
- ddf = dask.dataframe.from_pandas(gdf, npartitions=2)
-
- ren2, num2 = NumberMap.renumber(
- ddf, ["src", "src_old"], ["dst", "dst_old"]
- )
-
- check_src = num2.from_internal_vertex_id(ren2, "src").compute()
- check_src = check_src.sort_values("weight").reset_index(drop=True)
- check_dst = num2.from_internal_vertex_id(ren2, "dst").compute()
- check_dst = check_dst.sort_values("weight").reset_index(drop=True)
-
- assert check_src["0"].to_pandas().equals(gdf["src"].to_pandas())
- assert check_src["1"].to_pandas().equals(gdf["src_old"].to_pandas())
- assert check_dst["0"].to_pandas().equals(gdf["dst"].to_pandas())
- assert check_dst["1"].to_pandas().equals(gdf["dst_old"].to_pandas())
+ # preserve_order is not supported for MG
+ renumbered_df, renumber_map = NumberMap.renumber(ddf,
+ ["src", "src_old"],
+ ["dst", "dst_old"],
+ preserve_order=False)
+ unrenumbered_df = renumber_map.unrenumber(renumbered_df, "src",
+ preserve_order=False)
+ unrenumbered_df = renumber_map.unrenumber(unrenumbered_df, "dst",
+ preserve_order=False)
+
+ # sort needed only for comparisons, since preserve_order is False
+ gdf = gdf.sort_values(by=["src", "src_old", "dst", "dst_old"])
+ gdf = gdf.reset_index()
+ unrenumbered_df = unrenumbered_df.compute()
+ unrenumbered_df = unrenumbered_df.sort_values(by=["0_src", "1_src",
+ "0_dst", "1_dst"])
+ unrenumbered_df = unrenumbered_df.reset_index()
+
+ assert gdf["src"].equals(unrenumbered_df["0_src"])
+ assert gdf["src_old"].equals(unrenumbered_df["1_src"])
+ assert gdf["dst"].equals(unrenumbered_df["0_dst"])
+ assert gdf["dst_old"].equals(unrenumbered_df["1_dst"])
-# Test all combinations of default/managed and pooled/non-pooled allocation
@pytest.mark.skipif(
is_single_gpu(), reason="skipping MG testing on Single GPU system"
)
-@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED)
-def test_mg_renumber3(graph_file, client_connection):
+@pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED,
+ ids=[f"dataset={d.as_posix()}"
+ for d in utils.DATASETS_UNRENUMBERED])
+def test_mg_renumber_add_internal_vertex_id(graph_file, client_connection):
gc.collect()
M = utils.read_csv_for_nx(graph_file)
diff --git a/python/cugraph/tests/dask/test_mg_replication.py b/python/cugraph/tests/dask/test_mg_replication.py
index bb43d6c0f7a..3974cf9ed82 100644
--- a/python/cugraph/tests/dask/test_mg_replication.py
+++ b/python/cugraph/tests/dask/test_mg_replication.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -34,7 +34,9 @@
@pytest.mark.skipif(
is_single_gpu(), reason="skipping MG testing on Single GPU system"
)
-@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS)
+@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS,
+ ids=[f"dataset={d.as_posix()}"
+ for d in DATASETS_OPTIONS])
@pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
def test_replicate_cudf_dataframe_with_weights(
input_data_path, mg_device_count
@@ -60,7 +62,9 @@ def test_replicate_cudf_dataframe_with_weights(
@pytest.mark.skipif(
is_single_gpu(), reason="skipping MG testing on Single GPU system"
)
-@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS)
+@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS,
+ ids=[f"dataset={d.as_posix()}"
+ for d in DATASETS_OPTIONS])
@pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
def test_replicate_cudf_dataframe_no_weights(input_data_path, mg_device_count):
gc.collect()
@@ -84,7 +88,9 @@ def test_replicate_cudf_dataframe_no_weights(input_data_path, mg_device_count):
@pytest.mark.skipif(
is_single_gpu(), reason="skipping MG testing on Single GPU system"
)
-@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS)
+@pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS,
+ ids=[f"dataset={d.as_posix()}"
+ for d in DATASETS_OPTIONS])
@pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
def test_replicate_cudf_series(input_data_path, mg_device_count):
gc.collect()
@@ -114,7 +120,9 @@ def test_replicate_cudf_series(input_data_path, mg_device_count):
@pytest.mark.skipif(
is_single_gpu(), reason="skipping MG testing on Single GPU system"
)
-@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS)
+@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS,
+ ids=[f"dataset={d.as_posix()}"
+ for d in DATASETS_OPTIONS])
@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
@pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
def test_enable_batch_no_context(graph_file, directed, mg_device_count):
@@ -129,7 +137,9 @@ def test_enable_batch_no_context(graph_file, directed, mg_device_count):
@pytest.mark.skipif(
is_single_gpu(), reason="skipping MG testing on Single GPU system"
)
-@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS)
+@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS,
+ ids=[f"dataset={d.as_posix()}"
+ for d in DATASETS_OPTIONS])
@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
@pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
def test_enable_batch_no_context_view_adj(
@@ -145,7 +155,9 @@ def test_enable_batch_no_context_view_adj(
@pytest.mark.skipif(
is_single_gpu(), reason="skipping MG testing on Single GPU system"
)
-@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS)
+@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS,
+ ids=[f"dataset={d.as_posix()}"
+ for d in DATASETS_OPTIONS])
@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
@pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
def test_enable_batch_context_then_views(
@@ -174,7 +186,9 @@ def test_enable_batch_context_then_views(
@pytest.mark.skipif(
is_single_gpu(), reason="skipping MG testing on Single GPU system"
)
-@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS)
+@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS,
+ ids=[f"dataset={d.as_posix()}"
+ for d in DATASETS_OPTIONS])
@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
@pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
def test_enable_batch_view_then_context(graph_file, directed, mg_device_count):
@@ -205,7 +219,9 @@ def test_enable_batch_view_then_context(graph_file, directed, mg_device_count):
@pytest.mark.skipif(
is_single_gpu(), reason="skipping MG testing on Single GPU system"
)
-@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS)
+@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS,
+ ids=[f"dataset={d.as_posix()}"
+ for d in DATASETS_OPTIONS])
@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
@pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
def test_enable_batch_context_no_context_views(
@@ -230,7 +246,9 @@ def test_enable_batch_context_no_context_views(
@pytest.mark.skipif(
is_single_gpu(), reason="skipping MG testing on Single GPU system"
)
-@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS)
+@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS,
+ ids=[f"dataset={d.as_posix()}"
+ for d in DATASETS_OPTIONS])
@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
@pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
def test_enable_batch_edgelist_replication(
@@ -251,7 +269,9 @@ def test_enable_batch_edgelist_replication(
@pytest.mark.skipif(
is_single_gpu(), reason="skipping MG testing on Single GPU system"
)
-@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS)
+@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS,
+ ids=[f"dataset={d.as_posix()}"
+ for d in DATASETS_OPTIONS])
@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
@pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
def test_enable_batch_adjlist_replication_weights(
@@ -293,7 +313,9 @@ def test_enable_batch_adjlist_replication_weights(
@pytest.mark.skipif(
is_single_gpu(), reason="skipping MG testing on Single GPU system"
)
-@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS)
+@pytest.mark.parametrize("graph_file", DATASETS_OPTIONS,
+ ids=[f"dataset={d.as_posix()}"
+ for d in DATASETS_OPTIONS])
@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
@pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS)
def test_enable_batch_adjlist_replication_no_weights(
diff --git a/python/cugraph/tests/dask/test_mg_sssp.py b/python/cugraph/tests/dask/test_mg_sssp.py
index d75d76d7fd4..9e1fd1ec82f 100644
--- a/python/cugraph/tests/dask/test_mg_sssp.py
+++ b/python/cugraph/tests/dask/test_mg_sssp.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2020, NVIDIA CORPORATION.
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -35,7 +35,10 @@ def client_connection():
def test_dask_sssp(client_connection):
gc.collect()
+ # FIXME: update this to allow dataset to be parameterized and have dataset
+ # part of test param id (see other tests)
input_data_path = r"../datasets/netscience.csv"
+ print(f"dataset={input_data_path}")
chunksize = dcg.get_chunksize(input_data_path)
ddf = dask_cudf.read_csv(
diff --git a/python/cugraph/tests/dask/test_mg_utility.py b/python/cugraph/tests/dask/test_mg_utility.py
index 3217c1bef1a..150fa0137f5 100644
--- a/python/cugraph/tests/dask/test_mg_utility.py
+++ b/python/cugraph/tests/dask/test_mg_utility.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2018-2020, NVIDIA CORPORATION.
+# Copyright (c) 2018-2021, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -46,7 +46,10 @@ def client_connection():
is_single_gpu(), reason="skipping MG testing on Single GPU system"
)
def test_from_edgelist(client_connection):
+ # FIXME: update this to allow dataset to be parameterized and have dataset
+ # part of test param id (see other tests)
input_data_path = r"../datasets/karate.csv"
+ print(f"dataset={input_data_path}")
chunksize = dcg.get_chunksize(input_data_path)
ddf = dask_cudf.read_csv(
input_data_path,
diff --git a/python/cugraph/tests/test_random_walks.py b/python/cugraph/tests/test_random_walks.py
new file mode 100644
index 00000000000..9767e81ba1f
--- /dev/null
+++ b/python/cugraph/tests/test_random_walks.py
@@ -0,0 +1,154 @@
+# Copyright (c) 2020-2021, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import gc
+
+import pytest
+
+from cugraph.tests import utils
+import cugraph
+import random
+
+
+# =============================================================================
+# Parameters
+# =============================================================================
+DIRECTED_GRAPH_OPTIONS = [False, True]  # values of the "directed" test param
+WEIGHTED_GRAPH_OPTIONS = [False, True]  # not referenced by the tests below
+DATASETS = list(map(pytest.param, utils.DATASETS))  # not referenced below
+DATASETS_SMALL = list(map(pytest.param, utils.DATASETS_SMALL))  # also unused
+
+
+def calc_random_walks(graph_file, directed=False, max_depth=None):
+    """
+    Run cugraph.random_walks on the graph stored in graph_file.
+
+    Between 1 and 10 start vertices are sampled at random from
+    range(G.number_of_vertices()); no seed is set in this function.
+
+    Parameters
+    ----------
+    graph_file : path of the dataset file
+        Passed to utils.generate_cugraph_graph_from_file together with
+        edgevals=True.
+
+    directed : bool
+        Forwarded as the directed argument of the graph loader.
+
+    max_depth : int
+        The maximum depth of the random walks. It is forwarded to
+        cugraph.random_walks unchanged, including the default of None.
+
+    Returns
+    -------
+    df : first value returned by cugraph.random_walks
+        Expected to hold the sources, destinations and edge weights of
+        all random walks.
+
+    offsets : second value returned by cugraph.random_walks
+        Expected to hold the starting offset in df of each walk.
+
+    start_vertices : list of int
+        The randomly sampled vertices the walks were started from.
+    """
+    graph = utils.generate_cugraph_graph_from_file(
+        graph_file,
+        directed=directed,
+        edgevals=True,
+    )
+    assert graph is not None
+
+    num_seeds = random.randint(1, 10)
+    seeds = random.sample(range(graph.number_of_vertices()), num_seeds)
+    walks, seed_offsets = cugraph.random_walks(graph, seeds, max_depth)
+    return walks, seed_offsets, seeds
+
+
+def check_random_walks(df, offsets, seeds, df_G=None):
+    """
+    Assert that the walks in df start at seeds and only use edges of df_G.
+
+    Row offsets[n] of df must have seeds[n] as its src, and the df_G rows
+    with the same (src, dst) as a df row must equal the first such df row.
+    Problems are printed and counted; the asserts at the end fail if any
+    were found. df_G must be supplied: the None default is not usable.
+    """
+    invalid_edge = 0
+    invalid_seeds = 0
+    invalid_weight = 0  # never incremented: weights are part of the edge row
+    offsets_idx = 0
+    for i in range(len(df.index)):
+        src, dst, weight = df.iloc[i].to_array()
+        # Row offsets[n] is checked against seeds[n], one seed at a time.
+        if i == offsets[offsets_idx]:
+            if df['src'].iloc[i] != seeds[offsets_idx]:
+                invalid_seeds += 1
+                print("[ERR] Invalid seed: src {} != seed {}"
+                      .format(df['src'].iloc[i], seeds[offsets_idx]))
+            offsets_idx += 1
+
+        edge = df.loc[(df['src'] == src) & (df['dst'] == dst)]
+        exp_edge = df_G.loc[(df_G['src'] == src) & (df_G['dst'] == dst)]
+        if not exp_edge.reset_index(drop=True).equals(
+                edge.reset_index(drop=True)[:1]):
+            invalid_edge += 1
+            print("[ERR] Invalid edge: There is no edge src {} dst {} "
+                  "weight {}".format(src, dst, weight))
+
+    assert invalid_edge == 0
+    assert invalid_seeds == 0
+    assert invalid_weight == 0
+
+# =============================================================================
+# Pytest Setup / Teardown - called for each test function
+# =============================================================================
+
+
+def prepare_test():
+ gc.collect()  # run a garbage-collection pass; this is the only setup done
+
+
+@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL)
+@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
+@pytest.mark.parametrize("max_depth", [None])
+def test_random_walks_invalid_max_dept(
+    graph_file,
+    directed,
+    max_depth
+):
+    """Running the random walks with max_depth=None must raise TypeError."""
+    prepare_test()
+    with pytest.raises(TypeError):
+        calc_random_walks(
+            graph_file,
+            directed=directed,
+            max_depth=max_depth
+        )
+
+
+@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL)
+@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS)
+def test_random_walks(graph_file, directed):
+    """Walks from random seeds must start at the seeds and use real edges."""
+    prepare_test()
+    max_depth = random.randint(2, 10)
+    # Reference edge list, renamed to src/dst/weight for the comparison.
+    df_G = utils.read_csv_file(graph_file)
+    df_G.rename(
+        columns={"0": "src", "1": "dst", "2": "weight"}, inplace=True)
+    df, offsets, seeds = calc_random_walks(
+        graph_file,
+        directed=directed,
+        max_depth=max_depth
+    )
+    check_random_walks(df, offsets, seeds, df_G)
diff --git a/python/cugraph/traversal/bfs.py b/python/cugraph/traversal/bfs.py
index efbae095676..a483b96850b 100644
--- a/python/cugraph/traversal/bfs.py
+++ b/python/cugraph/traversal/bfs.py
@@ -136,8 +136,10 @@ def bfs(G,
can be set, not both.
directed : bool, optional
- NOTE: For non-Graph-type (eg. sparse matrix) values of G only. Raises
- TypeError if used with a Graph object.
+ NOTE
+ For non-Graph-type (eg. sparse matrix) values of G only. Raises
+ TypeError if used with a Graph object.
+
If True (default), then convert the input matrix to a cugraph.DiGraph,
otherwise a cugraph.Graph object will be used.
diff --git a/python/cugraph/traversal/traveling_salesperson.py b/python/cugraph/traversal/traveling_salesperson.py
index ae17555e4ea..7aea7ae603f 100644
--- a/python/cugraph/traversal/traveling_salesperson.py
+++ b/python/cugraph/traversal/traveling_salesperson.py
@@ -29,6 +29,7 @@ def traveling_salesperson(pos_list,
optimization.
The current implementation does not support a weighted graph.
+
Parameters
----------
pos_list: cudf.DataFrame