From 1178aed914a53c8adc69a363ac8fec6b06cde663 Mon Sep 17 00:00:00 2001 From: Andrei Schaffer Date: Wed, 26 Aug 2020 17:52:05 -0500 Subject: [PATCH 01/74] Updated CHANGELOG.md. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c7594c03f6c..9409f54c18e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ # cuGraph 0.16.0 (Date TBD) ## New Features +- PR #1124 Sub-communicator initialization for 2D partitioning support ## Improvements - PR 1081 MNMG Renumbering - sort partitions by degree From b68d72f79c45ea6b8318d94bcaa9932b293b696f Mon Sep 17 00:00:00 2001 From: Andrei Schaffer Date: Thu, 27 Aug 2020 18:50:02 -0500 Subject: [PATCH 02/74] Proto interface and implementation. --- cpp/include/partition_manager.hpp | 138 ++++++++++++++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 cpp/include/partition_manager.hpp diff --git a/cpp/include/partition_manager.hpp b/cpp/include/partition_manager.hpp new file mode 100644 index 00000000000..ef4d69ffdd6 --- /dev/null +++ b/cpp/include/partition_manager.hpp @@ -0,0 +1,138 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace cugraph { +namespace partition_2d { + +template +std::string to_string(from_t const& value) +{ + std::stringstream ss; + ss << value; + return ss.str(); +} + +// default key-naming mechanism: +// +struct key_naming_t { + key_naming_t(int row_indx, int col_indx) + : name_(prefix_ + "_" + to_string(row_indx) + "_" + to_string(col_indx)) + { + } + + std::string col_name(void) const { return std::string("col_") + name_; } + + std::string row_name(void) const { return std::string("row_") + name_; } + + private: + std::string const prefix_{"partition"}; + std::string name_; +}; + +template +using matrix_t = std::vector>; + +using pair_comms_t = + std::pair, std::shared_ptr>; + +template +class partition_manager_t { + public: + partition_manager_t(raft::handle_t& handle, size_t p_row_size, size_t p_col_size) + : handle_(handle), p_row_size_(p_row_size), p_col_size_(p_col_size) + { + init_comms(); + } + + partition_manager_t(raft::handle_t const& handle, size_t p_size) : handle_(handle) + { + partition2d(p_size); + init_comms(); + } + + virtual ~partition_manager_t(void) {} + + protected: + virtual void partition2d(size_t p_size) + { + auto sqr = static_cast(std::sqrt(p_size)); + + // find divisor of p_size + // nearest to sqr; + // + p_row_size_ = nearest_divisor(sqr, p_size); + p_col_size_ = p_size / p_row_size_; + + assert(p_row_size_ > 1 && p_col_size_ > 1); + } + + void init_comms(void) + { + std::vector empty_row(p_col_size_, std::make_pair(nullptr, nullptr)); + comms_set_.assign(p_row_size_, empty_row); + + comms_t const& communicator = handle_.get_comms(); + + for (size_t row = 0; row < p_row_size_; ++row) + for (size_t col = 0; col < p_col_size_; ++col) { + key_name_policy_t key{row, col}; + + // TODO: + int row_color; // = ????? + int row_key; // = ????? + + int col_color; // = ????? + int col_key; // = ????? 
+ + auto shared_row_comm = + std::make_shared(communicator.comm_split(row_color, row_key)); + handle_.set_subcomm(key.row_name(), shared_row_comm); + + auto shared_col_comm = + std::make_shared(communicator.comm_split(col_color, col_key)); + handle_.set_subcomm(key.col_name(), shared_col_comm); + + comms_set_[row][col] = std::make_pair(shared_row_comm, shared_col_comm); + } + } + + private: + raft::handle_t& handle_; + size_t p_row_size_; + size_t p_col_size_; + matrix_t comms_set_; + + decltype(auto) nearest_divisor(size_t sqr, size_t p_size) + { + // TODO: + return sqr; // for now... + } +}; + +} // namespace partition_2d +} // namespace cugraph From 19c64b5be6970e9d76ce4d07aaca990912f59595 Mon Sep 17 00:00:00 2001 From: Andrei Schaffer Date: Fri, 28 Aug 2020 12:22:41 -0500 Subject: [PATCH 03/74] Updated partition_manager_t functionality. --- cpp/include/partition_manager.hpp | 82 ++++++++++++++++++++----------- 1 file changed, 52 insertions(+), 30 deletions(-) diff --git a/cpp/include/partition_manager.hpp b/cpp/include/partition_manager.hpp index ef4d69ffdd6..af420eb19af 100644 --- a/cpp/include/partition_manager.hpp +++ b/cpp/include/partition_manager.hpp @@ -28,7 +28,6 @@ namespace cugraph { namespace partition_2d { - template std::string to_string(from_t const& value) { @@ -40,17 +39,26 @@ std::string to_string(from_t const& value) // default key-naming mechanism: // struct key_naming_t { - key_naming_t(int row_indx, int col_indx) - : name_(prefix_ + "_" + to_string(row_indx) + "_" + to_string(col_indx)) + key_naming_t(int row_indx, + int col_indx, + std::string const& col_suffix = std::string("_col"), + std::string const& row_suffix = std::string("_row"), + std::string const& prefix = std::string("partition")) + : col_suffix_(col_suffix), + row_suffix_(row_suffix), + prefix_(prefix), + name_(prefix_ + "_" + to_string(row_indx) + "_" + to_string(col_indx)) { } - std::string col_name(void) const { return std::string("col_") + name_; } + std::string 
col_name(void) const { return name_ + col_suffix_; } - std::string row_name(void) const { return std::string("row_") + name_; } + std::string row_name(void) const { return name_ + row_suffix_; } private: - std::string const prefix_{"partition"}; + std::string const col_suffix_; + std::string const row_suffix_; + std::string const prefix_; std::string name_; }; @@ -60,16 +68,20 @@ using matrix_t = std::vector>; using pair_comms_t = std::pair, std::shared_ptr>; -template +enum class colors_2d_t : int { ROW = 0, COL = 1 }; + +// class responsible for creating 2D partition sub-comms: +// +template class partition_manager_t { public: - partition_manager_t(raft::handle_t& handle, size_t p_row_size, size_t p_col_size) + partition_manager_t(raft::handle_t& handle, size_type p_row_size, size_type p_col_size) : handle_(handle), p_row_size_(p_row_size), p_col_size_(p_col_size) { init_comms(); } - partition_manager_t(raft::handle_t const& handle, size_t p_size) : handle_(handle) + partition_manager_t(raft::handle_t const& handle, size_type p_size) : handle_(handle) { partition2d(p_size); init_comms(); @@ -77,10 +89,12 @@ class partition_manager_t { virtual ~partition_manager_t(void) {} + matrix_t const& comms_matrix(void) const { return comms_set_; } + protected: - virtual void partition2d(size_t p_size) + virtual void partition2d(size_type p_size) { - auto sqr = static_cast(std::sqrt(p_size)); + auto sqr = static_cast(std::sqrt(p_size)); // find divisor of p_size // nearest to sqr; @@ -96,43 +110,51 @@ class partition_manager_t { std::vector empty_row(p_col_size_, std::make_pair(nullptr, nullptr)); comms_set_.assign(p_row_size_, empty_row); - comms_t const& communicator = handle_.get_comms(); + raft::comms::comms_t const& communicator = handle_.get_comms(); - for (size_t row = 0; row < p_row_size_; ++row) - for (size_t col = 0; col < p_col_size_; ++col) { + for (size_type row = 0; row < p_row_size_; ++row) + for (size_type col = 0; col < p_col_size_; ++col) { 
key_name_policy_t key{row, col}; - // TODO: - int row_color; // = ????? - int row_key; // = ????? + // comm_slpit() on same key=linear_key, + // but different colors for row and column + // + // TODO: check if this assummed + // functionality is correct + // + size_type linear_key{p_col_size_ * row + col}; - int col_color; // = ????? - int col_key; // = ????? - - auto shared_row_comm = - std::make_shared(communicator.comm_split(row_color, row_key)); + auto shared_row_comm = std::make_shared( + communicator.comm_split(static_cast(colors_2d_t::ROW), linear_key)); handle_.set_subcomm(key.row_name(), shared_row_comm); - auto shared_col_comm = - std::make_shared(communicator.comm_split(col_color, col_key)); + auto shared_col_comm = std::make_shared( + communicator.comm_split(static_cast(colors_2d_t::COL), linear_key)); handle_.set_subcomm(key.col_name(), shared_col_comm); + // Also store in a matrix of comms_t; + // this may be redundant, but useful; + // TODO: check if this is okay... + // comms_set_[row][col] = std::make_pair(shared_row_comm, shared_col_comm); } } private: raft::handle_t& handle_; - size_t p_row_size_; - size_t p_col_size_; + size_type p_row_size_; + size_type p_col_size_; matrix_t comms_set_; - decltype(auto) nearest_divisor(size_t sqr, size_t p_size) + static decltype(auto) nearest_divisor(size_type sqr, size_type p_size) { - // TODO: - return sqr; // for now... + assert(sqr > 0); + + for (size_type div = sqr; div > 0; --div) { + auto p_div = p_size % div; + if (p_div == 0) return div; + } } }; - } // namespace partition_2d } // namespace cugraph From 553d02ef80db7873516f66a98c16ce8663ade807 Mon Sep 17 00:00:00 2001 From: Andrei Schaffer Date: Mon, 31 Aug 2020 17:54:36 -0500 Subject: [PATCH 04/74] Created per-worker subcomm_factory_t to fix erroneous implementation. 
--- cpp/include/partition_manager.hpp | 155 ++++++++++++++++-------------- 1 file changed, 83 insertions(+), 72 deletions(-) diff --git a/cpp/include/partition_manager.hpp b/cpp/include/partition_manager.hpp index af420eb19af..32d935dbf7d 100644 --- a/cpp/include/partition_manager.hpp +++ b/cpp/include/partition_manager.hpp @@ -36,61 +36,29 @@ std::string to_string(from_t const& value) return ss.str(); } -// default key-naming mechanism: +// class responsible for creating 2D partition of workers: +// responsible with finding appropriate P_ROW x P_COL +// 2D partition and initializing the raft::handle_t communicator // -struct key_naming_t { - key_naming_t(int row_indx, - int col_indx, - std::string const& col_suffix = std::string("_col"), - std::string const& row_suffix = std::string("_row"), - std::string const& prefix = std::string("partition")) - : col_suffix_(col_suffix), - row_suffix_(row_suffix), - prefix_(prefix), - name_(prefix_ + "_" + to_string(row_indx) + "_" + to_string(col_indx)) - { - } - - std::string col_name(void) const { return name_ + col_suffix_; } - - std::string row_name(void) const { return name_ + row_suffix_; } - - private: - std::string const col_suffix_; - std::string const row_suffix_; - std::string const prefix_; - std::string name_; -}; - -template -using matrix_t = std::vector>; - -using pair_comms_t = - std::pair, std::shared_ptr>; - -enum class colors_2d_t : int { ROW = 0, COL = 1 }; - -// class responsible for creating 2D partition sub-comms: +// (this might be removed; or, it might exist already) // -template +template class partition_manager_t { public: partition_manager_t(raft::handle_t& handle, size_type p_row_size, size_type p_col_size) : handle_(handle), p_row_size_(p_row_size), p_col_size_(p_col_size) { - init_comms(); + init_communicator(); } partition_manager_t(raft::handle_t const& handle, size_type p_size) : handle_(handle) { partition2d(p_size); - init_comms(); + init_communicator(); } virtual ~partition_manager_t(void) 
{} - matrix_t const& comms_matrix(void) const { return comms_set_; } - protected: virtual void partition2d(size_type p_size) { @@ -105,46 +73,15 @@ class partition_manager_t { assert(p_row_size_ > 1 && p_col_size_ > 1); } - void init_comms(void) + virtual void init_communicator(void) { - std::vector empty_row(p_col_size_, std::make_pair(nullptr, nullptr)); - comms_set_.assign(p_row_size_, empty_row); - - raft::comms::comms_t const& communicator = handle_.get_comms(); - - for (size_type row = 0; row < p_row_size_; ++row) - for (size_type col = 0; col < p_col_size_; ++col) { - key_name_policy_t key{row, col}; - - // comm_slpit() on same key=linear_key, - // but different colors for row and column - // - // TODO: check if this assummed - // functionality is correct - // - size_type linear_key{p_col_size_ * row + col}; - - auto shared_row_comm = std::make_shared( - communicator.comm_split(static_cast(colors_2d_t::ROW), linear_key)); - handle_.set_subcomm(key.row_name(), shared_row_comm); - - auto shared_col_comm = std::make_shared( - communicator.comm_split(static_cast(colors_2d_t::COL), linear_key)); - handle_.set_subcomm(key.col_name(), shared_col_comm); - - // Also store in a matrix of comms_t; - // this may be redundant, but useful; - // TODO: check if this is okay... - // - comms_set_[row][col] = std::make_pair(shared_row_comm, shared_col_comm); - } + // TODO: init's handle's communicator (singleton?) 
} private: raft::handle_t& handle_; size_type p_row_size_; size_type p_col_size_; - matrix_t comms_set_; static decltype(auto) nearest_divisor(size_type sqr, size_type p_size) { @@ -156,5 +93,79 @@ class partition_manager_t { } } }; + +// default key-naming mechanism: +// +struct key_naming_t { + key_naming_t(int row_indx, + int col_indx, + std::string const& col_suffix = std::string("_col"), + std::string const& row_suffix = std::string("_row"), + std::string const& prefix = std::string("partition")) + : col_suffix_(col_suffix), + row_suffix_(row_suffix), + prefix_(prefix), + name_(prefix_ + "_" + to_string(row_indx) + "_" + to_string(col_indx)) + { + } + + std::string col_name(void) const { return name_ + col_suffix_; } + + std::string row_name(void) const { return name_ + row_suffix_; } + + private: + std::string const col_suffix_; + std::string const row_suffix_; + std::string const prefix_; + std::string name_; +}; + +using pair_comms_t = + std::pair, std::shared_ptr>; + +enum class key_2d_t : int { ROW = 0, COL = 1 }; + +// class responsible for creating 2D partition sub-comms: +// this is instantiated by each worker (processing element, PE) +// for the row/column it belongs to; +// +template +class subcomm_factory_t { + public: + subcomm_factory_t(raft::handle_t& handle, size_type p_row_index, size_type p_col_index) + : handle_(handle), row_index_(p_row_index), col_index_(p_col_index) + { + init_row_col_comms(); + } + virtual ~subcomm_factory_t(void) {} + + protected: + virtual void init_row_col_comms(void) + { + name_policy_t key{row_index_, col_index_}; + raft::comms::comms_t const& communicator = handle_.get_comms(); + + int const rank = communicator.get_rank(); + int row_color = rank / row_index_; + int col_color = rank % row_index_; + + auto row_comm = std::make_shared( + communicator.comm_split(row_color, static_cast(key_2d_t::ROW))); + handle_.set_subcomm(key.row_name(), row_comm); + + auto col_comm = std::make_shared( + 
communicator.comm_split(col_color, static_cast(key_2d_t::COL))); + handle_.set_subcomm(key.col_name(), col_comm); + + row_col_subcomms_.first = row_comm; + row_col_subcomms_.second = col_comm; + } + + private: + raft::handle_t& handle_; + size_type row_index_; + size_type col_index_; + pair_comms_t row_col_subcomms_; +}; } // namespace partition_2d } // namespace cugraph From 9aeed09db71faec5bd69d03cf5972d19ab904a4c Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Wed, 2 Sep 2020 20:51:34 -0400 Subject: [PATCH 05/74] restructure louvain/leiden in preparation for MNMG work --- cpp/CMakeLists.txt | 6 +- cpp/include/graph.hpp | 4 + cpp/src/community/leiden.cpp | 50 -- cpp/src/community/leiden.cu | 67 ++ cpp/src/community/leiden.cuh | 163 ++++ cpp/src/community/leiden_kernels.cu | 299 ------- cpp/src/community/leiden_kernels.hpp | 35 - cpp/src/community/louvain.cpp | 52 -- cpp/src/community/louvain.cu | 67 ++ cpp/src/community/louvain.cuh | 637 +++++++++++++++ cpp/src/community/louvain_kernels.cu | 746 ------------------ cpp/src/community/louvain_kernels.hpp | 97 --- cpp/src/community/triangles_counting.cu | 4 +- cpp/tests/CMakeLists.txt | 2 +- .../{louvain_test.cpp => louvain_test.cu} | 6 +- python/cugraph/tests/test_louvain.py | 16 +- 16 files changed, 956 insertions(+), 1295 deletions(-) delete mode 100644 cpp/src/community/leiden.cpp create mode 100644 cpp/src/community/leiden.cu create mode 100644 cpp/src/community/leiden.cuh delete mode 100644 cpp/src/community/leiden_kernels.cu delete mode 100644 cpp/src/community/leiden_kernels.hpp delete mode 100644 cpp/src/community/louvain.cpp create mode 100644 cpp/src/community/louvain.cu create mode 100644 cpp/src/community/louvain.cuh delete mode 100644 cpp/src/community/louvain_kernels.cu delete mode 100644 cpp/src/community/louvain_kernels.hpp rename cpp/tests/community/{louvain_test.cpp => louvain_test.cu} (99%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c260563446e..2de15047a1c 100644 --- 
a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -344,10 +344,8 @@ add_library(cugraph SHARED src/converters/renumber.cu src/converters/COOtoCSR.cu src/community/spectral_clustering.cu - src/community/louvain.cpp - src/community/louvain_kernels.cu - src/community/leiden.cpp - src/community/leiden_kernels.cu + src/community/louvain.cu + src/community/leiden.cu src/community/ktruss.cu src/community/ECG.cu src/community/triangles_counting.cu diff --git a/cpp/include/graph.hpp b/cpp/include/graph.hpp index d96fe5cfd16..e4f072be357 100644 --- a/cpp/include/graph.hpp +++ b/cpp/include/graph.hpp @@ -53,6 +53,10 @@ enum class DegreeDirection { template class GraphViewBase { public: + using vertex_type = vertex_t; + using edge_type = edge_t; + using weight_type = weight_t; + raft::handle_t *handle; weight_t *edge_data; ///< edge weight diff --git a/cpp/src/community/leiden.cpp b/cpp/src/community/leiden.cpp deleted file mode 100644 index 9e7a49db1f1..00000000000 --- a/cpp/src/community/leiden.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - -#include - -#include - -#include - -#include "utilities/error.hpp" - -namespace cugraph { - -template -void leiden(GraphCSRView const &graph, - weight_t &final_modularity, - int &num_level, - vertex_t *leiden_parts, - int max_level, - weight_t resolution) -{ - CUGRAPH_EXPECTS(graph.edge_data != nullptr, "API error, leiden expects a weighted graph"); - CUGRAPH_EXPECTS(leiden_parts != nullptr, "API error, leiden_parts is null"); - - detail::leiden( - graph, final_modularity, num_level, leiden_parts, max_level, resolution); -} - -template void leiden( - GraphCSRView const &, float &, int &, int32_t *, int, float); -template void leiden( - GraphCSRView const &, double &, int &, int32_t *, int, double); - -} // namespace cugraph diff --git a/cpp/src/community/leiden.cu b/cpp/src/community/leiden.cu new file mode 100644 index 00000000000..feb02a311e7 --- /dev/null +++ b/cpp/src/community/leiden.cu @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +namespace cugraph { +namespace detail { + +template +std::pair leiden(GraphCSRView const &graph, + vertex_t *leiden_parts, + int max_level, + weight_t resolution, + cudaStream_t stream) +{ + CUGRAPH_EXPECTS(graph.edge_data != nullptr, "API error, leiden expects a weighted graph"); + CUGRAPH_EXPECTS(leiden_parts != nullptr, "API error, leiden_parts is null"); + + Leiden> runner(graph, stream); + + return runner.compute(leiden_parts, max_level, resolution); +} + +} // namespace detail + + +template +void leiden(GraphCSRView const &graph, + weight_t &final_modularity, + int &num_level, + vertex_t *leiden_parts, + int max_level, + weight_t resolution) { + + cudaStream_t stream{0}; + + std::tie(num_level, final_modularity) = detail::leiden(graph, leiden_parts, max_level, resolution, stream); + +} + +template void leiden(GraphCSRView const &, + float &, + int &, + int32_t *, + int, + float); +template void leiden(GraphCSRView const &, + double &, + int &, + int32_t *, + int, + double); + +} //namespace cugraph diff --git a/cpp/src/community/leiden.cuh b/cpp/src/community/leiden.cuh new file mode 100644 index 00000000000..f38d1c10ed7 --- /dev/null +++ b/cpp/src/community/leiden.cuh @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include + +namespace cugraph { + +template +class Leiden: public Louvain { +public: + using graph_t = graph_type; + using vertex_t = typename graph_type::vertex_type; + using edge_t = typename graph_type::edge_type; + using weight_t = typename graph_type::weight_type; + + Leiden(graph_type const &graph, cudaStream_t stream): + Louvain(graph, stream), + constraint_v_(graph.number_of_vertices) { + } + + weight_t update_clustering_constrained(weight_t total_edge_weight, weight_t resolution, + graph_type const &graph) { + + this->timer_start("update_clustering_constrained"); + + rmm::device_vector next_cluster_v(this->cluster_v_); + rmm::device_vector delta_Q_v(graph.number_of_edges); + rmm::device_vector cluster_hash_v(graph.number_of_edges); + rmm::device_vector old_cluster_sum_v(graph.number_of_vertices); + + vertex_t const *d_src_indices = this->src_indices_v_.data().get(); + vertex_t const *d_dst_indices = graph.indices; + vertex_t *d_cluster_hash = cluster_hash_v.data().get(); + vertex_t *d_cluster = this->cluster_v_.data().get(); + weight_t const *d_vertex_weights = this->vertex_weights_v_.data().get(); + weight_t *d_cluster_weights = this->cluster_weights_v_.data().get(); + weight_t *d_delta_Q = delta_Q_v.data().get(); + vertex_t *d_constraint = constraint_v_.data().get(); + + weight_t new_Q = this->modularity(total_edge_weight, resolution, graph, this->cluster_v_.data().get()); + + weight_t cur_Q = new_Q - 1; + + // To avoid the potential of having two vertices swap clusters + // we will only allow vertices to move up (true) or down (false) + // during each iteration of the loop + bool up_down = true; + + while (new_Q > (cur_Q + 0.0001)) { + cur_Q = new_Q; + + this->compute_delta_modularity(total_edge_weight, resolution, graph, + cluster_hash_v, + old_cluster_sum_v, + delta_Q_v); + + // Filter out positive delta_Q values for nodes not in the same constraint group + thrust::for_each( + rmm::exec_policy(this->stream_)->on(this->stream_), + 
thrust::make_counting_iterator(0), + thrust::make_counting_iterator(graph.number_of_edges), + [d_src_indices, d_dst_indices, d_constraint, d_delta_Q] __device__(vertex_t i) { + vertex_t start_cluster = d_constraint[d_src_indices[i]]; + vertex_t end_cluster = d_constraint[d_dst_indices[i]]; + if (start_cluster != end_cluster) d_delta_Q[i] = weight_t{0.0}; + }); + + this->assign_nodes(graph, + cluster_hash_v, + next_cluster_v, + delta_Q_v, + up_down); + + up_down = !up_down; + + new_Q = this->modularity(total_edge_weight, resolution, graph, next_cluster_v.data().get()); + + if (new_Q > cur_Q) { + thrust::copy(rmm::exec_policy(this->stream_)->on(this->stream_), + next_cluster_v.begin(), + next_cluster_v.end(), + this->cluster_v_.begin()); + } + } + + this->timer_stop(this->stream_); + return cur_Q; + } + + std::pair compute(vertex_t *d_cluster_vec, + int max_level, + weight_t resolution) { + int num_level{0}; + + weight_t total_edge_weight = + thrust::reduce(rmm::exec_policy(this->stream_)->on(this->stream_), this->weights_v_.begin(), this->weights_v_.end()); + + weight_t best_modularity = weight_t{-1}; + + // + // Initialize every cluster to reference each vertex to itself + // + thrust::sequence(rmm::exec_policy(this->stream_)->on(this->stream_), this->cluster_v_.begin(), this->cluster_v_.end()); + thrust::copy( + rmm::exec_policy(this->stream_)->on(this->stream_), this->cluster_v_.begin(), this->cluster_v_.end(), d_cluster_vec); + + // + // Our copy of the graph. Each iteration of the outer loop will + // shrink this copy of the graph. 
+ // + GraphCSRView current_graph(this->offsets_v_.data().get(), + this->indices_v_.data().get(), + this->weights_v_.data().get(), + this->number_of_vertices_, + this->number_of_edges_); + + current_graph.get_source_indices(this->src_indices_v_.data().get()); + + while (num_level < max_level) { + this->compute_vertex_and_cluster_weights(current_graph); + + weight_t new_Q = this->update_clustering(total_edge_weight, resolution, current_graph); + + thrust::copy( + rmm::exec_policy(this->stream_)->on(this->stream_), this->cluster_v_.begin(), this->cluster_v_.end(), constraint_v_.begin()); + + new_Q = update_clustering_constrained(total_edge_weight, + resolution, + current_graph); + + if (new_Q <= best_modularity) { break; } + + best_modularity = new_Q; + + this->shrink_graph(current_graph, d_cluster_vec); + + num_level++; + } + + this->timer_display(std::cout); + + return std::make_pair(num_level, best_modularity); + } + +private: + rmm::device_vector constraint_v_; +}; + +} // namespace cugraph diff --git a/cpp/src/community/leiden_kernels.cu b/cpp/src/community/leiden_kernels.cu deleted file mode 100644 index 5eb4219d1ac..00000000000 --- a/cpp/src/community/leiden_kernels.cu +++ /dev/null @@ -1,299 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#include - -#include - -#include -#include - -//#define TIMING - -#ifdef TIMING -#include -#endif - -#include - -namespace cugraph { -namespace detail { - -template -weight_t update_clustering_by_delta_modularity_constrained( - weight_t total_edge_weight, - weight_t resolution, - GraphCSRView const &graph, - rmm::device_vector const &src_indices, - rmm::device_vector const &vertex_weights, - rmm::device_vector &cluster_weights, - rmm::device_vector &cluster, - rmm::device_vector &constraint, - cudaStream_t stream) -{ - rmm::device_vector next_cluster(cluster); - rmm::device_vector delta_Q(graph.number_of_edges); - rmm::device_vector cluster_hash(graph.number_of_edges); - rmm::device_vector old_cluster_sum(graph.number_of_vertices); - - weight_t *d_delta_Q = delta_Q.data().get(); - vertex_t *d_constraint = constraint.data().get(); - vertex_t const *d_src_indices = src_indices.data().get(); - vertex_t const *d_dst_indices = graph.indices; - - weight_t new_Q = modularity(total_edge_weight, resolution, graph, cluster.data().get(), stream); - - weight_t cur_Q = new_Q - 1; - - // To avoid the potential of having two vertices swap clusters - // we will only allow vertices to move up (true) or down (false) - // during each iteration of the loop - bool up_down = true; - - while (new_Q > (cur_Q + 0.0001)) { - cur_Q = new_Q; - - compute_delta_modularity(total_edge_weight, - resolution, - graph, - src_indices, - vertex_weights, - cluster_weights, - cluster, - cluster_hash, - delta_Q, - old_cluster_sum, - stream); - - // Filter out positive delta_Q values for nodes not in the same constraint group - thrust::for_each( - rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(graph.number_of_edges), - [d_src_indices, d_dst_indices, d_constraint, d_delta_Q] __device__(vertex_t i) { - vertex_t start_cluster = d_constraint[d_src_indices[i]]; - vertex_t end_cluster = d_constraint[d_dst_indices[i]]; - if (start_cluster != 
end_cluster) d_delta_Q[i] = weight_t{0.0}; - }); - - assign_nodes(graph, - delta_Q, - cluster_hash, - src_indices, - next_cluster, - vertex_weights, - cluster_weights, - up_down, - stream); - - up_down = !up_down; - - new_Q = modularity(total_edge_weight, resolution, graph, next_cluster.data().get(), stream); - - if (new_Q > cur_Q) { - thrust::copy(rmm::exec_policy(stream)->on(stream), - next_cluster.begin(), - next_cluster.end(), - cluster.begin()); - } - } - - return cur_Q; -} - -template float update_clustering_by_delta_modularity_constrained( - float, - float, - GraphCSRView const &, - rmm::device_vector const &, - rmm::device_vector const &, - rmm::device_vector &, - rmm::device_vector &, - rmm::device_vector &, - cudaStream_t); - -template double update_clustering_by_delta_modularity_constrained( - double, - double, - GraphCSRView const &, - rmm::device_vector const &, - rmm::device_vector const &, - rmm::device_vector &, - rmm::device_vector &, - rmm::device_vector &, - cudaStream_t); - -template -void leiden(GraphCSRView const &graph, - weight_t &final_modularity, - int &num_level, - vertex_t *cluster_vec, - int max_level, - weight_t resolution, - cudaStream_t stream) -{ -#ifdef TIMING - HighResTimer hr_timer; -#endif - - num_level = 0; - - // - // Vectors to create a copy of the graph - // - rmm::device_vector offsets_v(graph.offsets, graph.offsets + graph.number_of_vertices + 1); - rmm::device_vector indices_v(graph.indices, graph.indices + graph.number_of_edges); - rmm::device_vector weights_v(graph.edge_data, graph.edge_data + graph.number_of_edges); - rmm::device_vector src_indices_v(graph.number_of_edges); - - // - // Weights and clustering across iterations of algorithm - // - rmm::device_vector vertex_weights_v(graph.number_of_vertices); - rmm::device_vector cluster_weights_v(graph.number_of_vertices); - rmm::device_vector cluster_v(graph.number_of_vertices); - - // - // Temporaries used within kernels. 
Each iteration uses less - // of this memory - // - rmm::device_vector tmp_arr_v(graph.number_of_vertices); - rmm::device_vector cluster_inverse_v(graph.number_of_vertices); - - weight_t total_edge_weight = - thrust::reduce(rmm::exec_policy(stream)->on(stream), weights_v.begin(), weights_v.end()); - weight_t best_modularity = -1; - - // - // Initialize every cluster to reference each vertex to itself - // - thrust::sequence(rmm::exec_policy(stream)->on(stream), cluster_v.begin(), cluster_v.end()); - thrust::copy( - rmm::exec_policy(stream)->on(stream), cluster_v.begin(), cluster_v.end(), cluster_vec); - - // - // Our copy of the graph. Each iteration of the outer loop will - // shrink this copy of the graph. - // - GraphCSRView current_graph(offsets_v.data().get(), - indices_v.data().get(), - weights_v.data().get(), - graph.number_of_vertices, - graph.number_of_edges); - - current_graph.get_source_indices(src_indices_v.data().get()); - - while (num_level < max_level) { - // - // Sum the weights of all edges departing a vertex. This is - // loop invariant, so we'll compute it here. - // - // Cluster weights are equivalent to vertex weights with this initial - // graph - // -#ifdef TIMING - hr_timer.start("init"); -#endif - - cugraph::detail::compute_vertex_sums(current_graph, vertex_weights_v, stream); - thrust::copy(rmm::exec_policy(stream)->on(stream), - vertex_weights_v.begin(), - vertex_weights_v.end(), - cluster_weights_v.begin()); - -#ifdef TIMING - hr_timer.stop(); - - hr_timer.start("update_clustering"); -#endif - - weight_t new_Q = update_clustering_by_delta_modularity(total_edge_weight, - resolution, - current_graph, - src_indices_v, - vertex_weights_v, - cluster_weights_v, - cluster_v, - stream); - - // After finding the initial unconstrained partition we use that partitioning as the constraint - // for the second round. 
- rmm::device_vector constraint(graph.number_of_vertices); - thrust::copy( - rmm::exec_policy(stream)->on(stream), cluster_v.begin(), cluster_v.end(), constraint.begin()); - new_Q = update_clustering_by_delta_modularity_constrained(total_edge_weight, - resolution, - current_graph, - src_indices_v, - vertex_weights_v, - cluster_weights_v, - cluster_v, - constraint, - stream); - -#ifdef TIMING - hr_timer.stop(); -#endif - - if (new_Q <= best_modularity) { break; } - - best_modularity = new_Q; - -#ifdef TIMING - hr_timer.start("shrinking graph"); -#endif - - // renumber the clusters to the range 0..(num_clusters-1) - vertex_t num_clusters = renumber_clusters( - graph.number_of_vertices, cluster_v, tmp_arr_v, cluster_inverse_v, cluster_vec, stream); - cluster_weights_v.resize(num_clusters); - - // shrink our graph to represent the graph of supervertices - generate_superverticies_graph(current_graph, src_indices_v, num_clusters, cluster_v, stream); - - // assign each new vertex to its own cluster - thrust::sequence(rmm::exec_policy(stream)->on(stream), cluster_v.begin(), cluster_v.end()); - -#ifdef TIMING - hr_timer.stop(); -#endif - - num_level++; - } - -#ifdef TIMING - hr_timer.display(std::cout); -#endif - - final_modularity = best_modularity; -} - -template void leiden(GraphCSRView const &, - float &, - int &, - int32_t *, - int, - float, - cudaStream_t); -template void leiden(GraphCSRView const &, - double &, - int &, - int32_t *, - int, - double, - cudaStream_t); - -} // namespace detail -} // namespace cugraph diff --git a/cpp/src/community/leiden_kernels.hpp b/cpp/src/community/leiden_kernels.hpp deleted file mode 100644 index cbe93c04f52..00000000000 --- a/cpp/src/community/leiden_kernels.hpp +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include - -#include - -namespace cugraph { -namespace detail { - -template -void leiden(GraphCSRView const& graph, - weight_t& final_modularity, - int& num_level, - vertex_t* cluster_vec, - int max_level, - weight_t resolution, - cudaStream_t stream = 0); - -} // namespace detail -} // namespace cugraph diff --git a/cpp/src/community/louvain.cpp b/cpp/src/community/louvain.cpp deleted file mode 100644 index 0e3f6ac51fd..00000000000 --- a/cpp/src/community/louvain.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include -#include - -#include - -#include - -#include - -#include "utilities/error.hpp" - -namespace cugraph { - -template -void louvain(GraphCSRView const &graph, - weight_t *final_modularity, - int *num_level, - vertex_t *louvain_parts, - int max_level, - weight_t resolution) -{ - CUGRAPH_EXPECTS(graph.edge_data != nullptr, "API error, louvain expects a weighted graph"); - CUGRAPH_EXPECTS(final_modularity != nullptr, "API error, final_modularity is null"); - CUGRAPH_EXPECTS(num_level != nullptr, "API error, num_level is null"); - CUGRAPH_EXPECTS(louvain_parts != nullptr, "API error, louvain_parts is null"); - - detail::louvain( - graph, final_modularity, num_level, louvain_parts, max_level, resolution); -} - -template void louvain( - GraphCSRView const &, float *, int *, int32_t *, int, float); -template void louvain( - GraphCSRView const &, double *, int *, int32_t *, int, double); - -} // namespace cugraph diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu new file mode 100644 index 00000000000..2d5957ead42 --- /dev/null +++ b/cpp/src/community/louvain.cu @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +namespace cugraph { +namespace detail { + +template +std::pair louvain(GraphCSRView const &graph, + vertex_t *louvain_parts, + int max_level, + weight_t resolution, + cudaStream_t stream) +{ + CUGRAPH_EXPECTS(graph.edge_data != nullptr, "API error, louvain expects a weighted graph"); + CUGRAPH_EXPECTS(louvain_parts != nullptr, "API error, louvain_parts is null"); + + Louvain> runner(graph, stream); + + return runner.compute(louvain_parts, max_level, resolution); +} + +} // namespace detail + + +template +void louvain(GraphCSRView const &graph, + weight_t *final_modularity, + int *num_level, + vertex_t *louvain_parts, + int max_level, + weight_t resolution) { + + cudaStream_t stream{0}; + + std::tie(*num_level, *final_modularity) = detail::louvain(graph, louvain_parts, max_level, resolution, stream); + +} + +template void louvain(GraphCSRView const &, + float *, + int *, + int32_t *, + int, + float); +template void louvain(GraphCSRView const &, + double *, + int *, + int32_t *, + int, + double); + +} //namespace cugraph diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh new file mode 100644 index 00000000000..f93bf8b90a2 --- /dev/null +++ b/cpp/src/community/louvain.cuh @@ -0,0 +1,637 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include + +#include + +#include +#include + +//#define TIMING + +#ifdef TIMING +#include +#endif + + +namespace cugraph { + +template +class Louvain { +public: + using graph_t = graph_type; + using vertex_t = typename graph_type::vertex_type; + using edge_t = typename graph_type::edge_type; + using weight_t = typename graph_type::weight_type; + + Louvain(graph_type const &graph, cudaStream_t stream): +#ifdef TIMING + hr_timer_(), +#endif + + // FIXME: Don't really need to copy here but would need + // to change the logic to populate this properly + // in generate_superverticies_graph. + // + offsets_v_(graph.offsets, graph.offsets + graph.number_of_vertices + 1), + indices_v_(graph.indices, graph.indices + graph.number_of_edges), + weights_v_(graph.edge_data, graph.edge_data + graph.number_of_edges), + src_indices_v_(graph.number_of_edges), + vertex_weights_v_(graph.number_of_vertices), + cluster_weights_v_(graph.number_of_vertices), + cluster_v_(graph.number_of_vertices), + tmp_arr_v_(graph.number_of_vertices), + cluster_inverse_v_(graph.number_of_vertices), + number_of_vertices_(graph.number_of_vertices), + number_of_edges_(graph.number_of_edges), + stream_(stream) + { + } + + + weight_t modularity(weight_t total_edge_weight, + weight_t resolution, + graph_t const &graph, + vertex_t const *d_cluster) { + + vertex_t n_verts = graph.number_of_vertices; + + rmm::device_vector inc(n_verts, weight_t{0.0}); + rmm::device_vector deg(n_verts, weight_t{0.0}); + + edge_t const *d_offsets = graph.offsets; + vertex_t const *d_indices = graph.indices; + weight_t const *d_weights = graph.edge_data; + weight_t *d_inc = inc.data().get(); + weight_t *d_deg = deg.data().get(); + + // FIXME: Already have weighted degree computed in main loop, + // could pass that in rather than computing d_deg... 
which + // would save an atomicAdd (synchronization) + // + thrust::for_each( + rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(graph.number_of_vertices), + [d_inc, d_deg, d_offsets, d_indices, d_weights, d_cluster] __device__(vertex_t v) { + vertex_t community = d_cluster[v]; + weight_t increase{0.0}; + weight_t degree{0.0}; + + for (edge_t loc = d_offsets[v]; loc < d_offsets[v + 1]; ++loc) { + vertex_t neighbor = d_indices[loc]; + degree += d_weights[loc]; + if (d_cluster[neighbor] == community) { increase += d_weights[loc]; } + } + + if (degree > weight_t{0.0}) atomicAdd(d_deg + community, degree); + if (increase > weight_t{0.0}) atomicAdd(d_inc + community, increase); + }); + + weight_t Q = thrust::transform_reduce( + rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(graph.number_of_vertices), + [d_deg, d_inc, total_edge_weight, resolution] __device__(vertex_t community) { + return ((d_inc[community] / total_edge_weight) - resolution * + (d_deg[community] * d_deg[community]) / + (total_edge_weight * total_edge_weight)); + }, + weight_t{0.0}, + thrust::plus()); + + return Q; + } + + virtual std::pair compute(vertex_t *d_cluster_vec, + int max_level, + weight_t resolution) { + + int num_level{0}; + + weight_t total_edge_weight = + thrust::reduce(rmm::exec_policy(stream_)->on(stream_), weights_v_.begin(), weights_v_.end()); + + weight_t best_modularity = weight_t{-1}; + + // + // Initialize every cluster to reference each vertex to itself + // + thrust::sequence(rmm::exec_policy(stream_)->on(stream_), cluster_v_.begin(), cluster_v_.end()); + thrust::copy( + rmm::exec_policy(stream_)->on(stream_), cluster_v_.begin(), cluster_v_.end(), d_cluster_vec); + + // + // Our copy of the graph. Each iteration of the outer loop will + // shrink this copy of the graph. 
+ // + GraphCSRView current_graph(offsets_v_.data().get(), + indices_v_.data().get(), + weights_v_.data().get(), + number_of_vertices_, + number_of_edges_); + + current_graph.get_source_indices(src_indices_v_.data().get()); + + while (num_level < max_level) { + compute_vertex_and_cluster_weights(current_graph); + + weight_t new_Q = update_clustering(total_edge_weight, resolution, current_graph); + + if (new_Q <= best_modularity) { break; } + + best_modularity = new_Q; + + shrink_graph(current_graph, d_cluster_vec); + + num_level++; + } + + timer_display(std::cout); + + return std::make_pair(num_level, best_modularity); + } + +protected: + void timer_start(std::string const ®ion) { +#ifdef TIMING + hr_timer_.start(region); +#endif + } + + void timer_stop(cudaStream_t stream) { +#ifdef TIMING + CUDA_TRY(cudaStreamSynchronize(stream)); + hr_timer_.stop(); +#endif + } + + void timer_display(std::ostream &os) { +#ifdef TIMING + hr_timer_.display(os); +#endif + } + +public: + void compute_vertex_and_cluster_weights(graph_type const &graph) { + timer_start("compute_vertex_and_cluster_weights"); + + edge_t const *d_offsets = graph.offsets; + vertex_t const *d_indices = graph.indices; + weight_t const *d_weights = graph.edge_data; + weight_t *d_vertex_weights = vertex_weights_v_.data().get(); + weight_t *d_cluster_weights = cluster_weights_v_.data().get(); + + // + // MNMG: copy_v_transform_reduce_out_nbr, then copy + // + thrust::for_each(rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(graph.number_of_vertices), + [d_offsets, d_indices, d_weights, + d_vertex_weights, d_cluster_weights] __device__ (vertex_t src) { + weight_t sum{0.0}; + + for (edge_t i = d_offsets[src] ; i < d_offsets[src + 1] ; ++i) { + sum += d_weights[i]; + } + + d_vertex_weights[src] = sum; + d_cluster_weights[src] = sum; + }); + + timer_stop(stream_); + } + + virtual weight_t update_clustering(weight_t total_edge_weight, weight_t resolution, 
+ graph_type const &graph) { + + timer_start("update_clustering"); + + // + // MNMG: This is the hard one, see writeup + // + rmm::device_vector next_cluster_v(cluster_v_); + rmm::device_vector delta_Q_v(graph.number_of_edges); + rmm::device_vector cluster_hash_v(graph.number_of_edges); + rmm::device_vector old_cluster_sum_v(graph.number_of_vertices); + + vertex_t *d_cluster_hash = cluster_hash_v.data().get(); + vertex_t *d_cluster = cluster_v_.data().get(); + weight_t const *d_vertex_weights = vertex_weights_v_.data().get(); + weight_t *d_cluster_weights = cluster_weights_v_.data().get(); + weight_t *d_delta_Q = delta_Q_v.data().get(); + + weight_t new_Q = modularity(total_edge_weight, resolution, graph, cluster_v_.data().get()); + + weight_t cur_Q = new_Q - 1; + + // To avoid the potential of having two vertices swap clusters + // we will only allow vertices to move up (true) or down (false) + // during each iteration of the loop + bool up_down = true; + + while (new_Q > (cur_Q + 0.0001)) { + cur_Q = new_Q; + + compute_delta_modularity(total_edge_weight, resolution, graph, + cluster_hash_v, + old_cluster_sum_v, + delta_Q_v); + + assign_nodes(graph, + cluster_hash_v, + next_cluster_v, + delta_Q_v, + up_down); + + up_down = !up_down; + + new_Q = modularity(total_edge_weight, resolution, graph, next_cluster_v.data().get()); + + if (new_Q > cur_Q) { + thrust::copy(rmm::exec_policy(stream_)->on(stream_), + next_cluster_v.begin(), + next_cluster_v.end(), + cluster_v_.begin()); + } + } + + timer_stop(stream_); + return cur_Q; + } + + void compute_delta_modularity(weight_t total_edge_weight, + weight_t resolution, + graph_type const &graph, + rmm::device_vector &cluster_hash_v, + rmm::device_vector &old_cluster_sum_v, + rmm::device_vector &delta_Q_v + ) { + + vertex_t const *d_src_indices = src_indices_v_.data().get(); + vertex_t const *d_dst_indices = graph.indices; + edge_t const *d_offsets = graph.offsets; + weight_t const *d_weights = graph.edge_data; + vertex_t 
const *d_cluster = cluster_v_.data().get(); + weight_t const *d_vertex_weights = vertex_weights_v_.data().get(); + weight_t const *d_cluster_weights = cluster_weights_v_.data().get(); + + vertex_t *d_cluster_hash = cluster_hash_v.data().get(); + weight_t *d_delta_Q = delta_Q_v.data().get(); + weight_t *d_old_cluster_sum = old_cluster_sum_v.data().get(); + weight_t *d_new_cluster_sum = d_delta_Q; + + thrust::fill(cluster_hash_v.begin(), cluster_hash_v.end(), vertex_t{-1}); + thrust::fill(delta_Q_v.begin(), delta_Q_v.end(), weight_t{0.0}); + thrust::fill(old_cluster_sum_v.begin(), old_cluster_sum_v.end(), weight_t{0.0}); + + // MNMG: New technique using reduce_by_key. Would require a segmented sort + // or a pair of sorts on each node, so probably slower than what's here. + // This might still be faster even in MNMG... + // + // + // FIXME: Eventually this should use cuCollections concurrent map + // implementation, but that won't be available for a while. + // + // For each source vertex, we're going to build a hash + // table to the destination cluster ids. We can use + // the offsets ranges to define the bounds of the hash + // table. 
+ // + thrust::for_each(rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(graph.number_of_edges), + [d_src_indices, + d_dst_indices, + d_cluster, + d_offsets, + d_cluster_hash, + d_new_cluster_sum, + d_weights, + d_old_cluster_sum] __device__(edge_t loc) { + vertex_t src = d_src_indices[loc]; + vertex_t dst = d_dst_indices[loc]; + + if (src != dst) { + vertex_t old_cluster = d_cluster[src]; + vertex_t new_cluster = d_cluster[dst]; + edge_t hash_base = d_offsets[src]; + edge_t n_edges = d_offsets[src + 1] - hash_base; + + int h = (new_cluster % n_edges); + edge_t offset = hash_base + h; + while (d_cluster_hash[offset] != new_cluster) { + if (d_cluster_hash[offset] == -1) { + atomicCAS(d_cluster_hash + offset, -1, new_cluster); + } else { + h = (h + 1) % n_edges; + offset = hash_base + h; + } + } + + atomicAdd(d_new_cluster_sum + offset, d_weights[loc]); + + if (old_cluster == new_cluster) + atomicAdd(d_old_cluster_sum + src, d_weights[loc]); + } + }); + + thrust::for_each(rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(graph.number_of_edges), + [total_edge_weight, + resolution, + d_cluster_hash, + d_src_indices, + d_cluster, + d_vertex_weights, + d_delta_Q, + d_new_cluster_sum, + d_old_cluster_sum, + d_cluster_weights] __device__(edge_t loc) { + vertex_t new_cluster = d_cluster_hash[loc]; + if (new_cluster >= 0) { + vertex_t src = d_src_indices[loc]; + vertex_t old_cluster = d_cluster[src]; + weight_t k_k = d_vertex_weights[src]; + weight_t a_old = d_cluster_weights[old_cluster]; + weight_t a_new = d_cluster_weights[new_cluster]; + + // NOTE: d_delta_Q and d_new_cluster_sum are aliases + // for same device array to save memory + d_delta_Q[loc] = + 2 * + (((d_new_cluster_sum[loc] - d_old_cluster_sum[src]) / total_edge_weight) - + resolution * (a_new * k_k - a_old * k_k + k_k * k_k) / + (total_edge_weight * total_edge_weight)); + } else { + 
d_delta_Q[loc] = weight_t{0.0}; + } + }); + } + + void assign_nodes(graph_type const &graph, + rmm::device_vector &cluster_hash_v, + rmm::device_vector &next_cluster_v, + rmm::device_vector &delta_Q_v, + bool up_down) { + rmm::device_vector temp_vertices_v(graph.number_of_vertices); + rmm::device_vector temp_cluster_v(graph.number_of_vertices, vertex_t{-1}); + rmm::device_vector temp_delta_Q_v(graph.number_of_vertices, weight_t{0.0}); + + weight_t *d_delta_Q = delta_Q_v.data().get(); + vertex_t *d_next_cluster = next_cluster_v.data().get(); + vertex_t *d_cluster_hash = cluster_hash_v.data().get(); + weight_t const *d_vertex_weights = vertex_weights_v_.data().get(); + weight_t *d_cluster_weights = cluster_weights_v_.data().get(); + + auto cluster_reduce_iterator = + thrust::make_zip_iterator(thrust::make_tuple(d_cluster_hash, d_delta_Q)); + + auto output_edge_iterator2 = thrust::make_zip_iterator( + thrust::make_tuple(temp_cluster_v.data().get(), temp_delta_Q_v.data().get())); + + auto cluster_reduce_end = + thrust::reduce_by_key(rmm::exec_policy(stream_)->on(stream_), + src_indices_v_.begin(), + src_indices_v_.end(), + cluster_reduce_iterator, + temp_vertices_v.data().get(), + output_edge_iterator2, + thrust::equal_to(), + [] __device__(auto pair1, auto pair2) { + if (thrust::get<1>(pair1) > thrust::get<1>(pair2)) + return pair1; + else + return pair2; + }); + + vertex_t final_size = thrust::distance(temp_vertices_v.data().get(), cluster_reduce_end.first); + + vertex_t *d_temp_vertices = temp_vertices_v.data().get(); + vertex_t *d_temp_clusters = temp_cluster_v.data().get(); + weight_t *d_temp_delta_Q = temp_delta_Q_v.data().get(); + + thrust::for_each(rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(final_size), + [d_temp_delta_Q, + up_down, + d_next_cluster, + d_temp_vertices, + d_vertex_weights, + d_temp_clusters, + d_cluster_weights] __device__(vertex_t id) { + if ((d_temp_clusters[id] >= 0) && 
(d_temp_delta_Q[id] > weight_t{0.0})) { + vertex_t new_cluster = d_temp_clusters[id]; + vertex_t old_cluster = d_next_cluster[d_temp_vertices[id]]; + + if ((new_cluster > old_cluster) == up_down) { + weight_t src_weight = d_vertex_weights[d_temp_vertices[id]]; + d_next_cluster[d_temp_vertices[id]] = d_temp_clusters[id]; + + atomicAdd(d_cluster_weights + new_cluster, src_weight); + atomicAdd(d_cluster_weights + old_cluster, -src_weight); + } + } + }); + } + + void shrink_graph(graph_t &graph, vertex_t *d_cluster_vec) { + timer_start("shrinking graph"); + + // renumber the clusters to the range 0..(num_clusters-1) + vertex_t num_clusters = renumber_clusters(d_cluster_vec); + cluster_weights_v_.resize(num_clusters); + + // shrink our graph to represent the graph of supervertices + generate_superverticies_graph(graph, num_clusters); + + // assign each new vertex to its own cluster + thrust::sequence(rmm::exec_policy(stream_)->on(stream_), + cluster_v_.begin(), cluster_v_.end()); + + timer_stop(stream_); + } + + vertex_t renumber_clusters(vertex_t *d_cluster_vec) { + vertex_t *d_tmp_array = tmp_arr_v_.data().get(); + vertex_t *d_cluster_inverse = cluster_inverse_v_.data().get(); + vertex_t *d_cluster = cluster_v_.data().get(); + + vertex_t old_num_clusters = cluster_v_.size(); + + // + // New technique. 
Initialize cluster_inverse_v_ to 0 + // + thrust::fill(cluster_inverse_v_.begin(), cluster_inverse_v_.end(), vertex_t{0}); + + // + // Iterate over every element c in cluster_v_ and set cluster_inverse_v to 1 + // + auto first_1 = thrust::make_constant_iterator(1); + auto last_1 = first_1 + old_num_clusters; + + thrust::scatter(rmm::exec_policy(stream_)->on(stream_), first_1, last_1, cluster_v_.begin(), cluster_inverse_v_.begin()); + + // + // Now we'll copy all of the clusters that have a value of 1 into a temporary array + // + auto copy_end = thrust::copy_if(rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(old_num_clusters), + tmp_arr_v_.begin(), + [d_cluster_inverse] __device__ (const vertex_t idx) { + return d_cluster_inverse[idx] == 1; + }); + + vertex_t new_num_clusters = thrust::distance(tmp_arr_v_.begin(), copy_end); + tmp_arr_v_.resize(new_num_clusters); + + // + // Now we can set each value in cluster_inverse of a cluster to its index + // + thrust::for_each(rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(new_num_clusters), + [d_cluster_inverse, d_tmp_array] __device__ (const vertex_t idx) { + d_cluster_inverse[d_tmp_array[idx]] = idx; + }); + + + thrust::for_each(rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(old_num_clusters), + [d_cluster, d_cluster_inverse] __device__(vertex_t i) { + d_cluster[i] = d_cluster_inverse[d_cluster[i]]; + }); + + thrust::for_each(rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(number_of_vertices_), + [d_cluster_vec, d_cluster] __device__(vertex_t i) { + d_cluster_vec[i] = d_cluster[d_cluster_vec[i]]; + }); + + cluster_inverse_v_.resize(new_num_clusters); + cluster_v_.resize(new_num_clusters); + + return new_num_clusters; + } + + void generate_superverticies_graph(graph_t &graph, 
vertex_t num_clusters) { + rmm::device_vector new_src_v(graph.number_of_edges); + rmm::device_vector new_dst_v(graph.number_of_edges); + rmm::device_vector new_weight_v(graph.number_of_edges); + + vertex_t *d_old_src = src_indices_v_.data().get(); + vertex_t *d_old_dst = graph.indices; + weight_t *d_old_weight = graph.edge_data; + vertex_t *d_new_src = new_src_v.data().get(); + vertex_t *d_new_dst = new_dst_v.data().get(); + vertex_t *d_clusters = cluster_v_.data().get(); + weight_t *d_new_weight = new_weight_v.data().get(); + + // + // Renumber the COO + // + thrust::for_each( + rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(graph.number_of_edges), + [d_old_src, d_old_dst, d_old_weight, d_new_src, d_new_dst, d_new_weight, d_clusters] + __device__( edge_t e) { + d_new_src[e] = d_clusters[d_old_src[e]]; + d_new_dst[e] = d_clusters[d_old_dst[e]]; + d_new_weight[e] = d_old_weight[e]; + }); + + thrust::stable_sort_by_key( + rmm::exec_policy(stream_)->on(stream_), + d_new_dst, + d_new_dst + graph.number_of_edges, + thrust::make_zip_iterator(thrust::make_tuple(d_new_src, d_new_weight))); + thrust::stable_sort_by_key( + rmm::exec_policy(stream_)->on(stream_), + d_new_src, + d_new_src + graph.number_of_edges, + thrust::make_zip_iterator(thrust::make_tuple(d_new_dst, d_new_weight))); + + // + // Now we reduce by key to combine the weights of duplicate + // edges. 
+ // + auto start = thrust::make_zip_iterator(thrust::make_tuple(d_new_src, d_new_dst)); + auto new_start = thrust::make_zip_iterator(thrust::make_tuple(d_old_src, d_old_dst)); + auto new_end = thrust::reduce_by_key(rmm::exec_policy(stream_)->on(stream_), + start, + start + graph.number_of_edges, + d_new_weight, + new_start, + d_old_weight, + thrust::equal_to>(), + thrust::plus()); + + graph.number_of_edges = thrust::distance(new_start, new_end.first); + graph.number_of_vertices = num_clusters; + + detail::fill_offset(d_old_src, + graph.offsets, + num_clusters, + graph.number_of_edges, + stream_); + CHECK_CUDA(stream_); + + src_indices_v_.resize(graph.number_of_edges); + } + +protected: + vertex_t number_of_vertices_; + edge_t number_of_edges_; + cudaStream_t stream_; + + // + // Copy of graph + // + rmm::device_vector offsets_v_; + rmm::device_vector indices_v_; + rmm::device_vector weights_v_; + rmm::device_vector src_indices_v_; + + // + // Weights and clustering across iterations of algorithm + // + rmm::device_vector vertex_weights_v_; + rmm::device_vector cluster_weights_v_; + rmm::device_vector cluster_v_; + + // + // Temporaries used within kernels. Each iteration uses less + // of this memory + // + rmm::device_vector tmp_arr_v_; + rmm::device_vector cluster_inverse_v_; + +#ifdef TIMING + HighResTimer hr_timer_; +#endif +}; + + +} // namespace cugraph diff --git a/cpp/src/community/louvain_kernels.cu b/cpp/src/community/louvain_kernels.cu deleted file mode 100644 index c93e2d82fdf..00000000000 --- a/cpp/src/community/louvain_kernels.cu +++ /dev/null @@ -1,746 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include - -#include - -#include - -//#define TIMING - -#ifdef TIMING -#include -#endif - -#include - -namespace cugraph { -namespace detail { - -namespace { // anonym. -constexpr int BLOCK_SIZE_1D = 64; -} - -template -__global__ // - void - compute_vertex_sums(vertex_t n_vertex, - edge_t const *offsets, - weight_t const *weights, - weight_t *output) -{ - int src = blockDim.x * blockIdx.x + threadIdx.x; - - if ((src < n_vertex)) { - weight_t sum{0.0}; - - for (int i = offsets[src]; i < offsets[src + 1]; ++i) { sum += weights[i]; } - - output[src] = sum; - } -} - -template -weight_t modularity(weight_t total_edge_weight, - weight_t resolution, - GraphCSRView const &graph, - vertex_t const *d_cluster, - cudaStream_t stream) -{ - vertex_t n_verts = graph.number_of_vertices; - - rmm::device_vector inc(n_verts, weight_t{0.0}); - rmm::device_vector deg(n_verts, weight_t{0.0}); - - edge_t const *d_offsets = graph.offsets; - vertex_t const *d_indices = graph.indices; - weight_t const *d_weights = graph.edge_data; - weight_t *d_inc = inc.data().get(); - weight_t *d_deg = deg.data().get(); - - // FIXME: Already have weighted degree computed in main loop, - // could pass that in rather than computing d_deg... 
which - // would save an atomicAdd (synchronization) - // - thrust::for_each( - rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(graph.number_of_vertices), - [d_inc, d_deg, d_offsets, d_indices, d_weights, d_cluster] __device__(vertex_t v) { - vertex_t community = d_cluster[v]; - weight_t increase{0.0}; - weight_t degree{0.0}; - - for (edge_t loc = d_offsets[v]; loc < d_offsets[v + 1]; ++loc) { - vertex_t neighbor = d_indices[loc]; - degree += d_weights[loc]; - if (d_cluster[neighbor] == community) { increase += d_weights[loc]; } - } - - if (degree > weight_t{0.0}) atomicAdd(d_deg + community, degree); - if (increase > weight_t{0.0}) atomicAdd(d_inc + community, increase); - }); - - weight_t Q = thrust::transform_reduce( - rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(graph.number_of_vertices), - [d_deg, d_inc, total_edge_weight, resolution] __device__(vertex_t community) { - return ((d_inc[community] / total_edge_weight) - resolution * - (d_deg[community] * d_deg[community]) / - (total_edge_weight * total_edge_weight)); - }, - weight_t{0.0}, - thrust::plus()); - return Q; -} - -template float modularity( - float, float, GraphCSRView const &, int32_t const *, cudaStream_t); - -template double modularity( - double, double, GraphCSRView const &, int32_t const *, cudaStream_t); - -template -void generate_superverticies_graph(cugraph::GraphCSRView ¤t_graph, - rmm::device_vector &src_indices_v, - vertex_t new_number_of_vertices, - rmm::device_vector &cluster_v, - cudaStream_t stream) -{ - rmm::device_vector new_src_v(current_graph.number_of_edges); - rmm::device_vector new_dst_v(current_graph.number_of_edges); - rmm::device_vector new_weight_v(current_graph.number_of_edges); - - vertex_t *d_old_src = src_indices_v.data().get(); - vertex_t *d_old_dst = current_graph.indices; - weight_t *d_old_weight = current_graph.edge_data; - vertex_t *d_new_src = 
new_src_v.data().get(); - vertex_t *d_new_dst = new_dst_v.data().get(); - vertex_t *d_clusters = cluster_v.data().get(); - weight_t *d_new_weight = new_weight_v.data().get(); - - // - // Renumber the COO - // - thrust::for_each( - rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(current_graph.number_of_edges), - [d_old_src, d_old_dst, d_new_src, d_new_dst, d_clusters, d_new_weight, d_old_weight] __device__( - edge_t e) { - d_new_src[e] = d_clusters[d_old_src[e]]; - d_new_dst[e] = d_clusters[d_old_dst[e]]; - d_new_weight[e] = d_old_weight[e]; - }); - - thrust::stable_sort_by_key( - rmm::exec_policy(stream)->on(stream), - d_new_dst, - d_new_dst + current_graph.number_of_edges, - thrust::make_zip_iterator(thrust::make_tuple(d_new_src, d_new_weight))); - thrust::stable_sort_by_key( - rmm::exec_policy(stream)->on(stream), - d_new_src, - d_new_src + current_graph.number_of_edges, - thrust::make_zip_iterator(thrust::make_tuple(d_new_dst, d_new_weight))); - - // - // Now we reduce by key to combine the weights of duplicate - // edges. 
- // - auto start = thrust::make_zip_iterator(thrust::make_tuple(d_new_src, d_new_dst)); - auto new_start = thrust::make_zip_iterator(thrust::make_tuple(d_old_src, d_old_dst)); - auto new_end = thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), - start, - start + current_graph.number_of_edges, - d_new_weight, - new_start, - d_old_weight, - thrust::equal_to>(), - thrust::plus()); - - current_graph.number_of_edges = thrust::distance(new_start, new_end.first); - current_graph.number_of_vertices = new_number_of_vertices; - - detail::fill_offset(d_old_src, - current_graph.offsets, - new_number_of_vertices, - current_graph.number_of_edges, - stream); - CHECK_CUDA(stream); - - src_indices_v.resize(current_graph.number_of_edges); -} - -template void generate_superverticies_graph(GraphCSRView &, - rmm::device_vector &, - int32_t, - rmm::device_vector &, - cudaStream_t); - -template void generate_superverticies_graph(GraphCSRView &, - rmm::device_vector &, - int32_t, - rmm::device_vector &, - cudaStream_t); - -template -void compute_vertex_sums(GraphCSRView const &graph, - rmm::device_vector &sums, - cudaStream_t stream) -{ - dim3 block_size_1d = - dim3((graph.number_of_vertices + BLOCK_SIZE_1D * 4 - 1) / BLOCK_SIZE_1D * 4, 1, 1); - dim3 grid_size_1d = dim3(BLOCK_SIZE_1D * 4, 1, 1); - - compute_vertex_sums<<>>( - graph.number_of_vertices, graph.offsets, graph.edge_data, sums.data().get()); -} - -template void compute_vertex_sums(GraphCSRView const &, - rmm::device_vector &, - cudaStream_t); - -template void compute_vertex_sums(GraphCSRView const &, - rmm::device_vector &, - cudaStream_t); - -template -vertex_t renumber_clusters(vertex_t graph_num_vertices, - rmm::device_vector &cluster, - rmm::device_vector &temp_array, - rmm::device_vector &cluster_inverse, - vertex_t *cluster_vec, - cudaStream_t stream) -{ - // - // Now we're going to renumber the clusters from 0 to (k-1), where k is the number of - // clusters in this level of the dendogram. 
- // - thrust::copy( - rmm::exec_policy(stream)->on(stream), cluster.begin(), cluster.end(), temp_array.begin()); - thrust::sort(rmm::exec_policy(stream)->on(stream), temp_array.begin(), temp_array.end()); - auto tmp_end = - thrust::unique(rmm::exec_policy(stream)->on(stream), temp_array.begin(), temp_array.end()); - - vertex_t old_num_clusters = cluster.size(); - vertex_t new_num_clusters = thrust::distance(temp_array.begin(), tmp_end); - - cluster.resize(new_num_clusters); - temp_array.resize(new_num_clusters); - - thrust::fill(cluster_inverse.begin(), cluster_inverse.end(), vertex_t{-1}); - - vertex_t *d_tmp_array = temp_array.data().get(); - vertex_t *d_cluster_inverse = cluster_inverse.data().get(); - vertex_t *d_cluster = cluster.data().get(); - - thrust::for_each(rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(new_num_clusters), - [d_tmp_array, d_cluster_inverse] __device__(vertex_t i) { - d_cluster_inverse[d_tmp_array[i]] = i; - }); - - thrust::for_each(rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(old_num_clusters), - [d_cluster, d_cluster_inverse] __device__(vertex_t i) { - d_cluster[i] = d_cluster_inverse[d_cluster[i]]; - }); - - thrust::for_each(rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(graph_num_vertices), - [cluster_vec, d_cluster] __device__(vertex_t i) { - cluster_vec[i] = d_cluster[cluster_vec[i]]; - }); - - return new_num_clusters; -} - -template int32_t renumber_clusters(int32_t, - rmm::device_vector &, - rmm::device_vector &, - rmm::device_vector &, - int32_t *, - cudaStream_t); - -template -void compute_delta_modularity(weight_t total_edge_weight, - weight_t resolution, - GraphCSRView const &graph, - rmm::device_vector const &src_indices_v, - rmm::device_vector const &vertex_weights_v, - rmm::device_vector const &cluster_weights_v, - rmm::device_vector const 
&cluster_v, - rmm::device_vector &cluster_hash_v, - rmm::device_vector &delta_Q_v, - rmm::device_vector &tmp_size_V_v, - cudaStream_t stream) -{ - vertex_t const *d_src_indices = src_indices_v.data().get(); - vertex_t const *d_dst_indices = graph.indices; - edge_t const *d_offsets = graph.offsets; - weight_t const *d_weights = graph.edge_data; - vertex_t const *d_cluster = cluster_v.data().get(); - weight_t const *d_vertex_weights = vertex_weights_v.data().get(); - weight_t const *d_cluster_weights = cluster_weights_v.data().get(); - - vertex_t *d_cluster_hash = cluster_hash_v.data().get(); - weight_t *d_delta_Q = delta_Q_v.data().get(); - weight_t *d_old_cluster_sum = tmp_size_V_v.data().get(); - weight_t *d_new_cluster_sum = d_delta_Q; - - thrust::fill(cluster_hash_v.begin(), cluster_hash_v.end(), vertex_t{-1}); - thrust::fill(delta_Q_v.begin(), delta_Q_v.end(), weight_t{0.0}); - thrust::fill(tmp_size_V_v.begin(), tmp_size_V_v.end(), weight_t{0.0}); - - // - // For each source vertex, we're going to build a hash - // table to the destination cluster ids. We can use - // the offsets ranges to define the bounds of the hash - // table. 
- // - thrust::for_each(rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(graph.number_of_edges), - [d_src_indices, - d_dst_indices, - d_cluster, - d_offsets, - d_cluster_hash, - d_new_cluster_sum, - d_weights, - d_old_cluster_sum] __device__(edge_t loc) { - vertex_t src = d_src_indices[loc]; - vertex_t dst = d_dst_indices[loc]; - - if (src != dst) { - vertex_t old_cluster = d_cluster[src]; - vertex_t new_cluster = d_cluster[dst]; - edge_t hash_base = d_offsets[src]; - edge_t n_edges = d_offsets[src + 1] - hash_base; - - int h = (new_cluster % n_edges); - edge_t offset = hash_base + h; - while (d_cluster_hash[offset] != new_cluster) { - if (d_cluster_hash[offset] == -1) { - atomicCAS(d_cluster_hash + offset, -1, new_cluster); - } else { - h = (h + 1) % n_edges; - offset = hash_base + h; - } - } - - atomicAdd(d_new_cluster_sum + offset, d_weights[loc]); - - if (old_cluster == new_cluster) - atomicAdd(d_old_cluster_sum + src, d_weights[loc]); - } - }); - - thrust::for_each(rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(graph.number_of_edges), - [total_edge_weight, - resolution, - d_cluster_hash, - d_src_indices, - d_cluster, - d_vertex_weights, - d_delta_Q, - d_new_cluster_sum, - d_old_cluster_sum, - d_cluster_weights] __device__(edge_t loc) { - vertex_t new_cluster = d_cluster_hash[loc]; - if (new_cluster >= 0) { - vertex_t src = d_src_indices[loc]; - vertex_t old_cluster = d_cluster[src]; - weight_t k_k = d_vertex_weights[src]; - weight_t a_old = d_cluster_weights[old_cluster]; - weight_t a_new = d_cluster_weights[new_cluster]; - - // NOTE: d_delta_Q and d_new_cluster_sum are aliases - // for same device array to save memory - d_delta_Q[loc] = - 2 * - (((d_new_cluster_sum[loc] - d_old_cluster_sum[src]) / total_edge_weight) - - resolution * (a_new * k_k - a_old * k_k + k_k * k_k) / - (total_edge_weight * total_edge_weight)); - } else { - 
d_delta_Q[loc] = weight_t{0.0}; - } - }); -} - -template void compute_delta_modularity(float, - float, - GraphCSRView const &, - rmm::device_vector const &, - rmm::device_vector const &, - rmm::device_vector const &, - rmm::device_vector const &, - rmm::device_vector &, - rmm::device_vector &, - rmm::device_vector &, - cudaStream_t); - -template void compute_delta_modularity(double, - double, - GraphCSRView const &, - rmm::device_vector const &, - rmm::device_vector const &, - rmm::device_vector const &, - rmm::device_vector const &, - rmm::device_vector &, - rmm::device_vector &, - rmm::device_vector &, - cudaStream_t); - -template -void assign_nodes(GraphCSRView const &graph, - rmm::device_vector &delta_Q, - rmm::device_vector &cluster_hash, - rmm::device_vector const &src_indices, - rmm::device_vector &next_cluster, - rmm::device_vector const &vertex_weights, - rmm::device_vector &cluster_weights, - bool up_down, - cudaStream_t stream) -{ - rmm::device_vector temp_vertices(graph.number_of_vertices); - rmm::device_vector temp_cluster(graph.number_of_vertices, vertex_t{-1}); - rmm::device_vector temp_delta_Q(graph.number_of_vertices, weight_t{0.0}); - - weight_t *d_delta_Q = delta_Q.data().get(); - vertex_t *d_next_cluster = next_cluster.data().get(); - vertex_t *d_cluster_hash = cluster_hash.data().get(); - weight_t const *d_vertex_weights = vertex_weights.data().get(); - weight_t *d_cluster_weights = cluster_weights.data().get(); - - auto cluster_reduce_iterator = - thrust::make_zip_iterator(thrust::make_tuple(d_cluster_hash, d_delta_Q)); - - auto output_edge_iterator2 = thrust::make_zip_iterator( - thrust::make_tuple(temp_cluster.data().get(), temp_delta_Q.data().get())); - - auto cluster_reduce_end = - thrust::reduce_by_key(rmm::exec_policy(stream)->on(stream), - src_indices.begin(), - src_indices.end(), - cluster_reduce_iterator, - temp_vertices.data().get(), - output_edge_iterator2, - thrust::equal_to(), - [] __device__(auto pair1, auto pair2) { - if 
(thrust::get<1>(pair1) > thrust::get<1>(pair2)) - return pair1; - else - return pair2; - }); - - vertex_t final_size = thrust::distance(temp_vertices.data().get(), cluster_reduce_end.first); - - vertex_t *d_temp_vertices = temp_vertices.data().get(); - vertex_t *d_temp_clusters = temp_cluster.data().get(); - weight_t *d_temp_delta_Q = temp_delta_Q.data().get(); - - thrust::for_each(rmm::exec_policy(stream)->on(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(final_size), - [d_temp_delta_Q, - up_down, - d_next_cluster, - d_temp_vertices, - d_vertex_weights, - d_temp_clusters, - d_cluster_weights] __device__(vertex_t id) { - if ((d_temp_clusters[id] >= 0) && (d_temp_delta_Q[id] > weight_t{0.0})) { - vertex_t new_cluster = d_temp_clusters[id]; - vertex_t old_cluster = d_next_cluster[d_temp_vertices[id]]; - - if ((new_cluster > old_cluster) == up_down) { - weight_t src_weight = d_vertex_weights[d_temp_vertices[id]]; - d_next_cluster[d_temp_vertices[id]] = d_temp_clusters[id]; - - atomicAdd(d_cluster_weights + new_cluster, src_weight); - atomicAdd(d_cluster_weights + old_cluster, -src_weight); - } - } - }); -} - -template void assign_nodes(GraphCSRView const &, - rmm::device_vector &, - rmm::device_vector &, - rmm::device_vector const &, - rmm::device_vector &, - rmm::device_vector const &, - rmm::device_vector &, - bool, - cudaStream_t); - -template void assign_nodes(GraphCSRView const &, - rmm::device_vector &, - rmm::device_vector &, - rmm::device_vector const &, - rmm::device_vector &, - rmm::device_vector const &, - rmm::device_vector &, - bool, - cudaStream_t); - -template -weight_t update_clustering_by_delta_modularity( - weight_t total_edge_weight, - weight_t resolution, - GraphCSRView const &graph, - rmm::device_vector const &src_indices, - rmm::device_vector const &vertex_weights, - rmm::device_vector &cluster_weights, - rmm::device_vector &cluster, - cudaStream_t stream) -{ - rmm::device_vector next_cluster(cluster); - 
rmm::device_vector delta_Q(graph.number_of_edges); - rmm::device_vector cluster_hash(graph.number_of_edges); - rmm::device_vector old_cluster_sum(graph.number_of_vertices); - - vertex_t *d_cluster_hash = cluster_hash.data().get(); - vertex_t *d_cluster = cluster.data().get(); - weight_t const *d_vertex_weights = vertex_weights.data().get(); - weight_t *d_cluster_weights = cluster_weights.data().get(); - weight_t *d_delta_Q = delta_Q.data().get(); - - weight_t new_Q = modularity( - total_edge_weight, resolution, graph, cluster.data().get(), stream); - - weight_t cur_Q = new_Q - 1; - - // To avoid the potential of having two vertices swap clusters - // we will only allow vertices to move up (true) or down (false) - // during each iteration of the loop - bool up_down = true; - - while (new_Q > (cur_Q + 0.0001)) { - cur_Q = new_Q; - - compute_delta_modularity(total_edge_weight, - resolution, - graph, - src_indices, - vertex_weights, - cluster_weights, - cluster, - cluster_hash, - delta_Q, - old_cluster_sum, - stream); - - assign_nodes(graph, - delta_Q, - cluster_hash, - src_indices, - next_cluster, - vertex_weights, - cluster_weights, - up_down, - stream); - - up_down = !up_down; - - new_Q = modularity( - total_edge_weight, resolution, graph, next_cluster.data().get(), stream); - - if (new_Q > cur_Q) { - thrust::copy(rmm::exec_policy(stream)->on(stream), - next_cluster.begin(), - next_cluster.end(), - cluster.begin()); - } - } - - return cur_Q; -} - -template float update_clustering_by_delta_modularity(float, - float, - GraphCSRView const &, - rmm::device_vector const &, - rmm::device_vector const &, - rmm::device_vector &, - rmm::device_vector &, - cudaStream_t); - -template double update_clustering_by_delta_modularity( - double, - double, - GraphCSRView const &, - rmm::device_vector const &, - rmm::device_vector const &, - rmm::device_vector &, - rmm::device_vector &, - cudaStream_t); - -template -void louvain(GraphCSRView const &graph, - weight_t *final_modularity, 
- int *num_level, - vertex_t *cluster_vec, - int max_level, - weight_t resolution, - cudaStream_t stream) -{ -#ifdef TIMING - HighResTimer hr_timer; -#endif - - *num_level = 0; - - // - // Vectors to create a copy of the graph - // - rmm::device_vector offsets_v(graph.offsets, graph.offsets + graph.number_of_vertices + 1); - rmm::device_vector indices_v(graph.indices, graph.indices + graph.number_of_edges); - rmm::device_vector weights_v(graph.edge_data, graph.edge_data + graph.number_of_edges); - rmm::device_vector src_indices_v(graph.number_of_edges); - - // - // Weights and clustering across iterations of algorithm - // - rmm::device_vector vertex_weights_v(graph.number_of_vertices); - rmm::device_vector cluster_weights_v(graph.number_of_vertices); - rmm::device_vector cluster_v(graph.number_of_vertices); - - // - // Temporaries used within kernels. Each iteration uses less - // of this memory - // - rmm::device_vector tmp_arr_v(graph.number_of_vertices); - rmm::device_vector cluster_inverse_v(graph.number_of_vertices); - - weight_t total_edge_weight = - thrust::reduce(rmm::exec_policy(stream)->on(stream), weights_v.begin(), weights_v.end()); - weight_t best_modularity = -1; - - // - // Initialize every cluster to reference each vertex to itself - // - thrust::sequence(rmm::exec_policy(stream)->on(stream), cluster_v.begin(), cluster_v.end()); - thrust::copy( - rmm::exec_policy(stream)->on(stream), cluster_v.begin(), cluster_v.end(), cluster_vec); - - // - // Our copy of the graph. Each iteration of the outer loop will - // shrink this copy of the graph. - // - GraphCSRView current_graph(offsets_v.data().get(), - indices_v.data().get(), - weights_v.data().get(), - graph.number_of_vertices, - graph.number_of_edges); - - current_graph.get_source_indices(src_indices_v.data().get()); - - while (*num_level < max_level) { - // - // Sum the weights of all edges departing a vertex. This is - // loop invariant, so we'll compute it here. 
- // - // Cluster weights are equivalent to vertex weights with this initial - // graph - // -#ifdef TIMING - hr_timer.start("init"); -#endif - - cugraph::detail::compute_vertex_sums(current_graph, vertex_weights_v, stream); - thrust::copy(rmm::exec_policy(stream)->on(stream), - vertex_weights_v.begin(), - vertex_weights_v.end(), - cluster_weights_v.begin()); - -#ifdef TIMING - hr_timer.stop(); - - hr_timer.start("update_clustering"); -#endif - - weight_t new_Q = update_clustering_by_delta_modularity(total_edge_weight, - resolution, - current_graph, - src_indices_v, - vertex_weights_v, - cluster_weights_v, - cluster_v, - stream); - -#ifdef TIMING - hr_timer.stop(); -#endif - - if (new_Q <= best_modularity) { break; } - - best_modularity = new_Q; - -#ifdef TIMING - hr_timer.start("shrinking graph"); -#endif - - // renumber the clusters to the range 0..(num_clusters-1) - vertex_t num_clusters = renumber_clusters( - graph.number_of_vertices, cluster_v, tmp_arr_v, cluster_inverse_v, cluster_vec, stream); - cluster_weights_v.resize(num_clusters); - - // shrink our graph to represent the graph of supervertices - generate_superverticies_graph(current_graph, src_indices_v, num_clusters, cluster_v, stream); - - // assign each new vertex to its own cluster - thrust::sequence(rmm::exec_policy(stream)->on(stream), cluster_v.begin(), cluster_v.end()); - -#ifdef TIMING - hr_timer.stop(); -#endif - - (*num_level)++; - } - -#ifdef TIMING - hr_timer.display(std::cout); -#endif - - *final_modularity = best_modularity; -} - -template void louvain(GraphCSRView const &, - float *, - int *, - int32_t *, - int, - float, - cudaStream_t); -template void louvain(GraphCSRView const &, - double *, - int *, - int32_t *, - int, - double, - cudaStream_t); - -} // namespace detail -} // namespace cugraph diff --git a/cpp/src/community/louvain_kernels.hpp b/cpp/src/community/louvain_kernels.hpp deleted file mode 100644 index eabd562315a..00000000000 --- a/cpp/src/community/louvain_kernels.hpp +++ 
/dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include - -#include - -namespace cugraph { -namespace detail { - -template -weight_t modularity(weight_t total_edge_weight, - weight_t resolution, - GraphCSRView const &graph, - vertex_t const *d_cluster, - cudaStream_t stream = 0); - -template -void generate_superverticies_graph(cugraph::GraphCSRView ¤t_graph, - rmm::device_vector &src_indices_v, - vertex_t new_number_of_vertices, - rmm::device_vector &cluster_v, - cudaStream_t stream); - -template -void compute_vertex_sums(GraphCSRView const &graph, - rmm::device_vector &sums, - cudaStream_t stream); - -template -vertex_t renumber_clusters(vertex_t graph_num_vertices, - rmm::device_vector &cluster, - rmm::device_vector &temp_array, - rmm::device_vector &cluster_inverse, - vertex_t *cluster_vec, - cudaStream_t stream); - -template -void compute_delta_modularity(weight_t total_edge_weight, - weight_t resolution, - GraphCSRView const &graph, - rmm::device_vector const &src_indices_v, - rmm::device_vector const &vertex_weights_v, - rmm::device_vector const &cluster_weights_v, - rmm::device_vector const &cluster_v, - rmm::device_vector &cluster_hash_v, - rmm::device_vector &delta_Q_v, - rmm::device_vector &tmp_size_V_v, - cudaStream_t stream = 0); - -template -void assign_nodes(GraphCSRView const &graph, - rmm::device_vector &delta_Q, - rmm::device_vector 
&cluster_hash, - rmm::device_vector const &src_indices, - rmm::device_vector &next_cluster, - rmm::device_vector const &vertex_weights, - rmm::device_vector &cluster_weights, - bool up_down, - cudaStream_t stream); - -template -weight_t update_clustering_by_delta_modularity( - weight_t total_edge_weight, - weight_t resolution, - GraphCSRView const &graph, - rmm::device_vector const &src_indices, - rmm::device_vector const &vertex_weights, - rmm::device_vector &cluster_weights, - rmm::device_vector &cluster, - cudaStream_t stream); - -template -void louvain(GraphCSRView const &graph, - weight_t *final_modularity, - int *num_level, - vertex_t *cluster_vec, - int max_level, - weight_t resolution, - cudaStream_t stream = 0); - -} // namespace detail -} // namespace cugraph diff --git a/cpp/src/community/triangles_counting.cu b/cpp/src/community/triangles_counting.cu index f6670365652..265083d6ef4 100644 --- a/cpp/src/community/triangles_counting.cu +++ b/cpp/src/community/triangles_counting.cu @@ -826,8 +826,8 @@ void TrianglesCount::count() else if (mean_deg < DEG_THR2) tcount_wrp(); else { - const int shMinBlkXSM = 6; - if (size_t{m_shared_mem_per_block * 8 / shMinBlkXSM} < (size_t)m_mat.N) + const int shMinBlkXSM{6}; + if (int64_t{m_shared_mem_per_block * 8} < int64_t{m_mat.N * shMinBlkXSM}) tcount_b2b(); else tcount_bsh(); diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index e0f945639ca..0d81f35b7a0 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -172,7 +172,7 @@ ConfigureTest(BFS_TEST "${BFS_TEST_SRCS}" "") set(LOUVAIN_TEST_SRC "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/community/louvain_test.cpp") + "${CMAKE_CURRENT_SOURCE_DIR}/community/louvain_test.cu") ConfigureTest(LOUVAIN_TEST "${LOUVAIN_TEST_SRC}" "") diff --git a/cpp/tests/community/louvain_test.cpp b/cpp/tests/community/louvain_test.cu similarity index 99% rename from cpp/tests/community/louvain_test.cpp rename to 
cpp/tests/community/louvain_test.cu index 391af641b73..98e603738e6 100644 --- a/cpp/tests/community/louvain_test.cpp +++ b/cpp/tests/community/louvain_test.cu @@ -13,8 +13,6 @@ #include #include -#include - #include #include @@ -72,6 +70,7 @@ TEST(louvain, success) ASSERT_GE(modularity, 0.402777 * 0.95); } +#if 0 TEST(louvain_modularity, simple) { std::vector off_h = {0, 1, 4, 7, 10, 11, 12}; @@ -114,7 +113,7 @@ TEST(louvain_modularity, simple) cugraph::GraphCSRView G( offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); - q = cugraph::detail::modularity(float{12}, float{1}, G, cluster_v.data().get()); + q = cugraph::detail::modularity(float{12}, float{1}, G, cluster_v.data().get(), stream); ASSERT_FLOAT_EQ(q, float{-30.0 / 144.0}); @@ -207,5 +206,6 @@ TEST(louvain_modularity, simple) ASSERT_FLOAT_EQ(q, float{-24.0 / 144.0}); } +#endif CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/python/cugraph/tests/test_louvain.py b/python/cugraph/tests/test_louvain.py index b4b0b515899..49ef31603cd 100644 --- a/python/cugraph/tests/test_louvain.py +++ b/python/cugraph/tests/test_louvain.py @@ -79,12 +79,15 @@ def test_louvain_with_edgevals(graph_file): Gnx = nx.from_pandas_edgelist( M, source="0", target="1", edge_attr="weight", create_using=nx.Graph() ) - cu_map = {0: 0} - for i in range(len(cu_parts)): - cu_map[cu_parts["vertex"][i]] = cu_parts["partition"][i] + + cu_parts = cu_parts.to_pandas() + cu_map = dict(zip(cu_parts['vertex'], cu_parts['partition'])) + assert set(nx_parts.keys()) == set(cu_map.keys()) + cu_mod_nx = community.modularity(cu_map, Gnx) nx_mod = community.modularity(nx_parts, Gnx) + assert len(cu_parts) == len(nx_parts) assert cu_mod > (0.82 * nx_mod) assert abs(cu_mod - cu_mod_nx) < 0.0001 @@ -103,9 +106,10 @@ def test_louvain(graph_file): Gnx = nx.from_pandas_edgelist( M, source="0", target="1", edge_attr="weight", create_using=nx.Graph() ) - cu_map = {0: 0} - for i in range(len(cu_parts)): - 
cu_map[cu_parts["vertex"][i]] = cu_parts["partition"][i] + + cu_parts = cu_parts.to_pandas() + cu_map = dict(zip(cu_parts['vertex'], cu_parts['partition'])) + assert set(nx_parts.keys()) == set(cu_map.keys()) cu_mod_nx = community.modularity(cu_map, Gnx) From eddcc70c6c0f4eed88509d12ad453bb0e62af2d3 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Wed, 2 Sep 2020 20:56:11 -0400 Subject: [PATCH 06/74] clang format issues --- cpp/include/graph.hpp | 4 +- cpp/src/community/leiden.cu | 45 +++--- cpp/src/community/leiden.cuh | 66 ++++---- cpp/src/community/louvain.cu | 29 ++-- cpp/src/community/louvain.cuh | 284 +++++++++++++++++----------------- 5 files changed, 205 insertions(+), 223 deletions(-) diff --git a/cpp/include/graph.hpp b/cpp/include/graph.hpp index e4f072be357..8941afcd95d 100644 --- a/cpp/include/graph.hpp +++ b/cpp/include/graph.hpp @@ -54,9 +54,9 @@ template class GraphViewBase { public: using vertex_type = vertex_t; - using edge_type = edge_t; + using edge_type = edge_t; using weight_type = weight_t; - + raft::handle_t *handle; weight_t *edge_data; ///< edge weight diff --git a/cpp/src/community/leiden.cu b/cpp/src/community/leiden.cu index feb02a311e7..58b7abe8850 100644 --- a/cpp/src/community/leiden.cu +++ b/cpp/src/community/leiden.cu @@ -21,10 +21,10 @@ namespace detail { template std::pair leiden(GraphCSRView const &graph, - vertex_t *leiden_parts, - int max_level, - weight_t resolution, - cudaStream_t stream) + vertex_t *leiden_parts, + int max_level, + weight_t resolution, + cudaStream_t stream) { CUGRAPH_EXPECTS(graph.edge_data != nullptr, "API error, leiden expects a weighted graph"); CUGRAPH_EXPECTS(leiden_parts != nullptr, "API error, leiden_parts is null"); @@ -34,34 +34,25 @@ std::pair leiden(GraphCSRView const & return runner.compute(leiden_parts, max_level, resolution); } -} // namespace detail +} // namespace detail - template void leiden(GraphCSRView const &graph, - weight_t &final_modularity, - int &num_level, - vertex_t 
*leiden_parts, - int max_level, - weight_t resolution) { - + weight_t &final_modularity, + int &num_level, + vertex_t *leiden_parts, + int max_level, + weight_t resolution) +{ cudaStream_t stream{0}; - std::tie(num_level, final_modularity) = detail::leiden(graph, leiden_parts, max_level, resolution, stream); - + std::tie(num_level, final_modularity) = + detail::leiden(graph, leiden_parts, max_level, resolution, stream); } -template void leiden(GraphCSRView const &, - float &, - int &, - int32_t *, - int, - float); -template void leiden(GraphCSRView const &, - double &, - int &, - int32_t *, - int, - double); +template void leiden( + GraphCSRView const &, float &, int &, int32_t *, int, float); +template void leiden( + GraphCSRView const &, double &, int &, int32_t *, int, double); -} //namespace cugraph +} // namespace cugraph diff --git a/cpp/src/community/leiden.cuh b/cpp/src/community/leiden.cuh index f38d1c10ed7..16b19cf968b 100644 --- a/cpp/src/community/leiden.cuh +++ b/cpp/src/community/leiden.cuh @@ -20,21 +20,22 @@ namespace cugraph { template -class Leiden: public Louvain { -public: - using graph_t = graph_type; +class Leiden : public Louvain { + public: + using graph_t = graph_type; using vertex_t = typename graph_type::vertex_type; using edge_t = typename graph_type::edge_type; using weight_t = typename graph_type::weight_type; - Leiden(graph_type const &graph, cudaStream_t stream): - Louvain(graph, stream), - constraint_v_(graph.number_of_vertices) { + Leiden(graph_type const &graph, cudaStream_t stream) + : Louvain(graph, stream), constraint_v_(graph.number_of_vertices) + { } - weight_t update_clustering_constrained(weight_t total_edge_weight, weight_t resolution, - graph_type const &graph) { - + weight_t update_clustering_constrained(weight_t total_edge_weight, + weight_t resolution, + graph_type const &graph) + { this->timer_start("update_clustering_constrained"); rmm::device_vector next_cluster_v(this->cluster_v_); @@ -51,7 +52,8 @@ public: 
weight_t *d_delta_Q = delta_Q_v.data().get(); vertex_t *d_constraint = constraint_v_.data().get(); - weight_t new_Q = this->modularity(total_edge_weight, resolution, graph, this->cluster_v_.data().get()); + weight_t new_Q = + this->modularity(total_edge_weight, resolution, graph, this->cluster_v_.data().get()); weight_t cur_Q = new_Q - 1; @@ -63,10 +65,8 @@ public: while (new_Q > (cur_Q + 0.0001)) { cur_Q = new_Q; - this->compute_delta_modularity(total_edge_weight, resolution, graph, - cluster_hash_v, - old_cluster_sum_v, - delta_Q_v); + this->compute_delta_modularity( + total_edge_weight, resolution, graph, cluster_hash_v, old_cluster_sum_v, delta_Q_v); // Filter out positive delta_Q values for nodes not in the same constraint group thrust::for_each( @@ -79,11 +79,7 @@ public: if (start_cluster != end_cluster) d_delta_Q[i] = weight_t{0.0}; }); - this->assign_nodes(graph, - cluster_hash_v, - next_cluster_v, - delta_Q_v, - up_down); + this->assign_nodes(graph, cluster_hash_v, next_cluster_v, delta_Q_v, up_down); up_down = !up_down; @@ -101,22 +97,26 @@ public: return cur_Q; } - std::pair compute(vertex_t *d_cluster_vec, - int max_level, - weight_t resolution) { + std::pair compute(vertex_t *d_cluster_vec, int max_level, weight_t resolution) + { int num_level{0}; - weight_t total_edge_weight = - thrust::reduce(rmm::exec_policy(this->stream_)->on(this->stream_), this->weights_v_.begin(), this->weights_v_.end()); + weight_t total_edge_weight = thrust::reduce(rmm::exec_policy(this->stream_)->on(this->stream_), + this->weights_v_.begin(), + this->weights_v_.end()); weight_t best_modularity = weight_t{-1}; // // Initialize every cluster to reference each vertex to itself // - thrust::sequence(rmm::exec_policy(this->stream_)->on(this->stream_), this->cluster_v_.begin(), this->cluster_v_.end()); - thrust::copy( - rmm::exec_policy(this->stream_)->on(this->stream_), this->cluster_v_.begin(), this->cluster_v_.end(), d_cluster_vec); + 
thrust::sequence(rmm::exec_policy(this->stream_)->on(this->stream_), + this->cluster_v_.begin(), + this->cluster_v_.end()); + thrust::copy(rmm::exec_policy(this->stream_)->on(this->stream_), + this->cluster_v_.begin(), + this->cluster_v_.end(), + d_cluster_vec); // // Our copy of the graph. Each iteration of the outer loop will @@ -135,12 +135,12 @@ public: weight_t new_Q = this->update_clustering(total_edge_weight, resolution, current_graph); - thrust::copy( - rmm::exec_policy(this->stream_)->on(this->stream_), this->cluster_v_.begin(), this->cluster_v_.end(), constraint_v_.begin()); + thrust::copy(rmm::exec_policy(this->stream_)->on(this->stream_), + this->cluster_v_.begin(), + this->cluster_v_.end(), + constraint_v_.begin()); - new_Q = update_clustering_constrained(total_edge_weight, - resolution, - current_graph); + new_Q = update_clustering_constrained(total_edge_weight, resolution, current_graph); if (new_Q <= best_modularity) { break; } @@ -156,7 +156,7 @@ public: return std::make_pair(num_level, best_modularity); } -private: + private: rmm::device_vector constraint_v_; }; diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index 2d5957ead42..ff7dd820dd0 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -34,34 +34,25 @@ std::pair louvain(GraphCSRView const return runner.compute(louvain_parts, max_level, resolution); } -} // namespace detail +} // namespace detail - template void louvain(GraphCSRView const &graph, weight_t *final_modularity, int *num_level, vertex_t *louvain_parts, int max_level, - weight_t resolution) { - + weight_t resolution) +{ cudaStream_t stream{0}; - std::tie(*num_level, *final_modularity) = detail::louvain(graph, louvain_parts, max_level, resolution, stream); - + std::tie(*num_level, *final_modularity) = + detail::louvain(graph, louvain_parts, max_level, resolution, stream); } -template void louvain(GraphCSRView const &, - float *, - int *, - int32_t *, - int, - float); -template void 
louvain(GraphCSRView const &, - double *, - int *, - int32_t *, - int, - double); +template void louvain( + GraphCSRView const &, float *, int *, int32_t *, int, float); +template void louvain( + GraphCSRView const &, double *, int *, int32_t *, int, double); -} //namespace cugraph +} // namespace cugraph diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index f93bf8b90a2..27a888c9a88 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -19,8 +19,8 @@ #include -#include #include +#include //#define TIMING @@ -28,47 +28,46 @@ #include #endif - namespace cugraph { template class Louvain { -public: - using graph_t = graph_type; + public: + using graph_t = graph_type; using vertex_t = typename graph_type::vertex_type; using edge_t = typename graph_type::edge_type; using weight_t = typename graph_type::weight_type; - Louvain(graph_type const &graph, cudaStream_t stream): + Louvain(graph_type const &graph, cudaStream_t stream) + : #ifdef TIMING - hr_timer_(), + hr_timer_(), #endif - // FIXME: Don't really need to copy here but would need - // to change the logic to populate this properly - // in generate_superverticies_graph. - // - offsets_v_(graph.offsets, graph.offsets + graph.number_of_vertices + 1), - indices_v_(graph.indices, graph.indices + graph.number_of_edges), - weights_v_(graph.edge_data, graph.edge_data + graph.number_of_edges), - src_indices_v_(graph.number_of_edges), - vertex_weights_v_(graph.number_of_vertices), - cluster_weights_v_(graph.number_of_vertices), - cluster_v_(graph.number_of_vertices), - tmp_arr_v_(graph.number_of_vertices), - cluster_inverse_v_(graph.number_of_vertices), - number_of_vertices_(graph.number_of_vertices), - number_of_edges_(graph.number_of_edges), - stream_(stream) + // FIXME: Don't really need to copy here but would need + // to change the logic to populate this properly + // in generate_superverticies_graph. 
+ // + offsets_v_(graph.offsets, graph.offsets + graph.number_of_vertices + 1), + indices_v_(graph.indices, graph.indices + graph.number_of_edges), + weights_v_(graph.edge_data, graph.edge_data + graph.number_of_edges), + src_indices_v_(graph.number_of_edges), + vertex_weights_v_(graph.number_of_vertices), + cluster_weights_v_(graph.number_of_vertices), + cluster_v_(graph.number_of_vertices), + tmp_arr_v_(graph.number_of_vertices), + cluster_inverse_v_(graph.number_of_vertices), + number_of_vertices_(graph.number_of_vertices), + number_of_edges_(graph.number_of_edges), + stream_(stream) { } - weight_t modularity(weight_t total_edge_weight, weight_t resolution, graph_t const &graph, - vertex_t const *d_cluster) { - + vertex_t const *d_cluster) + { vertex_t n_verts = graph.number_of_vertices; rmm::device_vector inc(n_verts, weight_t{0.0}); @@ -109,8 +108,8 @@ public: thrust::make_counting_iterator(graph.number_of_vertices), [d_deg, d_inc, total_edge_weight, resolution] __device__(vertex_t community) { return ((d_inc[community] / total_edge_weight) - resolution * - (d_deg[community] * d_deg[community]) / - (total_edge_weight * total_edge_weight)); + (d_deg[community] * d_deg[community]) / + (total_edge_weight * total_edge_weight)); }, weight_t{0.0}, thrust::plus()); @@ -120,8 +119,8 @@ public: virtual std::pair compute(vertex_t *d_cluster_vec, int max_level, - weight_t resolution) { - + weight_t resolution) + { int num_level{0}; weight_t total_edge_weight = @@ -167,60 +166,64 @@ public: return std::make_pair(num_level, best_modularity); } -protected: - void timer_start(std::string const ®ion) { + protected: + void timer_start(std::string const ®ion) + { #ifdef TIMING hr_timer_.start(region); #endif } - void timer_stop(cudaStream_t stream) { + void timer_stop(cudaStream_t stream) + { #ifdef TIMING CUDA_TRY(cudaStreamSynchronize(stream)); hr_timer_.stop(); #endif } - void timer_display(std::ostream &os) { + void timer_display(std::ostream &os) + { #ifdef TIMING 
hr_timer_.display(os); #endif } -public: - void compute_vertex_and_cluster_weights(graph_type const &graph) { + public: + void compute_vertex_and_cluster_weights(graph_type const &graph) + { timer_start("compute_vertex_and_cluster_weights"); - edge_t const *d_offsets = graph.offsets; - vertex_t const *d_indices = graph.indices; - weight_t const *d_weights = graph.edge_data; - weight_t *d_vertex_weights = vertex_weights_v_.data().get(); + edge_t const *d_offsets = graph.offsets; + vertex_t const *d_indices = graph.indices; + weight_t const *d_weights = graph.edge_data; + weight_t *d_vertex_weights = vertex_weights_v_.data().get(); weight_t *d_cluster_weights = cluster_weights_v_.data().get(); // - // MNMG: copy_v_transform_reduce_out_nbr, then copy + // MNMG: copy_v_transform_reduce_out_nbr, then copy // - thrust::for_each(rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(graph.number_of_vertices), - [d_offsets, d_indices, d_weights, - d_vertex_weights, d_cluster_weights] __device__ (vertex_t src) { - weight_t sum{0.0}; + thrust::for_each( + rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(graph.number_of_vertices), + [d_offsets, d_indices, d_weights, d_vertex_weights, d_cluster_weights] __device__( + vertex_t src) { + weight_t sum{0.0}; - for (edge_t i = d_offsets[src] ; i < d_offsets[src + 1] ; ++i) { - sum += d_weights[i]; - } + for (edge_t i = d_offsets[src]; i < d_offsets[src + 1]; ++i) { sum += d_weights[i]; } - d_vertex_weights[src] = sum; - d_cluster_weights[src] = sum; - }); + d_vertex_weights[src] = sum; + d_cluster_weights[src] = sum; + }); timer_stop(stream_); } - virtual weight_t update_clustering(weight_t total_edge_weight, weight_t resolution, - graph_type const &graph) { - + virtual weight_t update_clustering(weight_t total_edge_weight, + weight_t resolution, + graph_type const &graph) + { timer_start("update_clustering"); // @@ 
-249,16 +252,10 @@ public: while (new_Q > (cur_Q + 0.0001)) { cur_Q = new_Q; - compute_delta_modularity(total_edge_weight, resolution, graph, - cluster_hash_v, - old_cluster_sum_v, - delta_Q_v); + compute_delta_modularity( + total_edge_weight, resolution, graph, cluster_hash_v, old_cluster_sum_v, delta_Q_v); - assign_nodes(graph, - cluster_hash_v, - next_cluster_v, - delta_Q_v, - up_down); + assign_nodes(graph, cluster_hash_v, next_cluster_v, delta_Q_v, up_down); up_down = !up_down; @@ -281,9 +278,8 @@ public: graph_type const &graph, rmm::device_vector &cluster_hash_v, rmm::device_vector &old_cluster_sum_v, - rmm::device_vector &delta_Q_v - ) { - + rmm::device_vector &delta_Q_v) + { vertex_t const *d_src_indices = src_indices_v_.data().get(); vertex_t const *d_dst_indices = graph.indices; edge_t const *d_offsets = graph.offsets; @@ -352,45 +348,46 @@ public: } }); - thrust::for_each(rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(graph.number_of_edges), - [total_edge_weight, - resolution, - d_cluster_hash, - d_src_indices, - d_cluster, - d_vertex_weights, - d_delta_Q, - d_new_cluster_sum, - d_old_cluster_sum, - d_cluster_weights] __device__(edge_t loc) { - vertex_t new_cluster = d_cluster_hash[loc]; - if (new_cluster >= 0) { - vertex_t src = d_src_indices[loc]; - vertex_t old_cluster = d_cluster[src]; - weight_t k_k = d_vertex_weights[src]; - weight_t a_old = d_cluster_weights[old_cluster]; - weight_t a_new = d_cluster_weights[new_cluster]; - - // NOTE: d_delta_Q and d_new_cluster_sum are aliases - // for same device array to save memory - d_delta_Q[loc] = - 2 * - (((d_new_cluster_sum[loc] - d_old_cluster_sum[src]) / total_edge_weight) - - resolution * (a_new * k_k - a_old * k_k + k_k * k_k) / - (total_edge_weight * total_edge_weight)); - } else { - d_delta_Q[loc] = weight_t{0.0}; - } - }); + thrust::for_each( + rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + 
thrust::make_counting_iterator(graph.number_of_edges), + [total_edge_weight, + resolution, + d_cluster_hash, + d_src_indices, + d_cluster, + d_vertex_weights, + d_delta_Q, + d_new_cluster_sum, + d_old_cluster_sum, + d_cluster_weights] __device__(edge_t loc) { + vertex_t new_cluster = d_cluster_hash[loc]; + if (new_cluster >= 0) { + vertex_t src = d_src_indices[loc]; + vertex_t old_cluster = d_cluster[src]; + weight_t k_k = d_vertex_weights[src]; + weight_t a_old = d_cluster_weights[old_cluster]; + weight_t a_new = d_cluster_weights[new_cluster]; + + // NOTE: d_delta_Q and d_new_cluster_sum are aliases + // for same device array to save memory + d_delta_Q[loc] = + 2 * (((d_new_cluster_sum[loc] - d_old_cluster_sum[src]) / total_edge_weight) - + resolution * (a_new * k_k - a_old * k_k + k_k * k_k) / + (total_edge_weight * total_edge_weight)); + } else { + d_delta_Q[loc] = weight_t{0.0}; + } + }); } void assign_nodes(graph_type const &graph, rmm::device_vector &cluster_hash_v, rmm::device_vector &next_cluster_v, rmm::device_vector &delta_Q_v, - bool up_down) { + bool up_down) + { rmm::device_vector temp_vertices_v(graph.number_of_vertices); rmm::device_vector temp_cluster_v(graph.number_of_vertices, vertex_t{-1}); rmm::device_vector temp_delta_Q_v(graph.number_of_vertices, weight_t{0.0}); @@ -453,7 +450,8 @@ public: }); } - void shrink_graph(graph_t &graph, vertex_t *d_cluster_vec) { + void shrink_graph(graph_t &graph, vertex_t *d_cluster_vec) + { timer_start("shrinking graph"); // renumber the clusters to the range 0..(num_clusters-1) @@ -464,18 +462,18 @@ public: generate_superverticies_graph(graph, num_clusters); // assign each new vertex to its own cluster - thrust::sequence(rmm::exec_policy(stream_)->on(stream_), - cluster_v_.begin(), cluster_v_.end()); + thrust::sequence(rmm::exec_policy(stream_)->on(stream_), cluster_v_.begin(), cluster_v_.end()); timer_stop(stream_); } - vertex_t renumber_clusters(vertex_t *d_cluster_vec) { + vertex_t renumber_clusters(vertex_t 
*d_cluster_vec) + { vertex_t *d_tmp_array = tmp_arr_v_.data().get(); vertex_t *d_cluster_inverse = cluster_inverse_v_.data().get(); vertex_t *d_cluster = cluster_v_.data().get(); - vertex_t old_num_clusters = cluster_v_.size(); + vertex_t old_num_clusters = cluster_v_.size(); // // New technique. Initialize cluster_inverse_v_ to 0 @@ -486,20 +484,23 @@ public: // Iterate over every element c in cluster_v_ and set cluster_inverse_v to 1 // auto first_1 = thrust::make_constant_iterator(1); - auto last_1 = first_1 + old_num_clusters; + auto last_1 = first_1 + old_num_clusters; - thrust::scatter(rmm::exec_policy(stream_)->on(stream_), first_1, last_1, cluster_v_.begin(), cluster_inverse_v_.begin()); + thrust::scatter(rmm::exec_policy(stream_)->on(stream_), + first_1, + last_1, + cluster_v_.begin(), + cluster_inverse_v_.begin()); // // Now we'll copy all of the clusters that have a value of 1 into a temporary array // - auto copy_end = thrust::copy_if(rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(old_num_clusters), - tmp_arr_v_.begin(), - [d_cluster_inverse] __device__ (const vertex_t idx) { - return d_cluster_inverse[idx] == 1; - }); + auto copy_end = thrust::copy_if( + rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(old_num_clusters), + tmp_arr_v_.begin(), + [d_cluster_inverse] __device__(const vertex_t idx) { return d_cluster_inverse[idx] == 1; }); vertex_t new_num_clusters = thrust::distance(tmp_arr_v_.begin(), copy_end); tmp_arr_v_.resize(new_num_clusters); @@ -510,11 +511,10 @@ public: thrust::for_each(rmm::exec_policy(stream_)->on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(new_num_clusters), - [d_cluster_inverse, d_tmp_array] __device__ (const vertex_t idx) { + [d_cluster_inverse, d_tmp_array] __device__(const vertex_t idx) { d_cluster_inverse[d_tmp_array[idx]] = idx; }); - 
thrust::for_each(rmm::exec_policy(stream_)->on(stream_), thrust::make_counting_iterator(0), thrust::make_counting_iterator(old_num_clusters), @@ -535,7 +535,8 @@ public: return new_num_clusters; } - void generate_superverticies_graph(graph_t &graph, vertex_t num_clusters) { + void generate_superverticies_graph(graph_t &graph, vertex_t num_clusters) + { rmm::device_vector new_src_v(graph.number_of_edges); rmm::device_vector new_dst_v(graph.number_of_edges); rmm::device_vector new_weight_v(graph.number_of_edges); @@ -551,16 +552,20 @@ public: // // Renumber the COO // - thrust::for_each( - rmm::exec_policy(stream_)->on(stream_), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(graph.number_of_edges), - [d_old_src, d_old_dst, d_old_weight, d_new_src, d_new_dst, d_new_weight, d_clusters] - __device__( edge_t e) { - d_new_src[e] = d_clusters[d_old_src[e]]; - d_new_dst[e] = d_clusters[d_old_dst[e]]; - d_new_weight[e] = d_old_weight[e]; - }); + thrust::for_each(rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(graph.number_of_edges), + [d_old_src, + d_old_dst, + d_old_weight, + d_new_src, + d_new_dst, + d_new_weight, + d_clusters] __device__(edge_t e) { + d_new_src[e] = d_clusters[d_old_src[e]]; + d_new_dst[e] = d_clusters[d_old_dst[e]]; + d_new_weight[e] = d_old_weight[e]; + }); thrust::stable_sort_by_key( rmm::exec_policy(stream_)->on(stream_), @@ -580,28 +585,24 @@ public: auto start = thrust::make_zip_iterator(thrust::make_tuple(d_new_src, d_new_dst)); auto new_start = thrust::make_zip_iterator(thrust::make_tuple(d_old_src, d_old_dst)); auto new_end = thrust::reduce_by_key(rmm::exec_policy(stream_)->on(stream_), - start, - start + graph.number_of_edges, - d_new_weight, - new_start, - d_old_weight, - thrust::equal_to>(), - thrust::plus()); + start, + start + graph.number_of_edges, + d_new_weight, + new_start, + d_old_weight, + thrust::equal_to>(), + thrust::plus()); graph.number_of_edges = 
thrust::distance(new_start, new_end.first); graph.number_of_vertices = num_clusters; - detail::fill_offset(d_old_src, - graph.offsets, - num_clusters, - graph.number_of_edges, - stream_); + detail::fill_offset(d_old_src, graph.offsets, num_clusters, graph.number_of_edges, stream_); CHECK_CUDA(stream_); src_indices_v_.resize(graph.number_of_edges); } -protected: + protected: vertex_t number_of_vertices_; edge_t number_of_edges_; cudaStream_t stream_; @@ -633,5 +634,4 @@ protected: #endif }; - -} // namespace cugraph +} // namespace cugraph From 213a124f8ee8a8bc902cad3cb23a1049d8a85612 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 2 Sep 2020 19:38:09 -0700 Subject: [PATCH 07/74] upgrade cub/thrust to latest commits --- cpp/CMakeLists.txt | 8 +++----- cpp/cmake/thrust-ret-if-fail.patch | 16 ---------------- 2 files changed, 3 insertions(+), 21 deletions(-) delete mode 100644 cpp/cmake/thrust-ret-if-fail.patch diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c260563446e..f43b829859a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -195,8 +195,8 @@ message("Fetching CUB") FetchContent_Declare( cub GIT_REPOSITORY https://github.com/thrust/cub.git - GIT_TAG 1.9.10 - GIT_SHALLOW true + # August 28, 2020 + GIT_TAG 2442f44532ffcc53298c7e3a298feb5134563860 ) FetchContent_GetProperties(cub) @@ -212,9 +212,7 @@ message("Fetching Thrust") FetchContent_Declare( thrust GIT_REPOSITORY https://github.com/thrust/thrust.git - GIT_TAG 1.9.10 - GIT_SHALLOW true - PATCH_COMMAND COMMAND patch -p1 < "${CMAKE_CURRENT_SOURCE_DIR}/cmake/thrust-ret-if-fail.patch" + GIT_TAG 52a8bda46c5c2128414d1d47f546b486ff0be2f0 ) FetchContent_GetProperties(thrust) diff --git a/cpp/cmake/thrust-ret-if-fail.patch b/cpp/cmake/thrust-ret-if-fail.patch deleted file mode 100644 index 990b3f993be..00000000000 --- a/cpp/cmake/thrust-ret-if-fail.patch +++ /dev/null @@ -1,16 +0,0 @@ -diff --git a/thrust/system/cuda/detail/core/util.h b/thrust/system/cuda/detail/core/util.h -index 
a2c87772..ea4ed640 100644 ---- a/thrust/system/cuda/detail/core/util.h -+++ b/thrust/system/cuda/detail/core/util.h -@@ -652,7 +652,10 @@ namespace core { - } - - #define CUDA_CUB_RET_IF_FAIL(e) \ -- if (cub::Debug((e), __FILE__, __LINE__)) return e; -+ { \ -+ auto const error = (e); \ -+ if (cub::Debug(error, __FILE__, __LINE__)) return error; \ -+ } - - // uninitialized - // ------- From 0e7f636bc3e97cbf32743bd212f763b5901e19c6 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Wed, 2 Sep 2020 19:40:29 -0700 Subject: [PATCH 08/74] add to changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 28000de4e77..dbf06b6723c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ## Improvements - PR 1081 MNMG Renumbering - sort partitions by degree - PR 1115 Replace deprecated rmm::mr::get_default_resource with rmm::mr::get_current_device_resource +- PR 1132 Upgrade CUB/Thrust to laest commits ## Bug Fixes - PR #1131 Show style checker errors with set +e From eb12dcda11efb13789e17327945adf85fe1c5b1a Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Thu, 3 Sep 2020 12:03:41 -0400 Subject: [PATCH 09/74] Change calling sequence for C++ louvain to return pair of values instead of passing pointers/references --- cpp/include/algorithms.hpp | 32 ++++++++++---------- cpp/src/community/ECG.cu | 9 ++---- cpp/src/community/leiden.cu | 22 ++++++-------- cpp/src/community/louvain.cu | 21 ++++++------- cpp/tests/community/leiden_test.cpp | 2 +- cpp/tests/community/louvain_test.cu | 2 +- python/cugraph/community/leiden.pxd | 5 ++- python/cugraph/community/leiden_wrapper.pyx | 20 +++++------- python/cugraph/community/louvain.pxd | 5 ++- python/cugraph/community/louvain_wrapper.pyx | 20 +++++------- 10 files changed, 59 insertions(+), 79 deletions(-) diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index 489f43a69c4..7bf6fcd9746 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -623,8 
+623,6 @@ void bfs(raft::handle_t const &handle, * @tparam weight_t Type of edge weights. Supported values : float or double. * * @param[in] graph input graph object (CSR) - * @param[out] final_modularity modularity of the returned clustering - * @param[out] num_level number of levels of the returned clustering * @param[out] clustering Pointer to device array where the clustering should be stored * @param[in] max_iter (optional) maximum number of iterations to run (default 100) * @param[in] resolution (optional) The value of the resolution parameter to use. @@ -633,14 +631,16 @@ void bfs(raft::handle_t const &handle, * communities, lower resolutions lead to fewer larger * communities. (default 1) * + * @return a pair containing: + * 1) number of levels of the returned clustering + * 2) modularity of the returned clustering + * */ template -void louvain(GraphCSRView const &graph, - weight_t *final_modularity, - int *num_level, - vertex_t *louvain_parts, - int max_iter = 100, - weight_t resolution = weight_t{1}); +std::pair louvain(GraphCSRView const &graph, + vertex_t *louvain_parts, + int max_iter = 100, + weight_t resolution = weight_t{1}); /** * @brief Leiden implementation @@ -663,8 +663,6 @@ void louvain(GraphCSRView const &graph, * @tparam weight_t Type of edge weights. Supported values : float or double. * * @param[in] graph input graph object (CSR) - * @param[out] final_modularity modularity of the returned clustering - * @param[out] num_level number of levels of the returned clustering * @param[out] clustering Pointer to device array where the clustering should be stored * @param[in] max_iter (optional) maximum number of iterations to run (default 100) * @param[in] resolution (optional) The value of the resolution parameter to use. @@ -672,14 +670,16 @@ void louvain(GraphCSRView const &graph, * of the communities. Higher resolutions lead to more smaller * communities, lower resolutions lead to fewer larger * communities. 
(default 1) + * + * @return a pair containing: + * 1) number of levels of the returned clustering + * 2) modularity of the returned clustering */ template -void leiden(GraphCSRView const &graph, - weight_t &final_modularity, - int &num_level, - vertex_t *leiden_parts, - int max_iter = 100, - weight_t resolution = weight_t{1}); +std::pair leiden(GraphCSRView const &graph, + vertex_t *leiden_parts, + int max_iter = 100, + weight_t resolution = weight_t{1}); /** * @brief Computes the ecg clustering of the given graph. diff --git a/cpp/src/community/ECG.cu b/cpp/src/community/ECG.cu index 47a80fa48d6..bea4987fb1c 100644 --- a/cpp/src/community/ECG.cu +++ b/cpp/src/community/ECG.cu @@ -142,10 +142,7 @@ void ecg(GraphCSRView const &graph, rmm::device_vector parts_v(size); vertex_t *d_parts = parts_v.data().get(); - weight_t final_modularity; - vertex_t num_level; - - cugraph::louvain(permuted_graph->view(), &final_modularity, &num_level, d_parts, 1); + cugraph::louvain(permuted_graph->view(), d_parts, 1); // For each edge in the graph determine whether the endpoints are in the same partition // Keep a sum for each edge of the total number of times its endpoints are in the same partition @@ -178,9 +175,7 @@ void ecg(GraphCSRView const &graph, louvain_graph.number_of_vertices = graph.number_of_vertices; louvain_graph.number_of_edges = graph.number_of_edges; - weight_t final_modularity; - vertex_t num_level; - cugraph::louvain(louvain_graph, &final_modularity, &num_level, ecg_parts, 100); + cugraph::louvain(louvain_graph, ecg_parts, 100); } // Explicit template instantiations. 
diff --git a/cpp/src/community/leiden.cu b/cpp/src/community/leiden.cu index 58b7abe8850..2286f0072f7 100644 --- a/cpp/src/community/leiden.cu +++ b/cpp/src/community/leiden.cu @@ -37,22 +37,20 @@ std::pair leiden(GraphCSRView const & } // namespace detail template -void leiden(GraphCSRView const &graph, - weight_t &final_modularity, - int &num_level, - vertex_t *leiden_parts, - int max_level, - weight_t resolution) +std::pair leiden(GraphCSRView const &graph, + vertex_t *leiden_parts, + int max_level, + weight_t resolution) { cudaStream_t stream{0}; - std::tie(num_level, final_modularity) = - detail::leiden(graph, leiden_parts, max_level, resolution, stream); + return detail::leiden(graph, leiden_parts, max_level, resolution, stream); } -template void leiden( - GraphCSRView const &, float &, int &, int32_t *, int, float); -template void leiden( - GraphCSRView const &, double &, int &, int32_t *, int, double); +template std::pair leiden( + GraphCSRView const &, int32_t *, int, float); + +template std::pair leiden( + GraphCSRView const &, int32_t *, int, double); } // namespace cugraph diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index ff7dd820dd0..335b92d8632 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -37,22 +37,19 @@ std::pair louvain(GraphCSRView const } // namespace detail template -void louvain(GraphCSRView const &graph, - weight_t *final_modularity, - int *num_level, - vertex_t *louvain_parts, - int max_level, - weight_t resolution) +std::pair louvain(GraphCSRView const &graph, + vertex_t *louvain_parts, + int max_level, + weight_t resolution) { cudaStream_t stream{0}; - std::tie(*num_level, *final_modularity) = - detail::louvain(graph, louvain_parts, max_level, resolution, stream); + return detail::louvain(graph, louvain_parts, max_level, resolution, stream); } -template void louvain( - GraphCSRView const &, float *, int *, int32_t *, int, float); -template void louvain( - GraphCSRView const &, 
double *, int *, int32_t *, int, double); +template std::pair louvain( + GraphCSRView const &, int32_t *, int, float); +template std::pair louvain( + GraphCSRView const &, int32_t *, int, double); } // namespace cugraph diff --git a/cpp/tests/community/leiden_test.cpp b/cpp/tests/community/leiden_test.cpp index 1e8ba85249d..ec65d9e06d8 100644 --- a/cpp/tests/community/leiden_test.cpp +++ b/cpp/tests/community/leiden_test.cpp @@ -59,7 +59,7 @@ TEST(leiden_karate, success) float modularity{0.0}; int num_level = 40; - cugraph::leiden(G, modularity, num_level, result_v.data().get()); + std::tie(num_level, modularity) = cugraph::leiden(G, result_v.data().get()); cudaMemcpy((void*)&(cluster_id[0]), result_v.data().get(), diff --git a/cpp/tests/community/louvain_test.cu b/cpp/tests/community/louvain_test.cu index 98e603738e6..76eb92f45f7 100644 --- a/cpp/tests/community/louvain_test.cu +++ b/cpp/tests/community/louvain_test.cu @@ -57,7 +57,7 @@ TEST(louvain, success) float modularity{0.0}; int num_level = 40; - cugraph::louvain(G, &modularity, &num_level, result_v.data().get()); + std::tie(num_level, modularity) = cugraph::louvain(G, result_v.data().get()); cudaMemcpy((void*)&(cluster_id[0]), result_v.data().get(), diff --git a/python/cugraph/community/leiden.pxd b/python/cugraph/community/leiden.pxd index 1c6009b30b6..82d5e88becc 100644 --- a/python/cugraph/community/leiden.pxd +++ b/python/cugraph/community/leiden.pxd @@ -16,15 +16,14 @@ # cython: embedsignature = True # cython: language_level = 3 +from libcpp.utility cimport pair from cugraph.structure.graph_new cimport * cdef extern from "algorithms.hpp" namespace "cugraph": - cdef void leiden[vertex_t,edge_t,weight_t]( + cdef pair[int, weight_t] leiden[vertex_t,edge_t,weight_t]( const GraphCSRView[vertex_t,edge_t,weight_t] &graph, - weight_t &final_modularity, - int &num_level, vertex_t *leiden_parts, int max_level, weight_t resolution) except + diff --git a/python/cugraph/community/leiden_wrapper.pyx 
b/python/cugraph/community/leiden_wrapper.pyx index 9ed220bb2a2..53e0411bd07 100644 --- a/python/cugraph/community/leiden_wrapper.pyx +++ b/python/cugraph/community/leiden_wrapper.pyx @@ -69,12 +69,10 @@ def leiden(input_graph, max_iter, resolution): c_weights, num_verts, num_edges) graph_float.get_vertex_identifiers(c_identifier) - c_leiden(graph_float, - final_modularity_float, - num_level, - c_partition, - max_iter, - resolution) + num_level, final_modularity_float = c_leiden(graph_float, + c_partition, + max_iter, + resolution) final_modularity = final_modularity_float else: @@ -82,12 +80,10 @@ def leiden(input_graph, max_iter, resolution): c_weights, num_verts, num_edges) graph_double.get_vertex_identifiers(c_identifier) - c_leiden(graph_double, - final_modularity_double, - num_level, - c_partition, - max_iter, - resolution) + num_level, final_modularity_double = c_leiden(graph_double, + c_partition, + max_iter, + resolution) final_modularity = final_modularity_double return df, final_modularity diff --git a/python/cugraph/community/louvain.pxd b/python/cugraph/community/louvain.pxd index 7cc72b4d0ed..c2fa00b4492 100644 --- a/python/cugraph/community/louvain.pxd +++ b/python/cugraph/community/louvain.pxd @@ -16,15 +16,14 @@ # cython: embedsignature = True # cython: language_level = 3 +from libcpp.utility cimport pair from cugraph.structure.graph_new cimport * cdef extern from "algorithms.hpp" namespace "cugraph": - cdef void louvain[vertex_t,edge_t,weight_t]( + cdef pair[int, weight_t] louvain[vertex_t,edge_t,weight_t]( const GraphCSRView[vertex_t,edge_t,weight_t] &graph, - weight_t *final_modularity, - int *num_level, vertex_t *louvain_parts, int max_level, weight_t resolution) except + diff --git a/python/cugraph/community/louvain_wrapper.pyx b/python/cugraph/community/louvain_wrapper.pyx index 79db57125b1..79b43168b14 100644 --- a/python/cugraph/community/louvain_wrapper.pyx +++ b/python/cugraph/community/louvain_wrapper.pyx @@ -69,12 +69,10 @@ def 
louvain(input_graph, max_iter, resolution): c_weights, num_verts, num_edges) graph_float.get_vertex_identifiers(c_identifier) - c_louvain(graph_float, - &final_modularity_float, - &num_level, - c_partition, - max_iter, - resolution) + num_level, final_modularity_float = c_louvain(graph_float, + c_partition, + max_iter, + resolution) final_modularity = final_modularity_float else: @@ -82,12 +80,10 @@ def louvain(input_graph, max_iter, resolution): c_weights, num_verts, num_edges) graph_double.get_vertex_identifiers(c_identifier) - c_louvain(graph_double, - &final_modularity_double, - &num_level, - c_partition, - max_iter, - resolution) + num_level, final_modularity_double = c_louvain(graph_double, + c_partition, + max_iter, + resolution) final_modularity = final_modularity_double return df, final_modularity From fea92308b95823a532365ea9d3e7bd1b8f66edb9 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Thu, 3 Sep 2020 13:03:38 -0400 Subject: [PATCH 10/74] add handle to louvain/leiden/ecg --- cpp/include/algorithms.hpp | 23 ++++++++++++++------ cpp/src/community/ECG.cu | 13 ++++++----- cpp/src/community/leiden.cu | 16 ++++++++------ cpp/src/community/leiden.cuh | 4 ++-- cpp/src/community/louvain.cu | 14 +++++++----- cpp/src/community/louvain.cuh | 5 ++++- cpp/tests/community/ecg_test.cu | 6 +++-- cpp/tests/community/leiden_test.cpp | 3 ++- cpp/tests/community/louvain_test.cu | 4 +++- python/cugraph/community/ecg.pxd | 1 + python/cugraph/community/ecg_wrapper.pyx | 15 +++++++++++-- python/cugraph/community/leiden.pxd | 1 + python/cugraph/community/leiden_wrapper.pyx | 9 ++++++-- python/cugraph/community/louvain.pxd | 1 + python/cugraph/community/louvain_wrapper.pyx | 9 ++++++-- 15 files changed, 86 insertions(+), 38 deletions(-) diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index 7bf6fcd9746..f87a8bdc0cb 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -622,6 +622,7 @@ void bfs(raft::handle_t const &handle, * 
Supported value : int (signed, 32-bit) * @tparam weight_t Type of edge weights. Supported values : float or double. * + * @param[in] handle Library handle (RAFT). If a communicator is set in the handle, * @param[in] graph input graph object (CSR) * @param[out] clustering Pointer to device array where the clustering should be stored * @param[in] max_iter (optional) maximum number of iterations to run (default 100) @@ -637,7 +638,8 @@ void bfs(raft::handle_t const &handle, * */ template -std::pair louvain(GraphCSRView const &graph, +std::pair louvain(raft::handle_t const &handle, + GraphCSRView const &graph, vertex_t *louvain_parts, int max_iter = 100, weight_t resolution = weight_t{1}); @@ -662,6 +664,7 @@ std::pair louvain(GraphCSRView const * Supported value : int (signed, 32-bit) * @tparam weight_t Type of edge weights. Supported values : float or double. * + * @param[in] handle Library handle (RAFT). If a communicator is set in the handle, * @param[in] graph input graph object (CSR) * @param[out] clustering Pointer to device array where the clustering should be stored * @param[in] max_iter (optional) maximum number of iterations to run (default 100) @@ -676,7 +679,8 @@ std::pair louvain(GraphCSRView const * 2) modularity of the returned clustering */ template -std::pair leiden(GraphCSRView const &graph, +std::pair leiden(raft::handle_t const &handle, + GraphCSRView const &graph, vertex_t *leiden_parts, int max_iter = 100, weight_t resolution = weight_t{1}); @@ -692,12 +696,13 @@ std::pair leiden(GraphCSRView const & * * @throws cugraph::logic_error when an error occurs. * - * @tparam VT Type of vertex identifiers. Supported value : int (signed, + * @tparam vertex_t Type of vertex identifiers. Supported value : int (signed, * 32-bit) - * @tparam ET Type of edge identifiers. Supported value : int (signed, + * @tparam edge_t Type of edge identifiers. Supported value : int (signed, * 32-bit) - * @tparam WT Type of edge weights. Supported values : float or double. 
+ * @tparam weight_t Type of edge weights. Supported values : float or double. * + * @param[in] handle Library handle (RAFT). If a communicator is set in the handle, * @param[in] graph_coo input graph object (COO) * @param[in] graph_csr input graph object (CSR) * @param[in] min_weight The minimum weight parameter @@ -705,8 +710,12 @@ std::pair leiden(GraphCSRView const & * @param[out] ecg_parts A device pointer to array where the partitioning should be * written */ -template -void ecg(GraphCSRView const &graph_csr, WT min_weight, VT ensemble_size, VT *ecg_parts); +template +void ecg(raft::handle_t const &handle, + GraphCSRView const &graph_csr, + weight_t min_weight, + vertex_t ensemble_size, + vertex_t *ecg_parts); namespace triangle { diff --git a/cpp/src/community/ECG.cu b/cpp/src/community/ECG.cu index bea4987fb1c..9e185110650 100644 --- a/cpp/src/community/ECG.cu +++ b/cpp/src/community/ECG.cu @@ -108,7 +108,8 @@ void get_permutation_vector(T size, T seed, T *permutation, cudaStream_t stream) namespace cugraph { template -void ecg(GraphCSRView const &graph, +void ecg(raft::handle_t const &handle, + GraphCSRView const &graph, weight_t min_weight, vertex_t ensemble_size, vertex_t *ecg_parts) @@ -142,7 +143,7 @@ void ecg(GraphCSRView const &graph, rmm::device_vector parts_v(size); vertex_t *d_parts = parts_v.data().get(); - cugraph::louvain(permuted_graph->view(), d_parts, 1); + cugraph::louvain(handle, permuted_graph->view(), d_parts, 1); // For each edge in the graph determine whether the endpoints are in the same partition // Keep a sum for each edge of the total number of times its endpoints are in the same partition @@ -175,15 +176,17 @@ void ecg(GraphCSRView const &graph, louvain_graph.number_of_vertices = graph.number_of_vertices; louvain_graph.number_of_edges = graph.number_of_edges; - cugraph::louvain(louvain_graph, ecg_parts, 100); + cugraph::louvain(handle, louvain_graph, ecg_parts, 100); } // Explicit template instantiations. 
-template void ecg(GraphCSRView const &graph, +template void ecg(raft::handle_t const &, + GraphCSRView const &graph, float min_weight, int32_t ensemble_size, int32_t *ecg_parts); -template void ecg(GraphCSRView const &graph, +template void ecg(raft::handle_t const &, + GraphCSRView const &graph, double min_weight, int32_t ensemble_size, int32_t *ecg_parts); diff --git a/cpp/src/community/leiden.cu b/cpp/src/community/leiden.cu index 2286f0072f7..3d77555fc06 100644 --- a/cpp/src/community/leiden.cu +++ b/cpp/src/community/leiden.cu @@ -20,7 +20,8 @@ namespace cugraph { namespace detail { template -std::pair leiden(GraphCSRView const &graph, +std::pair leiden(raft::handle_t const &handle, + GraphCSRView const &graph, vertex_t *leiden_parts, int max_level, weight_t resolution, @@ -29,7 +30,7 @@ std::pair leiden(GraphCSRView const & CUGRAPH_EXPECTS(graph.edge_data != nullptr, "API error, leiden expects a weighted graph"); CUGRAPH_EXPECTS(leiden_parts != nullptr, "API error, leiden_parts is null"); - Leiden> runner(graph, stream); + Leiden> runner(handle, graph, stream); return runner.compute(leiden_parts, max_level, resolution); } @@ -37,20 +38,21 @@ std::pair leiden(GraphCSRView const & } // namespace detail template -std::pair leiden(GraphCSRView const &graph, +std::pair leiden(raft::handle_t const &handle, + GraphCSRView const &graph, vertex_t *leiden_parts, int max_level, weight_t resolution) { cudaStream_t stream{0}; - return detail::leiden(graph, leiden_parts, max_level, resolution, stream); + return detail::leiden(handle, graph, leiden_parts, max_level, resolution, stream); } template std::pair leiden( - GraphCSRView const &, int32_t *, int, float); + raft::handle_t const &, GraphCSRView const &, int32_t *, int, float); template std::pair leiden( - GraphCSRView const &, int32_t *, int, double); + raft::handle_t const &, GraphCSRView const &, int32_t *, int, double); -} // namespace cugraph +} // namespace cugraph diff --git a/cpp/src/community/leiden.cuh 
b/cpp/src/community/leiden.cuh index 16b19cf968b..df1e64cc9f0 100644 --- a/cpp/src/community/leiden.cuh +++ b/cpp/src/community/leiden.cuh @@ -27,8 +27,8 @@ class Leiden : public Louvain { using edge_t = typename graph_type::edge_type; using weight_t = typename graph_type::weight_type; - Leiden(graph_type const &graph, cudaStream_t stream) - : Louvain(graph, stream), constraint_v_(graph.number_of_vertices) + Leiden(raft::handle_t const &handle, graph_type const &graph, cudaStream_t stream) + : Louvain(handle, graph, stream), constraint_v_(graph.number_of_vertices) { } diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index 335b92d8632..166057d1859 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -20,7 +20,8 @@ namespace cugraph { namespace detail { template -std::pair louvain(GraphCSRView const &graph, +std::pair louvain(raft::handle_t const &handle, + GraphCSRView const &graph, vertex_t *louvain_parts, int max_level, weight_t resolution, @@ -29,7 +30,7 @@ std::pair louvain(GraphCSRView const CUGRAPH_EXPECTS(graph.edge_data != nullptr, "API error, louvain expects a weighted graph"); CUGRAPH_EXPECTS(louvain_parts != nullptr, "API error, louvain_parts is null"); - Louvain> runner(graph, stream); + Louvain> runner(handle, graph, stream); return runner.compute(louvain_parts, max_level, resolution); } @@ -37,19 +38,20 @@ std::pair louvain(GraphCSRView const } // namespace detail template -std::pair louvain(GraphCSRView const &graph, +std::pair louvain(raft::handle_t const &handle, + GraphCSRView const &graph, vertex_t *louvain_parts, int max_level, weight_t resolution) { cudaStream_t stream{0}; - return detail::louvain(graph, louvain_parts, max_level, resolution, stream); + return detail::louvain(handle, graph, louvain_parts, max_level, resolution, stream); } template std::pair louvain( - GraphCSRView const &, int32_t *, int, float); + raft::handle_t const &, GraphCSRView const &, int32_t *, int, float); template 
std::pair louvain( - GraphCSRView const &, int32_t *, int, double); + raft::handle_t const &, GraphCSRView const &, int32_t *, int, double); } // namespace cugraph diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index 27a888c9a88..c6a3c30d591 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -38,11 +38,13 @@ class Louvain { using edge_t = typename graph_type::edge_type; using weight_t = typename graph_type::weight_type; - Louvain(graph_type const &graph, cudaStream_t stream) + Louvain(raft::handle_t const &handle, + graph_type const &graph, cudaStream_t stream) : #ifdef TIMING hr_timer_(), #endif + handle_(handle), // FIXME: Don't really need to copy here but would need // to change the logic to populate this properly @@ -603,6 +605,7 @@ class Louvain { } protected: + raft::handle_t const &handle_; vertex_t number_of_vertices_; edge_t number_of_edges_; cudaStream_t stream_; diff --git a/cpp/tests/community/ecg_test.cu b/cpp/tests/community/ecg_test.cu index 6246a42021d..b20dd365ef2 100644 --- a/cpp/tests/community/ecg_test.cu +++ b/cpp/tests/community/ecg_test.cu @@ -45,7 +45,8 @@ TEST(ecg, success) cugraph::GraphCSRView graph_csr( offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); - cugraph::ecg(graph_csr, .05, 16, result_v.data().get()); + raft::handle_t handle; + cugraph::ecg(handle, graph_csr, .05, 16, result_v.data().get()); cluster_id = result_v; int max = *max_element(cluster_id.begin(), cluster_id.end()); @@ -106,7 +107,8 @@ TEST(ecg, dolphin) cugraph::GraphCSRView graph_csr( offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); - cugraph::ecg(graph_csr, .05, 16, result_v.data().get()); + raft::handle_t handle; + cugraph::ecg(handle, graph_csr, .05, 16, result_v.data().get()); cluster_id = result_v; int max = *max_element(cluster_id.begin(), cluster_id.end()); diff --git a/cpp/tests/community/leiden_test.cpp 
b/cpp/tests/community/leiden_test.cpp index ec65d9e06d8..c8b14ebd8a1 100644 --- a/cpp/tests/community/leiden_test.cpp +++ b/cpp/tests/community/leiden_test.cpp @@ -59,7 +59,8 @@ TEST(leiden_karate, success) float modularity{0.0}; int num_level = 40; - std::tie(num_level, modularity) = cugraph::leiden(G, result_v.data().get()); + raft::handle_t handle; + std::tie(num_level, modularity) = cugraph::leiden(handle, G, result_v.data().get()); cudaMemcpy((void*)&(cluster_id[0]), result_v.data().get(), diff --git a/cpp/tests/community/louvain_test.cu b/cpp/tests/community/louvain_test.cu index 76eb92f45f7..d3f59161336 100644 --- a/cpp/tests/community/louvain_test.cu +++ b/cpp/tests/community/louvain_test.cu @@ -57,7 +57,9 @@ TEST(louvain, success) float modularity{0.0}; int num_level = 40; - std::tie(num_level, modularity) = cugraph::louvain(G, result_v.data().get()); + raft::handle_t handle; + + std::tie(num_level, modularity) = cugraph::louvain(handle, G, result_v.data().get()); cudaMemcpy((void*)&(cluster_id[0]), result_v.data().get(), diff --git a/python/cugraph/community/ecg.pxd b/python/cugraph/community/ecg.pxd index 33af448754b..ba20ef8e849 100644 --- a/python/cugraph/community/ecg.pxd +++ b/python/cugraph/community/ecg.pxd @@ -22,6 +22,7 @@ from cugraph.structure.graph_new cimport * cdef extern from "algorithms.hpp" namespace "cugraph": cdef void ecg[VT,ET,WT]( + const handle_t &handle, const GraphCSRView[VT,ET,WT] &graph, WT min_weight, VT ensemble_size, diff --git a/python/cugraph/community/ecg_wrapper.pyx b/python/cugraph/community/ecg_wrapper.pyx index 913a633c088..5f9d951a3ce 100644 --- a/python/cugraph/community/ecg_wrapper.pyx +++ b/python/cugraph/community/ecg_wrapper.pyx @@ -36,6 +36,9 @@ def ecg(input_graph, min_weight=.05, ensemble_size=16): if input_graph.adjlist.weights is None: raise Exception('ECG must be called on a weighted graph') + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + [offsets, indices] = 
graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32, np.int64]) [weights] = graph_new_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) @@ -62,13 +65,21 @@ def ecg(input_graph, min_weight=.05, ensemble_size=16): graph_float.get_vertex_identifiers(c_identifier) - c_ecg[int,int,float](graph_float, min_weight, ensemble_size, c_partition) + c_ecg[int,int,float](handle_ptr.get()[0], + graph_float, + min_weight, + ensemble_size, + c_partition) else: graph_double = GraphCSRView[int,int,double](c_offsets, c_indices, c_weights, num_verts, num_edges) graph_double.get_vertex_identifiers(c_identifier) - c_ecg[int,int,double](graph_double, min_weight, ensemble_size, c_partition) + c_ecg[int,int,double](handle_ptr.get()[0], + graph_double, + min_weight, + ensemble_size, + c_partition) return df diff --git a/python/cugraph/community/leiden.pxd b/python/cugraph/community/leiden.pxd index 82d5e88becc..9238d845605 100644 --- a/python/cugraph/community/leiden.pxd +++ b/python/cugraph/community/leiden.pxd @@ -23,6 +23,7 @@ from cugraph.structure.graph_new cimport * cdef extern from "algorithms.hpp" namespace "cugraph": cdef pair[int, weight_t] leiden[vertex_t,edge_t,weight_t]( + const handle_t &handle, const GraphCSRView[vertex_t,edge_t,weight_t] &graph, vertex_t *leiden_parts, int max_level, diff --git a/python/cugraph/community/leiden_wrapper.pyx b/python/cugraph/community/leiden_wrapper.pyx index 53e0411bd07..fdb909ee7cb 100644 --- a/python/cugraph/community/leiden_wrapper.pyx +++ b/python/cugraph/community/leiden_wrapper.pyx @@ -33,6 +33,9 @@ def leiden(input_graph, max_iter, resolution): if not input_graph.adjlist: input_graph.view_adj_list() + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + weights = None final_modularity = None @@ -69,7 +72,8 @@ def leiden(input_graph, max_iter, resolution): c_weights, num_verts, num_edges) graph_float.get_vertex_identifiers(c_identifier) - 
num_level, final_modularity_float = c_leiden(graph_float, + num_level, final_modularity_float = c_leiden(handle_ptr.get()[0], + graph_float, c_partition, max_iter, resolution) @@ -80,7 +84,8 @@ def leiden(input_graph, max_iter, resolution): c_weights, num_verts, num_edges) graph_double.get_vertex_identifiers(c_identifier) - num_level, final_modularity_double = c_leiden(graph_double, + num_level, final_modularity_double = c_leiden(handle_ptr.get()[0], + graph_double, c_partition, max_iter, resolution) diff --git a/python/cugraph/community/louvain.pxd b/python/cugraph/community/louvain.pxd index c2fa00b4492..bccd42d5501 100644 --- a/python/cugraph/community/louvain.pxd +++ b/python/cugraph/community/louvain.pxd @@ -23,6 +23,7 @@ from cugraph.structure.graph_new cimport * cdef extern from "algorithms.hpp" namespace "cugraph": cdef pair[int, weight_t] louvain[vertex_t,edge_t,weight_t]( + const handle_t &handle, const GraphCSRView[vertex_t,edge_t,weight_t] &graph, vertex_t *louvain_parts, int max_level, diff --git a/python/cugraph/community/louvain_wrapper.pyx b/python/cugraph/community/louvain_wrapper.pyx index 79b43168b14..50dc69d7b5b 100644 --- a/python/cugraph/community/louvain_wrapper.pyx +++ b/python/cugraph/community/louvain_wrapper.pyx @@ -33,6 +33,9 @@ def louvain(input_graph, max_iter, resolution): if not input_graph.adjlist: input_graph.view_adj_list() + cdef unique_ptr[handle_t] handle_ptr + handle_ptr.reset(new handle_t()) + weights = None final_modularity = None @@ -69,7 +72,8 @@ def louvain(input_graph, max_iter, resolution): c_weights, num_verts, num_edges) graph_float.get_vertex_identifiers(c_identifier) - num_level, final_modularity_float = c_louvain(graph_float, + num_level, final_modularity_float = c_louvain(handle_ptr.get()[0], + graph_float, c_partition, max_iter, resolution) @@ -80,7 +84,8 @@ def louvain(input_graph, max_iter, resolution): c_weights, num_verts, num_edges) graph_double.get_vertex_identifiers(c_identifier) - num_level, 
final_modularity_double = c_louvain(graph_double, + num_level, final_modularity_double = c_louvain(handle_ptr.get()[0], + graph_double, c_partition, max_iter, resolution) From c95fe8a9cf98528739783b4993e818181bf64d6d Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Thu, 3 Sep 2020 13:08:18 -0400 Subject: [PATCH 11/74] clang format updates --- cpp/src/community/leiden.cu | 2 +- cpp/src/community/louvain.cuh | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/src/community/leiden.cu b/cpp/src/community/leiden.cu index 3d77555fc06..1011950e1ff 100644 --- a/cpp/src/community/leiden.cu +++ b/cpp/src/community/leiden.cu @@ -55,4 +55,4 @@ template std::pair leiden( template std::pair leiden( raft::handle_t const &, GraphCSRView const &, int32_t *, int, double); -} // namespace cugraph +} // namespace cugraph diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index c6a3c30d591..eb2f2cc1b8e 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -38,8 +38,7 @@ class Louvain { using edge_t = typename graph_type::edge_type; using weight_t = typename graph_type::weight_type; - Louvain(raft::handle_t const &handle, - graph_type const &graph, cudaStream_t stream) + Louvain(raft::handle_t const &handle, graph_type const &graph, cudaStream_t stream) : #ifdef TIMING hr_timer_(), From dc8b64a40364e0de0f251d9135b54ccc9f8dcbac Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Thu, 3 Sep 2020 12:31:31 -0700 Subject: [PATCH 12/74] use the cub submodule in thrust instead of fetching it --- cpp/CMakeLists.txt | 26 ++++---------------------- 1 file changed, 4 insertions(+), 22 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index f43b829859a..3f3d82371e9 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -186,32 +186,16 @@ if (RMM_INCLUDE AND RMM_LIBRARY) endif (RMM_INCLUDE AND RMM_LIBRARY) ################################################################################################### -# - 
Fetch Content ----------------------------------------------------------------------------- +# - Fetch Content --------------------------------------------------------------------------------- include(FetchContent) -# - CUB -message("Fetching CUB") - -FetchContent_Declare( - cub - GIT_REPOSITORY https://github.com/thrust/cub.git - # August 28, 2020 - GIT_TAG 2442f44532ffcc53298c7e3a298feb5134563860 -) - -FetchContent_GetProperties(cub) -if(NOT cub_POPULATED) - FetchContent_Populate(cub) - # We are not using the cub CMake targets, so no need to call `add_subdirectory()`. -endif() -set(CUB_INCLUDE_DIR "${cub_SOURCE_DIR}") - -# - THRUST +# - THRUST/CUB message("Fetching Thrust") FetchContent_Declare( thrust GIT_REPOSITORY https://github.com/thrust/thrust.git + # August 28, 2020 GIT_TAG 52a8bda46c5c2128414d1d47f546b486ff0be2f0 ) @@ -221,9 +205,7 @@ if(NOT thrust_POPULATED) # We are not using the thrust CMake targets, so no need to call `add_subdirectory()`. endif() set(THRUST_INCLUDE_DIR "${thrust_SOURCE_DIR}") - - - +set(CUB_INCLUDE_DIR "${thrust_SOURCE_DIR}/dependencies/cub" PARENT_SCOPE) ################################################################################################### # - External Projects ----------------------------------------------------------------------------- From f1f651aa986a109e3ce2cf8aefd33d57efdda5f6 Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Thu, 3 Sep 2020 14:52:46 -0700 Subject: [PATCH 13/74] fix typo --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index dbf06b6723c..5fa3e117a04 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,7 @@ ## Improvements - PR 1081 MNMG Renumbering - sort partitions by degree - PR 1115 Replace deprecated rmm::mr::get_default_resource with rmm::mr::get_current_device_resource -- PR 1132 Upgrade CUB/Thrust to laest commits +- PR 1132 Upgrade Thrust to latest commit ## Bug Fixes - PR #1131 Show style checker errors with set +e From 
aed3cf9d3108b5f17f0b1f714e455fab30cea8bd Mon Sep 17 00:00:00 2001 From: Andrei Schaffer Date: Thu, 3 Sep 2020 20:59:08 -0500 Subject: [PATCH 14/74] Fixed sub-comm partitioning key-name mechanism. --- cpp/include/partition_manager.hpp | 51 +++++++++++++++++++------------ 1 file changed, 32 insertions(+), 19 deletions(-) diff --git a/cpp/include/partition_manager.hpp b/cpp/include/partition_manager.hpp index 32d935dbf7d..8d848196406 100644 --- a/cpp/include/partition_manager.hpp +++ b/cpp/include/partition_manager.hpp @@ -97,13 +97,24 @@ class partition_manager_t { // default key-naming mechanism: // struct key_naming_t { + // simplified key (one per all row subcomms / one per all column sub-comms): + // + key_naming_t(std::string const& row_suffix = std::string("_p_row"), + std::string const& col_suffix = std::string("_p_col"), + std::string const& prefix = std::string("comm")) + : row_suffix_(row_suffix), col_suffix_(col_suffix), prefix_(prefix), name_(prefix_) + { + } + + // more involved key naming, using row/col indices: + // key_naming_t(int row_indx, int col_indx, - std::string const& col_suffix = std::string("_col"), - std::string const& row_suffix = std::string("_row"), - std::string const& prefix = std::string("partition")) - : col_suffix_(col_suffix), - row_suffix_(row_suffix), + std::string const& row_suffix = std::string("_p_row"), + std::string const& col_suffix = std::string("_p_col"), + std::string const& prefix = std::string("comm")) + : row_suffix_(row_suffix), + col_suffix_(col_suffix), prefix_(prefix), name_(prefix_ + "_" + to_string(row_indx) + "_" + to_string(col_indx)) { @@ -114,8 +125,8 @@ struct key_naming_t { std::string row_name(void) const { return name_ + row_suffix_; } private: - std::string const col_suffix_; std::string const row_suffix_; + std::string const col_suffix_; std::string const prefix_; std::string name_; }; @@ -123,38 +134,41 @@ struct key_naming_t { using pair_comms_t = std::pair, std::shared_ptr>; -enum class key_2d_t : 
int { ROW = 0, COL = 1 }; - // class responsible for creating 2D partition sub-comms: // this is instantiated by each worker (processing element, PE) // for the row/column it belongs to; // +// naming policy defaults to simplified naming: +// one key per row subcomms, one per column subcomms; +// template class subcomm_factory_t { public: - subcomm_factory_t(raft::handle_t& handle, size_type p_row_index, size_type p_col_index) - : handle_(handle), row_index_(p_row_index), col_index_(p_col_index) + subcomm_factory_t(raft::handle_t& handle, size_type row_size) + : handle_(handle), row_size_(row_size) { init_row_col_comms(); } virtual ~subcomm_factory_t(void) {} + pair_comms_t const& row_col_comms(void) const { return row_col_subcomms_; } + protected: virtual void init_row_col_comms(void) { - name_policy_t key{row_index_, col_index_}; + name_policy_t key; raft::comms::comms_t const& communicator = handle_.get_comms(); int const rank = communicator.get_rank(); - int row_color = rank / row_index_; - int col_color = rank % row_index_; + int row_index = rank / row_size_; + int col_index = rank % row_size_; - auto row_comm = std::make_shared( - communicator.comm_split(row_color, static_cast(key_2d_t::ROW))); + auto row_comm = + std::make_shared(communicator.comm_split(row_index, col_index)); handle_.set_subcomm(key.row_name(), row_comm); - auto col_comm = std::make_shared( - communicator.comm_split(col_color, static_cast(key_2d_t::COL))); + auto col_comm = + std::make_shared(communicator.comm_split(col_index, row_index)); handle_.set_subcomm(key.col_name(), col_comm); row_col_subcomms_.first = row_comm; @@ -163,8 +177,7 @@ class subcomm_factory_t { private: raft::handle_t& handle_; - size_type row_index_; - size_type col_index_; + size_type row_size_; pair_comms_t row_col_subcomms_; }; } // namespace partition_2d From 88364596cb61e1308fc77174b63e130f2d4edffb Mon Sep 17 00:00:00 2001 From: Rong Ou Date: Fri, 4 Sep 2020 18:02:10 -0700 Subject: [PATCH 15/74] CUB_INCLUDE_DIR 
is not needed since Thrust has a symlink --- cpp/CMakeLists.txt | 2 -- 1 file changed, 2 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3f3d82371e9..cd6fa0471df 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -205,7 +205,6 @@ if(NOT thrust_POPULATED) # We are not using the thrust CMake targets, so no need to call `add_subdirectory()`. endif() set(THRUST_INCLUDE_DIR "${thrust_SOURCE_DIR}") -set(CUB_INCLUDE_DIR "${thrust_SOURCE_DIR}/dependencies/cub" PARENT_SCOPE) ################################################################################################### # - External Projects ----------------------------------------------------------------------------- @@ -350,7 +349,6 @@ add_dependencies(cugraph raft) # - include paths --------------------------------------------------------------------------------- target_include_directories(cugraph PRIVATE - "${CUB_INCLUDE_DIR}" "${THRUST_INCLUDE_DIR}" "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" "${LIBCYPHERPARSER_INCLUDE}" From 69c085e967c71ed4169ae6e9ac08030f43e42128 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Wed, 9 Sep 2020 13:58:59 -0400 Subject: [PATCH 16/74] update changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 28000de4e77..0960f097faf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ ## Improvements - PR 1081 MNMG Renumbering - sort partitions by degree - PR 1115 Replace deprecated rmm::mr::get_default_resource with rmm::mr::get_current_device_resource +- PR 1135 SG Updates to Louvain et. al. 
## Bug Fixes - PR #1131 Show style checker errors with set +e From a5e5f369fb1b36578c54161ee791f7fa5a53cfdb Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Wed, 9 Sep 2020 15:34:55 -0400 Subject: [PATCH 17/74] address PR comments --- cpp/src/community/leiden.cu | 2 +- cpp/src/community/leiden.cuh | 2 +- cpp/src/community/louvain.cu | 2 +- cpp/src/community/louvain.cuh | 11 +++++------ 4 files changed, 8 insertions(+), 9 deletions(-) diff --git a/cpp/src/community/leiden.cu b/cpp/src/community/leiden.cu index 1011950e1ff..8ab21098b07 100644 --- a/cpp/src/community/leiden.cu +++ b/cpp/src/community/leiden.cu @@ -32,7 +32,7 @@ std::pair leiden(raft::handle_t const &handle, Leiden> runner(handle, graph, stream); - return runner.compute(leiden_parts, max_level, resolution); + return runner(leiden_parts, max_level, resolution); } } // namespace detail diff --git a/cpp/src/community/leiden.cuh b/cpp/src/community/leiden.cuh index df1e64cc9f0..b89dd25bc55 100644 --- a/cpp/src/community/leiden.cuh +++ b/cpp/src/community/leiden.cuh @@ -97,7 +97,7 @@ class Leiden : public Louvain { return cur_Q; } - std::pair compute(vertex_t *d_cluster_vec, int max_level, weight_t resolution) + std::pair operator()(vertex_t *d_cluster_vec, int max_level, weight_t resolution) { int num_level{0}; diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index 166057d1859..7da884daa92 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -32,7 +32,7 @@ std::pair louvain(raft::handle_t const &handle, Louvain> runner(handle, graph, stream); - return runner.compute(louvain_parts, max_level, resolution); + return runner(louvain_parts, max_level, resolution); } } // namespace detail diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index eb2f2cc1b8e..fa62b70474f 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -118,9 +118,9 @@ class Louvain { return Q; } - virtual std::pair compute(vertex_t 
*d_cluster_vec, - int max_level, - weight_t resolution) + virtual std::pair operator()(vertex_t *d_cluster_vec, + int max_level, + weight_t resolution) { int num_level{0}; @@ -210,9 +210,8 @@ class Louvain { thrust::make_counting_iterator(graph.number_of_vertices), [d_offsets, d_indices, d_weights, d_vertex_weights, d_cluster_weights] __device__( vertex_t src) { - weight_t sum{0.0}; - - for (edge_t i = d_offsets[src]; i < d_offsets[src + 1]; ++i) { sum += d_weights[i]; } + weight_t sum = + thrust::reduce(thrust::seq, d_weights + d_offsets[src], d_weights + d_offsets[src + 1]); d_vertex_weights[src] = sum; d_cluster_weights[src] = sum; From b0a5effe8781443c43738e41d093eb1010cf5ae7 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Wed, 9 Sep 2020 23:05:07 -0500 Subject: [PATCH 18/74] safety commit - building with new MG Louvain placeholders --- python/cugraph/dask/__init__.py | 1 + python/cugraph/dask/community/__init__.py | 14 +++ python/cugraph/dask/community/louvain.pxd | 32 +++++++ python/cugraph/dask/community/louvain.py | 55 ++++++++++++ .../dask/community/louvain_wrapper.pyx | 57 ++++++++++++ python/cugraph/tests/dask/test_mg_louvain.py | 88 +++++++++++++++++++ 6 files changed, 247 insertions(+) create mode 100644 python/cugraph/dask/community/__init__.py create mode 100644 python/cugraph/dask/community/louvain.pxd create mode 100644 python/cugraph/dask/community/louvain.py create mode 100644 python/cugraph/dask/community/louvain_wrapper.pyx create mode 100644 python/cugraph/tests/dask/test_mg_louvain.py diff --git a/python/cugraph/dask/__init__.py b/python/cugraph/dask/__init__.py index 76c47338852..e62a8bfcdb4 100644 --- a/python/cugraph/dask/__init__.py +++ b/python/cugraph/dask/__init__.py @@ -14,3 +14,4 @@ from .link_analysis.pagerank import pagerank from .traversal.bfs import bfs from .common.read_utils import get_chunksize +from .community.louvain import louvain diff --git a/python/cugraph/dask/community/__init__.py 
b/python/cugraph/dask/community/__init__.py new file mode 100644 index 00000000000..3eb2ddc8090 --- /dev/null +++ b/python/cugraph/dask/community/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .louvain import louvain diff --git a/python/cugraph/dask/community/louvain.pxd b/python/cugraph/dask/community/louvain.pxd new file mode 100644 index 00000000000..76a752e5700 --- /dev/null +++ b/python/cugraph/dask/community/louvain.pxd @@ -0,0 +1,32 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + + +from cugraph.structure.graph cimport * + + +# FIXME: need header for MG louvain +cdef extern from "algorithms.hpp" namespace "cugraph": + + cdef void louvain[vertex_t,edge_t,weight_t]( + const GraphCSRView[vertex_t,edge_t,weight_t] &graph, + weight_t *final_modularity, + int *num_level, + vertex_t *louvain_parts, + int max_level, + weight_t resolution) except + diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py new file mode 100644 index 00000000000..1b9ea373efa --- /dev/null +++ b/python/cugraph/dask/community/louvain.py @@ -0,0 +1,55 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from cugraph.dask.community import louvain_wrapper + + +def louvain(input_graph, max_iter=100, resolution=1.): + """ + Compute the modularity optimizing partition of the input graph using the + Louvain method on multiple GPUs + + Examples + -------- + >>> import cugraph.dask as dcg + >>> Comms.initialize() + >>> chunksize = dcg.get_chunksize(input_data_path) + >>> ddf = dask_cudf.read_csv('datasets/karate.csv', chunksize=chunksize, + delimiter=' ', + names=['src', 'dst', 'value'], + dtype=['int32', 'int32', 'float32']) + >>> dg = cugraph.Graph() + >>> dg.from_dask_cudf_edgelist(ddf, source='src', destination='dst', + edge_attr='value') + >>> parts, modularity_score = dcg.louvain(dg) + """ + # FIXME: import here to prevent circular import: cugraph->louvain + # wrapper->cugraph/structure->cugraph/dask->dask/louvain->cugraph/structure + # from cugraph.structure.graph import Graph + + # FIXME: dask methods to populate graphs from edgelists are only present on + # DiGraph classes. Disable the Graph check for now and assume inputs are + # symmetric DiGraphs. + # if type(input_graph) is not Graph: + # raise Exception("input graph must be undirected") + + parts, modularity_score = louvain_wrapper.louvain( + input_graph, max_iter, resolution + ) + + if input_graph.renumbered: + # MG renumbering is lazy, but it's safe to assume it's been called at + # this point if renumbered=True + parts = input_graph.unrenumber(parts, "vertex") + + return parts, modularity_score diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx new file mode 100644 index 00000000000..9787eada435 --- /dev/null +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -0,0 +1,57 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +#from cugraph.dask.community.louvain cimport louvain as c_louvain +#from cugraph.structure.graph cimport * + +import cudf +import numpy as np + + +def louvain(input_graph, max_iter, resolution): + """ + Call MG Louvain + """ + # FIXME: view_adj_list() is not supported for a distributed graph but should + # still be done? + # if not input_graph.adjlist: + # input_graph.view_adj_list() + + weights = None + final_modularity = None + + # FIXME: this needs to go here to stop circular import + from cugraph.structure import graph_new_wrapper + + [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + + num_verts = input_graph.number_of_vertices() + num_edges = input_graph.number_of_edges(directed_edges=True) + + if input_graph.adjlist.weights is not None: + [weights] = graph_new_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) + else: + weights = cudf.Series(np.full(num_edges, 1.0, dtype=np.float32)) + + # Create the output dataframe + df = cudf.DataFrame() + df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) + df['partition'] = cudf.Series(np.zeros(num_verts,dtype=np.int32)) + + + return df, final_modularity diff --git a/python/cugraph/tests/dask/test_mg_louvain.py b/python/cugraph/tests/dask/test_mg_louvain.py new file mode 100644 index 00000000000..aa15c8df17d --- /dev/null +++ b/python/cugraph/tests/dask/test_mg_louvain.py @@ 
-0,0 +1,88 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +import pandas +import numpy as np + +import cugraph.dask as dcg +import cugraph.comms as Comms +from dask.distributed import Client +import cugraph +import dask_cudf +import dask +import cudf +from dask_cuda import LocalCUDACluster +from cugraph.tests import utils +from cugraph.structure.number_map import NumberMap + +try: + from rapids_pytest_benchmark import setFixtureParamNames +except ImportError: + print("\n\nWARNING: rapids_pytest_benchmark is not installed, " + "falling back to pytest_benchmark fixtures.\n") + + # if rapids_pytest_benchmark is not available, just perfrom time-only + # benchmarking and replace the util functions with nops + gpubenchmark = pytest_benchmark.plugin.benchmark + + def setFixtureParamNames(*args, **kwargs): + pass + +############################################################################### +# Fixtures +@pytest.fixture(scope="module") +def client_connection(): + # setup + cluster = LocalCUDACluster() + client = Client(cluster) + Comms.initialize() + + yield client + + # teardown + Comms.destroy() + client.close() + cluster.close() + + +@pytest.fixture(scope="module", + params=utils.DATASETS) +def daskGraphFromDataset(request, client_connection): + """ + Returns a new dask dataframe created from the dataset file param. 
+ """ + # Since parameterized fixtures do not assign param names to param values, + # manually call the helper to do so. + setFixtureParamNames(request, ["dataset"]) + dataset = request.param + + chunksize = dcg.get_chunksize(dataset) + ddf = dask_cudf.read_csv(dataset, chunksize=chunksize, + delimiter=' ', + names=['src', 'dst', 'value'], + dtype=['int32', 'int32', 'float32']) + #dg = cugraph.proto.Graph() + dg = cugraph.DiGraph() + dg.from_dask_cudf_edgelist(ddf, 'src', 'dst') + return dg + + +############################################################################### +# Tests +def test_mg_louvain_with_edgevals(daskGraphFromDataset): + # FIXME: daskGraphFromDataset returns a DiGraph, which Louvain is currently + # accepting. In the future, an MNMG symmeterize will need to be called to + # create a Graph for Louvain. + parts, mod = dcg.louvain(daskGraphFromDataset) From 4fccf69ecf4d303d6d2c1be6e2e86504b7feeea4 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Thu, 10 Sep 2020 00:34:53 -0500 Subject: [PATCH 19/74] Renamed graph_new to graph_primtypes to hopefully clarify its contents, removed unused graph_new.pxd (which was identical to graph.pxd, which then got renamed to graph_primtypes.pxd) --- .../centrality/betweenness_centrality.pxd | 2 +- .../betweenness_centrality_wrapper.pyx | 2 +- .../edge_betweenness_centrality_wrapper.pyx | 4 +- python/cugraph/centrality/katz_centrality.pxd | 2 +- .../centrality/katz_centrality_wrapper.pyx | 6 +- python/cugraph/community/ecg.pxd | 2 +- python/cugraph/community/ecg_wrapper.pyx | 10 +- python/cugraph/community/ktruss_subgraph.pxd | 2 +- .../community/ktruss_subgraph_wrapper.pyx | 6 +- python/cugraph/community/leiden.pxd | 2 +- python/cugraph/community/leiden_wrapper.pyx | 8 +- python/cugraph/community/louvain.pxd | 2 +- python/cugraph/community/louvain_wrapper.pyx | 8 +- .../cugraph/community/spectral_clustering.pxd | 10 +- .../community/spectral_clustering_wrapper.pyx | 26 +-- 
.../cugraph/community/subgraph_extraction.pxd | 2 +- .../community/subgraph_extraction_wrapper.pyx | 14 +- python/cugraph/community/triangle_count.pxd | 2 +- .../community/triangle_count_wrapper.pyx | 10 +- python/cugraph/components/connectivity.pxd | 3 +- .../components/connectivity_wrapper.pyx | 24 +-- python/cugraph/cores/core_number.pxd | 3 +- python/cugraph/cores/core_number_wrapper.pyx | 6 +- python/cugraph/cores/k_core.pxd | 2 +- python/cugraph/cores/k_core_wrapper.pyx | 8 +- .../dask/link_analysis/mg_pagerank.pxd | 2 +- .../link_analysis/mg_pagerank_wrapper.pyx | 12 +- python/cugraph/dask/structure/replication.pyx | 2 +- python/cugraph/dask/traversal/mg_bfs.pxd | 2 +- .../cugraph/dask/traversal/mg_bfs_wrapper.pyx | 6 +- python/cugraph/layout/force_atlas2.pxd | 2 +- .../cugraph/layout/force_atlas2_wrapper.pyx | 4 +- python/cugraph/link_analysis/hits.pxd | 2 +- python/cugraph/link_analysis/hits_wrapper.pyx | 8 +- python/cugraph/link_analysis/pagerank.pxd | 2 +- .../link_analysis/pagerank_wrapper.pyx | 16 +- python/cugraph/link_prediction/jaccard.pxd | 4 +- .../link_prediction/jaccard_wrapper.pyx | 24 +-- python/cugraph/link_prediction/overlap.pxd | 4 +- .../link_prediction/overlap_wrapper.pyx | 16 +- python/cugraph/structure/graph.py | 20 +- python/cugraph/structure/graph_new.pxd | 192 ------------------ .../{graph.pxd => graph_primtypes.pxd} | 0 .../{graph_new.pyx => graph_primtypes.pyx} | 0 ...rapper.pyx => graph_primtypes_wrapper.pyx} | 28 +-- python/cugraph/structure/utils.pxd | 2 +- python/cugraph/structure/utils_wrapper.pyx | 2 +- python/cugraph/traversal/bfs.pxd | 2 +- python/cugraph/traversal/bfs_wrapper.pyx | 6 +- python/cugraph/traversal/sssp.pxd | 2 +- python/cugraph/traversal/sssp_wrapper.pyx | 8 +- 51 files changed, 169 insertions(+), 365 deletions(-) delete mode 100644 python/cugraph/structure/graph_new.pxd rename python/cugraph/structure/{graph.pxd => graph_primtypes.pxd} (100%) rename python/cugraph/structure/{graph_new.pyx => 
graph_primtypes.pyx} (100%) rename python/cugraph/structure/{graph_new_wrapper.pyx => graph_primtypes_wrapper.pyx} (96%) diff --git a/python/cugraph/centrality/betweenness_centrality.pxd b/python/cugraph/centrality/betweenness_centrality.pxd index 0c17a17ad5a..829d7be37d9 100644 --- a/python/cugraph/centrality/betweenness_centrality.pxd +++ b/python/cugraph/centrality/betweenness_centrality.pxd @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool cdef extern from "algorithms.hpp" namespace "cugraph": diff --git a/python/cugraph/centrality/betweenness_centrality_wrapper.pyx b/python/cugraph/centrality/betweenness_centrality_wrapper.pyx index a20a58b844b..bb0e88a79ba 100644 --- a/python/cugraph/centrality/betweenness_centrality_wrapper.pyx +++ b/python/cugraph/centrality/betweenness_centrality_wrapper.pyx @@ -19,7 +19,7 @@ from cugraph.centrality.betweenness_centrality cimport betweenness_centrality as c_betweenness_centrality from cugraph.centrality.betweenness_centrality cimport handle_t from cugraph.structure.graph import DiGraph -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * from libc.stdint cimport uintptr_t from libcpp cimport bool import cudf diff --git a/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx b/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx index 9a5a022f640..cdc8a1c61a2 100644 --- a/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx +++ b/python/cugraph/centrality/edge_betweenness_centrality_wrapper.pyx @@ -17,9 +17,9 @@ # cython: language_level = 3 from cugraph.centrality.betweenness_centrality cimport edge_betweenness_centrality as c_edge_betweenness_centrality -from cugraph.structure import graph_new_wrapper +from cugraph.structure import graph_primtypes_wrapper from cugraph.structure.graph import DiGraph, 
Graph -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * from libc.stdint cimport uintptr_t from libcpp cimport bool import cudf diff --git a/python/cugraph/centrality/katz_centrality.pxd b/python/cugraph/centrality/katz_centrality.pxd index a8496a2f508..53867f48ac6 100644 --- a/python/cugraph/centrality/katz_centrality.pxd +++ b/python/cugraph/centrality/katz_centrality.pxd @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool cdef extern from "algorithms.hpp" namespace "cugraph": diff --git a/python/cugraph/centrality/katz_centrality_wrapper.pyx b/python/cugraph/centrality/katz_centrality_wrapper.pyx index 01b942991a5..926ed0452e0 100644 --- a/python/cugraph/centrality/katz_centrality_wrapper.pyx +++ b/python/cugraph/centrality/katz_centrality_wrapper.pyx @@ -17,8 +17,8 @@ # cython: language_level = 3 from cugraph.centrality.katz_centrality cimport katz_centrality as c_katz_centrality -from cugraph.structure.graph_new cimport * -from cugraph.structure import graph_new_wrapper +from cugraph.structure.graph_primtypes cimport * +from cugraph.structure import graph_primtypes_wrapper from libcpp cimport bool from libc.stdint cimport uintptr_t @@ -37,7 +37,7 @@ def get_output_df(input_graph, nstart): if len(nstart) != num_verts: raise ValueError('nstart must have initial guess for all vertices') - nstart['values'] = graph_new_wrapper.datatype_cast([nstart['values']], [np.float64]) + nstart['values'] = graph_primtypes_wrapper.datatype_cast([nstart['values']], [np.float64]) df['katz_centrality'][nstart['vertex']] = nstart['values'] return df diff --git a/python/cugraph/community/ecg.pxd b/python/cugraph/community/ecg.pxd index 33af448754b..ee862a75f64 100644 --- a/python/cugraph/community/ecg.pxd +++ b/python/cugraph/community/ecg.pxd @@ -16,7 +16,7 @@ # cython: embedsignature = True # 
cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * cdef extern from "algorithms.hpp" namespace "cugraph": diff --git a/python/cugraph/community/ecg_wrapper.pyx b/python/cugraph/community/ecg_wrapper.pyx index 913a633c088..35b03c5df17 100644 --- a/python/cugraph/community/ecg_wrapper.pyx +++ b/python/cugraph/community/ecg_wrapper.pyx @@ -17,8 +17,8 @@ # cython: language_level = 3 from cugraph.community.ecg cimport ecg as c_ecg -from cugraph.structure.graph_new cimport * -from cugraph.structure import graph_new_wrapper +from cugraph.structure.graph_primtypes cimport * +from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t import cudf @@ -36,9 +36,9 @@ def ecg(input_graph, min_weight=.05, ensemble_size=16): if input_graph.adjlist.weights is None: raise Exception('ECG must be called on a weighted graph') - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, - input_graph.adjlist.indices], [np.int32, np.int64]) - [weights] = graph_new_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, + input_graph.adjlist.indices], [np.int32, np.int64]) + [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) diff --git a/python/cugraph/community/ktruss_subgraph.pxd b/python/cugraph/community/ktruss_subgraph.pxd index 08e59d2f8f2..ab3a5189414 100644 --- a/python/cugraph/community/ktruss_subgraph.pxd +++ b/python/cugraph/community/ktruss_subgraph.pxd @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * cdef extern from "algorithms.hpp" namespace "cugraph": diff 
--git a/python/cugraph/community/ktruss_subgraph_wrapper.pyx b/python/cugraph/community/ktruss_subgraph_wrapper.pyx index 8a2c81f70fa..9f8138f4d57 100644 --- a/python/cugraph/community/ktruss_subgraph_wrapper.pyx +++ b/python/cugraph/community/ktruss_subgraph_wrapper.pyx @@ -17,8 +17,8 @@ # cython: language_level = 3 from cugraph.community.ktruss_subgraph cimport * -from cugraph.structure.graph_new cimport * -from cugraph.structure import graph_new_wrapper +from cugraph.structure.graph_primtypes cimport * +from cugraph.structure import graph_primtypes_wrapper from libcpp cimport bool from libc.stdint cimport uintptr_t from libc.float cimport FLT_MAX_EXP @@ -39,7 +39,7 @@ def ktruss_subgraph_double(input_graph, k, use_weights): def ktruss_subgraph(input_graph, k, use_weights): - if graph_new_wrapper.weight_type(input_graph) == np.float64 and use_weights: + if graph_primtypes_wrapper.weight_type(input_graph) == np.float64 and use_weights: return ktruss_subgraph_double(input_graph, k, use_weights) else: return ktruss_subgraph_float(input_graph, k, use_weights) diff --git a/python/cugraph/community/leiden.pxd b/python/cugraph/community/leiden.pxd index 1c6009b30b6..a240c36317c 100644 --- a/python/cugraph/community/leiden.pxd +++ b/python/cugraph/community/leiden.pxd @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * cdef extern from "algorithms.hpp" namespace "cugraph": diff --git a/python/cugraph/community/leiden_wrapper.pyx b/python/cugraph/community/leiden_wrapper.pyx index 9ed220bb2a2..fc965642d5f 100644 --- a/python/cugraph/community/leiden_wrapper.pyx +++ b/python/cugraph/community/leiden_wrapper.pyx @@ -17,8 +17,8 @@ # cython: language_level = 3 from cugraph.community.leiden cimport leiden as c_leiden -from cugraph.structure.graph_new cimport * -from cugraph.structure import graph_new_wrapper +from cugraph.structure.graph_primtypes cimport * 
+from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t import cudf @@ -36,13 +36,13 @@ def leiden(input_graph, max_iter, resolution): weights = None final_modularity = None - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) if input_graph.adjlist.weights is not None: - [weights] = graph_new_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) + [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) else: weights = cudf.Series(np.full(num_edges, 1.0, dtype=np.float32)) diff --git a/python/cugraph/community/louvain.pxd b/python/cugraph/community/louvain.pxd index 7cc72b4d0ed..2db4edc8f83 100644 --- a/python/cugraph/community/louvain.pxd +++ b/python/cugraph/community/louvain.pxd @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * cdef extern from "algorithms.hpp" namespace "cugraph": diff --git a/python/cugraph/community/louvain_wrapper.pyx b/python/cugraph/community/louvain_wrapper.pyx index 79db57125b1..a72a341eaa2 100644 --- a/python/cugraph/community/louvain_wrapper.pyx +++ b/python/cugraph/community/louvain_wrapper.pyx @@ -17,8 +17,8 @@ # cython: language_level = 3 from cugraph.community.louvain cimport louvain as c_louvain -from cugraph.structure.graph_new cimport * -from cugraph.structure import graph_new_wrapper +from cugraph.structure.graph_primtypes cimport * +from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t import cudf @@ -36,13 +36,13 @@ def louvain(input_graph, max_iter, 
resolution): weights = None final_modularity = None - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) if input_graph.adjlist.weights is not None: - [weights] = graph_new_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) + [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) else: weights = cudf.Series(np.full(num_edges, 1.0, dtype=np.float32)) diff --git a/python/cugraph/community/spectral_clustering.pxd b/python/cugraph/community/spectral_clustering.pxd index 360ff08a04e..27ce6130b05 100644 --- a/python/cugraph/community/spectral_clustering.pxd +++ b/python/cugraph/community/spectral_clustering.pxd @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * cdef extern from "algorithms.hpp" namespace "cugraph::ext_raft": @@ -30,7 +30,7 @@ cdef extern from "algorithms.hpp" namespace "cugraph::ext_raft": const float kmean_tolerance, const int kmean_max_iter, VT* clustering) except + - + cdef void spectralModularityMaximization[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph, const int n_clusters, @@ -40,19 +40,19 @@ cdef extern from "algorithms.hpp" namespace "cugraph::ext_raft": const float kmean_tolerance, const int kmean_max_iter, VT* clustering) except + - + cdef void analyzeClustering_modularity[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph, const int n_clusters, const VT* clustering, WT* score) except + - + cdef void analyzeClustering_edge_cut[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph, const int n_clusters, const VT* clustering, WT* score) except + - + 
cdef void analyzeClustering_ratio_cut[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph, const int n_clusters, diff --git a/python/cugraph/community/spectral_clustering_wrapper.pyx b/python/cugraph/community/spectral_clustering_wrapper.pyx index fff027bac7e..0593d987c0d 100644 --- a/python/cugraph/community/spectral_clustering_wrapper.pyx +++ b/python/cugraph/community/spectral_clustering_wrapper.pyx @@ -21,8 +21,8 @@ from cugraph.community.spectral_clustering cimport spectralModularityMaximizatio from cugraph.community.spectral_clustering cimport analyzeClustering_modularity as c_analyze_clustering_modularity from cugraph.community.spectral_clustering cimport analyzeClustering_edge_cut as c_analyze_clustering_edge_cut from cugraph.community.spectral_clustering cimport analyzeClustering_ratio_cut as c_analyze_clustering_ratio_cut -from cugraph.structure.graph_new cimport * -from cugraph.structure import graph_new_wrapper +from cugraph.structure.graph_primtypes cimport * +from cugraph.structure import graph_primtypes_wrapper from libcpp cimport bool from libc.stdint cimport uintptr_t @@ -50,13 +50,13 @@ def spectralBalancedCutClustering(input_graph, weights = None - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) if input_graph.adjlist.weights is not None: - [weights] = graph_new_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) + [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) else: weights = cudf.Series(np.full(num_edges, 1.0, dtype=np.float32)) @@ -122,8 +122,8 @@ def spectralModularityMaximizationClustering(input_graph, if input_graph.adjlist.weights is None: raise 
Exception("spectral modularity maximization must be called on a graph with weights") - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) - [weights] = graph_new_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) @@ -181,8 +181,8 @@ def analyzeClustering_modularity(input_graph, n_clusters, clustering): if not input_graph.adjlist: input_graph.view_adj_list() - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) - [weights] = graph_new_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) score = None num_verts = input_graph.number_of_vertices() @@ -191,7 +191,7 @@ def analyzeClustering_modularity(input_graph, n_clusters, clustering): if input_graph.adjlist.weights is None: raise Exception("analyze clustering modularity must be called on a graph with weights") if input_graph.adjlist.weights is not None: - [weights] = graph_new_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) + [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) else: weights = cudf.Series(np.full(num_edges, 1.0, dtype=np.float32)) @@ -237,14 +237,14 @@ def analyzeClustering_edge_cut(input_graph, n_clusters, clustering): if not 
input_graph.adjlist: input_graph.view_adj_list() - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) score = None num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) if input_graph.adjlist.weights is not None: - [weights] = graph_new_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) + [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) else: weights = cudf.Series(np.full(num_edges, 1.0, dtype=np.float32)) @@ -290,14 +290,14 @@ def analyzeClustering_ratio_cut(input_graph, n_clusters, clustering): if not input_graph.adjlist: input_graph.view_adj_list() - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) score = None num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) if input_graph.adjlist.weights is not None: - [weights] = graph_new_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) + [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) else: weights = cudf.Series(np.full(num_edges, 1.0, dtype=np.float32)) diff --git a/python/cugraph/community/subgraph_extraction.pxd b/python/cugraph/community/subgraph_extraction.pxd index 12cef73fad4..97a71056006 100644 --- a/python/cugraph/community/subgraph_extraction.pxd +++ b/python/cugraph/community/subgraph_extraction.pxd @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from 
cugraph.structure.graph_primtypes cimport * from libcpp.memory cimport unique_ptr diff --git a/python/cugraph/community/subgraph_extraction_wrapper.pyx b/python/cugraph/community/subgraph_extraction_wrapper.pyx index 03593dafe03..553bcaa4bfc 100644 --- a/python/cugraph/community/subgraph_extraction_wrapper.pyx +++ b/python/cugraph/community/subgraph_extraction_wrapper.pyx @@ -17,8 +17,8 @@ # cython: language_level = 3 from cugraph.community.subgraph_extraction cimport extract_subgraph_vertex as c_extract_subgraph_vertex -from cugraph.structure.graph_new cimport * -from cugraph.structure import graph_new_wrapper +from cugraph.structure.graph_primtypes cimport * +from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t import cudf @@ -38,13 +38,13 @@ def subgraph(input_graph, vertices): if not input_graph.edgelist: input_graph.view_edge_list() - [src, dst] = graph_new_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32]) + [src, dst] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], input_graph.edgelist.edgelist_df['dst']], [np.int32]) if input_graph.edgelist.weights: - [weights] = graph_new_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['weights']], [np.float32, np.float64]) + [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['weights']], [np.float32, np.float64]) if weights.dtype == np.float64: use_float = False - + cdef GraphCOOView[int,int,float] in_graph_float cdef GraphCOOView[int,int,double] in_graph_double cdef unique_ptr[GraphCOO[int,int,float]] out_graph_float @@ -56,7 +56,7 @@ def subgraph(input_graph, vertices): if weights is not None: c_weights = weights.__cuda_array_interface__['data'][0] - + cdef uintptr_t c_vertices = vertices.__cuda_array_interface__['data'][0] num_verts = input_graph.number_of_vertices() @@ -77,5 +77,5 @@ def subgraph(input_graph, vertices): df = 
df.merge(vertices_df, left_on='src', right_on='index', how='left').drop(['src', 'index']).rename(columns={'v': 'src'}, copy=False) df = df.merge(vertices_df, left_on='dst', right_on='index', how='left').drop(['dst', 'index']).rename(columns={'v': 'dst'}, copy=False) - + return df diff --git a/python/cugraph/community/triangle_count.pxd b/python/cugraph/community/triangle_count.pxd index 6876d067f7a..70795a3f43a 100644 --- a/python/cugraph/community/triangle_count.pxd +++ b/python/cugraph/community/triangle_count.pxd @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * from libc.stdint cimport uint64_t diff --git a/python/cugraph/community/triangle_count_wrapper.pyx b/python/cugraph/community/triangle_count_wrapper.pyx index f34f6a7a947..d7cabd4676f 100644 --- a/python/cugraph/community/triangle_count_wrapper.pyx +++ b/python/cugraph/community/triangle_count_wrapper.pyx @@ -17,8 +17,8 @@ # cython: language_level = 3 from cugraph.community.triangle_count cimport triangle_count as c_triangle_count -from cugraph.structure.graph_new cimport * -from cugraph.structure import graph_new_wrapper +from cugraph.structure.graph_primtypes cimport * +from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t import numpy as np @@ -36,8 +36,8 @@ def triangles(input_graph): if not input_graph.adjlist: input_graph.view_adj_list() - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, - input_graph.adjlist.indices], [np.int32]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, + input_graph.adjlist.indices], [np.int32]) num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) @@ -49,5 +49,5 @@ def triangles(input_graph): graph = GraphCSRView[int,int,float](c_offsets, c_indices, NULL, num_verts, num_edges) result = 
c_triangle_count(graph) - + return result diff --git a/python/cugraph/components/connectivity.pxd b/python/cugraph/components/connectivity.pxd index b2dc953e052..94fa165969d 100644 --- a/python/cugraph/components/connectivity.pxd +++ b/python/cugraph/components/connectivity.pxd @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * cdef extern from "algorithms.hpp" namespace "cugraph": @@ -30,4 +30,3 @@ cdef extern from "algorithms.hpp" namespace "cugraph": const GraphCSRView[VT,ET,WT] &graph, cugraph_cc_t connect_type, VT *labels) except + - diff --git a/python/cugraph/components/connectivity_wrapper.pyx b/python/cugraph/components/connectivity_wrapper.pyx index a738ad0c9db..9f6fa353001 100644 --- a/python/cugraph/components/connectivity_wrapper.pyx +++ b/python/cugraph/components/connectivity_wrapper.pyx @@ -17,9 +17,9 @@ # cython: language_level = 3 from cugraph.components.connectivity cimport * -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * from cugraph.structure import utils_wrapper -from cugraph.structure import graph_new_wrapper +from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t from cugraph.structure.symmetrize import symmetrize from cugraph.structure.graph import Graph as type_Graph @@ -33,24 +33,24 @@ def weakly_connected_components(input_graph): """ offsets = None indices = None - + if type(input_graph) is not type_Graph: # # Need to create a symmetrized CSR for this local # computation, don't want to keep it. 
# - [src, dst] = graph_new_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], - input_graph.edgelist.edgelist_df['dst']], - [np.int32]) + [src, dst] = graph_primtypes_wrapper.datatype_cast([input_graph.edgelist.edgelist_df['src'], + input_graph.edgelist.edgelist_df['dst']], + [np.int32]) src, dst = symmetrize(src, dst) [offsets, indices] = utils_wrapper.coo2csr(src, dst)[0:2] else: if not input_graph.adjlist: input_graph.view_adj_list() - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, - input_graph.adjlist.indices], - [np.int32]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, + input_graph.adjlist.indices], + [np.int32]) num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) @@ -58,7 +58,7 @@ def weakly_connected_components(input_graph): df = cudf.DataFrame() df['vertices'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) df['labels'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - + cdef uintptr_t c_offsets = offsets.__cuda_array_interface__['data'][0] cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] cdef uintptr_t c_identifier = df['vertices'].__cuda_array_interface__['data'][0]; @@ -83,7 +83,7 @@ def strongly_connected_components(input_graph): if not input_graph.adjlist: input_graph.view_adj_list() - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) @@ -91,7 +91,7 @@ def strongly_connected_components(input_graph): df = cudf.DataFrame() df['vertices'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) df['labels'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - + cdef uintptr_t 
c_offsets = offsets.__cuda_array_interface__['data'][0] cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] cdef uintptr_t c_identifier = df['vertices'].__cuda_array_interface__['data'][0]; diff --git a/python/cugraph/cores/core_number.pxd b/python/cugraph/cores/core_number.pxd index f679ccf7800..cf28720a3e8 100644 --- a/python/cugraph/cores/core_number.pxd +++ b/python/cugraph/cores/core_number.pxd @@ -16,11 +16,10 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * cdef extern from "algorithms.hpp" namespace "cugraph": cdef void core_number[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph, VT *core_number) except + - diff --git a/python/cugraph/cores/core_number_wrapper.pyx b/python/cugraph/cores/core_number_wrapper.pyx index 0b8dc63c294..3df1df5f8e9 100644 --- a/python/cugraph/cores/core_number_wrapper.pyx +++ b/python/cugraph/cores/core_number_wrapper.pyx @@ -17,8 +17,8 @@ # cython: language_level = 3 cimport cugraph.cores.core_number as c_core -from cugraph.structure.graph_new cimport * -from cugraph.structure import graph_new_wrapper +from cugraph.structure.graph_primtypes cimport * +from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t import cudf @@ -33,7 +33,7 @@ def core_number(input_graph): if not input_graph.adjlist: input_graph.view_adj_list() - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) diff --git a/python/cugraph/cores/k_core.pxd b/python/cugraph/cores/k_core.pxd index 9b001494143..556dbc95ed9 100644 --- a/python/cugraph/cores/k_core.pxd +++ b/python/cugraph/cores/k_core.pxd @@ 
-16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * cdef extern from "algorithms.hpp" namespace "cugraph": diff --git a/python/cugraph/cores/k_core_wrapper.pyx b/python/cugraph/cores/k_core_wrapper.pyx index 3083ffdf42e..51ecec09dc5 100644 --- a/python/cugraph/cores/k_core_wrapper.pyx +++ b/python/cugraph/cores/k_core_wrapper.pyx @@ -17,8 +17,8 @@ # cython: language_level = 3 from cugraph.cores.k_core cimport k_core as c_k_core -from cugraph.structure.graph_new cimport * -from cugraph.structure import graph_new_wrapper +from cugraph.structure.graph_primtypes cimport * +from cugraph.structure import graph_primtypes_wrapper from libcpp cimport bool from libc.stdint cimport uintptr_t from libc.float cimport FLT_MAX_EXP @@ -32,7 +32,7 @@ import numpy as np #### Ripple down through implementation (algorithms.hpp, core_number.cu) cdef (uintptr_t, uintptr_t) core_number_params(core_number): - [core_number['vertex'], core_number['values']] = graph_new_wrapper.datatype_cast([core_number['vertex'], core_number['values']], [np.int32]) + [core_number['vertex'], core_number['values']] = graph_primtypes_wrapper.datatype_cast([core_number['vertex'], core_number['values']], [np.int32]) cdef uintptr_t c_vertex = core_number['vertex'].__cuda_array_interface__['data'][0] cdef uintptr_t c_values = core_number['values'].__cuda_array_interface__['data'][0] return (c_vertex, c_values) @@ -54,7 +54,7 @@ def k_core(input_graph, k, core_number): """ Call k_core """ - if graph_new_wrapper.weight_type(input_graph) == np.float64: + if graph_primtypes_wrapper.weight_type(input_graph) == np.float64: return k_core_double(input_graph, k, core_number) else: return k_core_float(input_graph, k, core_number) diff --git a/python/cugraph/dask/link_analysis/mg_pagerank.pxd b/python/cugraph/dask/link_analysis/mg_pagerank.pxd index 4de9becf10d..429cb775e07 100644 --- 
a/python/cugraph/dask/link_analysis/mg_pagerank.pxd +++ b/python/cugraph/dask/link_analysis/mg_pagerank.pxd @@ -14,7 +14,7 @@ # limitations under the License. # -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool diff --git a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx index c5a72647e03..39b856e4946 100644 --- a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx +++ b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx @@ -17,8 +17,8 @@ from cugraph.structure.utils_wrapper import * from cugraph.dask.link_analysis cimport mg_pagerank as c_pagerank import cudf -from cugraph.structure.graph_new cimport * -import cugraph.structure.graph_new_wrapper as graph_new_wrapper +from cugraph.structure.graph_primtypes cimport * +import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper from libc.stdint cimport uintptr_t from cython.operator cimport dereference as deref @@ -41,12 +41,12 @@ def mg_pagerank(input_df, local_data, rank, handle, alpha=0.85, max_iter=100, to dst = dst - local_offset num_local_verts = local_data['verts'][rank] num_local_edges = len(src) - + cdef uintptr_t c_local_verts = local_data['verts'].__array_interface__['data'][0] cdef uintptr_t c_local_edges = local_data['edges'].__array_interface__['data'][0] cdef uintptr_t c_local_offsets = local_data['offsets'].__array_interface__['data'][0] - [src, dst] = graph_new_wrapper.datatype_cast([src, dst], [np.int32]) + [src, dst] = graph_primtypes_wrapper.datatype_cast([src, dst], [np.int32]) _offsets, indices, weights = coo2csr(dst, src, None) offsets = _offsets[:num_local_verts + 1] del _offsets @@ -56,11 +56,11 @@ def mg_pagerank(input_df, local_data, rank, handle, alpha=0.85, max_iter=100, to cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0]; cdef uintptr_t c_pagerank_val = 
df['pagerank'].__cuda_array_interface__['data'][0]; - + cdef uintptr_t c_pers_vtx = NULL cdef uintptr_t c_pers_val = NULL cdef int sz = 0 - + cdef uintptr_t c_offsets = offsets.__cuda_array_interface__['data'][0] cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] cdef uintptr_t c_weights = NULL diff --git a/python/cugraph/dask/structure/replication.pyx b/python/cugraph/dask/structure/replication.pyx index 7256fa63448..6d579e126bf 100644 --- a/python/cugraph/dask/structure/replication.pyx +++ b/python/cugraph/dask/structure/replication.pyx @@ -18,7 +18,7 @@ from libc.stdint cimport uintptr_t from cugraph.structure cimport utils as c_utils -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * from libc.stdint cimport uintptr_t import cudf diff --git a/python/cugraph/dask/traversal/mg_bfs.pxd b/python/cugraph/dask/traversal/mg_bfs.pxd index 8b9e8c1c81f..68010e2b816 100644 --- a/python/cugraph/dask/traversal/mg_bfs.pxd +++ b/python/cugraph/dask/traversal/mg_bfs.pxd @@ -14,7 +14,7 @@ # limitations under the License. 
# -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool diff --git a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx index 66a2668a41f..4c13aeb1286 100644 --- a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx +++ b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx @@ -17,8 +17,8 @@ from cugraph.structure.utils_wrapper import * from cugraph.dask.traversal cimport mg_bfs as c_bfs import cudf -from cugraph.structure.graph_new cimport * -import cugraph.structure.graph_new_wrapper as graph_new_wrapper +from cugraph.structure.graph_primtypes cimport * +import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper from libc.stdint cimport uintptr_t def mg_bfs(input_df, local_data, rank, handle, start, result_len, return_distances=False): @@ -40,7 +40,7 @@ def mg_bfs(input_df, local_data, rank, handle, start, result_len, return_distanc num_local_edges = len(src) # Convert to local CSR - [src, dst] = graph_new_wrapper.datatype_cast([src, dst], [np.int32]) + [src, dst] = graph_primtypes_wrapper.datatype_cast([src, dst], [np.int32]) _offsets, indices, weights = coo2csr(src, dst, None) offsets = _offsets[:num_local_verts + 1] del _offsets diff --git a/python/cugraph/layout/force_atlas2.pxd b/python/cugraph/layout/force_atlas2.pxd index 3b1d64d31a1..a26abaa16c0 100644 --- a/python/cugraph/layout/force_atlas2.pxd +++ b/python/cugraph/layout/force_atlas2.pxd @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool cdef extern from "internals.hpp" namespace "cugraph::internals": diff --git a/python/cugraph/layout/force_atlas2_wrapper.pyx b/python/cugraph/layout/force_atlas2_wrapper.pyx index 128e5f61f3c..31bf8fc029e 100644 --- a/python/cugraph/layout/force_atlas2_wrapper.pyx +++ 
b/python/cugraph/layout/force_atlas2_wrapper.pyx @@ -17,8 +17,8 @@ # cython: language_level = 3 from cugraph.layout.force_atlas2 cimport force_atlas2 as c_force_atlas2 -from cugraph.structure import graph_new_wrapper -from cugraph.structure.graph_new cimport * +from cugraph.structure import graph_primtypes_wrapper +from cugraph.structure.graph_primtypes cimport * from cugraph.structure import utils_wrapper from libcpp cimport bool from libc.stdint cimport uintptr_t diff --git a/python/cugraph/link_analysis/hits.pxd b/python/cugraph/link_analysis/hits.pxd index 2efa417655a..60d25fd3cdb 100644 --- a/python/cugraph/link_analysis/hits.pxd +++ b/python/cugraph/link_analysis/hits.pxd @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool diff --git a/python/cugraph/link_analysis/hits_wrapper.pyx b/python/cugraph/link_analysis/hits_wrapper.pyx index 5f52df63fe8..3e19e38a023 100644 --- a/python/cugraph/link_analysis/hits_wrapper.pyx +++ b/python/cugraph/link_analysis/hits_wrapper.pyx @@ -17,10 +17,10 @@ # cython: language_level = 3 from cugraph.link_analysis.hits cimport hits as c_hits -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool from libc.stdint cimport uintptr_t -from cugraph.structure import graph_new_wrapper +from cugraph.structure import graph_primtypes_wrapper import cudf import rmm import numpy as np @@ -38,7 +38,7 @@ def hits(input_graph, max_iter=100, tol=1.0e-5, nstart=None, normalized=True): if not input_graph.adjlist: input_graph.view_adj_list() - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) num_verts = input_graph.number_of_vertices() 
num_edges = input_graph.number_of_edges(directed_edges=True) @@ -59,7 +59,7 @@ def hits(input_graph, max_iter=100, tol=1.0e-5, nstart=None, normalized=True): cdef uintptr_t c_weights = NULL cdef GraphCSRView[int,int,float] graph_float - + graph_float = GraphCSRView[int,int,float](c_offsets, c_indices, c_weights, num_verts, num_edges) c_hits[int,int,float](graph_float, max_iter, tol, NULL, diff --git a/python/cugraph/link_analysis/pagerank.pxd b/python/cugraph/link_analysis/pagerank.pxd index e5ec22a5d35..df94b95d72e 100644 --- a/python/cugraph/link_analysis/pagerank.pxd +++ b/python/cugraph/link_analysis/pagerank.pxd @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool diff --git a/python/cugraph/link_analysis/pagerank_wrapper.pyx b/python/cugraph/link_analysis/pagerank_wrapper.pyx index 4b045264ead..9f4e555bbd9 100644 --- a/python/cugraph/link_analysis/pagerank_wrapper.pyx +++ b/python/cugraph/link_analysis/pagerank_wrapper.pyx @@ -18,10 +18,10 @@ #cimport cugraph.link_analysis.pagerank as c_pagerank from cugraph.link_analysis.pagerank cimport pagerank as c_pagerank -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool from libc.stdint cimport uintptr_t -from cugraph.structure import graph_new_wrapper +from cugraph.structure import graph_primtypes_wrapper import cudf import rmm import numpy as np @@ -39,8 +39,8 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. 
cdef unique_ptr[handle_t] handle_ptr handle_ptr.reset(new handle_t()) - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.transposedadjlist.offsets, input_graph.transposedadjlist.indices], [np.int32]) - [weights] = graph_new_wrapper.datatype_cast([input_graph.transposedadjlist.weights], [np.float32, np.float64]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.transposedadjlist.offsets, input_graph.transposedadjlist.indices], [np.int32]) + [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.transposedadjlist.weights], [np.float32, np.float64]) num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) @@ -74,21 +74,21 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. cdef GraphCSCView[int,int,float] graph_float cdef GraphCSCView[int,int,double] graph_double - + if personalization is not None: sz = personalization['vertex'].shape[0] personalization['vertex'] = personalization['vertex'].astype(np.int32) personalization['values'] = personalization['values'].astype(df['pagerank'].dtype) c_pers_vtx = personalization['vertex'].__cuda_array_interface__['data'][0] c_pers_val = personalization['values'].__cuda_array_interface__['data'][0] - - if (df['pagerank'].dtype == np.float32): + + if (df['pagerank'].dtype == np.float32): graph_float = GraphCSCView[int,int,float](c_offsets, c_indices, c_weights, num_verts, num_edges) c_pagerank[int,int,float](handle_ptr.get()[0], graph_float, c_pagerank_val, sz, c_pers_vtx, c_pers_val, alpha, tol, max_iter, has_guess) graph_float.get_vertex_identifiers(c_identifier) - else: + else: graph_double = GraphCSCView[int,int,double](c_offsets, c_indices, c_weights, num_verts, num_edges) c_pagerank[int,int,double](handle_ptr.get()[0], graph_double, c_pagerank_val, sz, c_pers_vtx, c_pers_val, alpha, tol, max_iter, has_guess) diff --git a/python/cugraph/link_prediction/jaccard.pxd 
b/python/cugraph/link_prediction/jaccard.pxd index 4cb5a46fe53..bc55bb2cdf0 100644 --- a/python/cugraph/link_prediction/jaccard.pxd +++ b/python/cugraph/link_prediction/jaccard.pxd @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * cdef extern from "algorithms.hpp" namespace "cugraph": @@ -25,7 +25,7 @@ cdef extern from "algorithms.hpp" namespace "cugraph": const GraphCSRView[VT,ET,WT] &graph, const WT *weights, WT *result) except + - + cdef void jaccard_list[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph, const WT *weights, diff --git a/python/cugraph/link_prediction/jaccard_wrapper.pyx b/python/cugraph/link_prediction/jaccard_wrapper.pyx index 24e2ca429f5..cacd13dec65 100644 --- a/python/cugraph/link_prediction/jaccard_wrapper.pyx +++ b/python/cugraph/link_prediction/jaccard_wrapper.pyx @@ -18,8 +18,8 @@ from cugraph.link_prediction.jaccard cimport jaccard as c_jaccard from cugraph.link_prediction.jaccard cimport jaccard_list as c_jaccard_list -from cugraph.structure.graph_new cimport * -from cugraph.structure import graph_new_wrapper +from cugraph.structure.graph_primtypes cimport * +from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t from cython cimport floating @@ -35,21 +35,21 @@ def jaccard(input_graph, weights_arr=None, vertex_pair=None): indices = None if input_graph.adjlist: - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, - input_graph.adjlist.indices], [np.int32]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, + input_graph.adjlist.indices], [np.int32]) elif input_graph.transposedadjlist: # # NOTE: jaccard ONLY operates on an undirected graph, so CSR and CSC should be # equivalent. The undirected check has already happened, so we'll just use # the CSC as if it were CSR. 
# - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.transposedadjlist.offsets, - input_graph.transposedadjlist.indices], [np.int32]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.transposedadjlist.offsets, + input_graph.transposedadjlist.indices], [np.int32]) else: input_graph.view_adj_list() - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, - input_graph.adjlist.indices], [np.int32]) - + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, + input_graph.adjlist.indices], [np.int32]) + num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) @@ -71,7 +71,7 @@ def jaccard(input_graph, weights_arr=None, vertex_pair=None): weight_type = np.float32 if weights_arr is not None: - [weights] = graph_new_wrapper.datatype_cast([weights_arr], [np.float32, np.float64]) + [weights] = graph_primtypes_wrapper.datatype_cast([weights_arr], [np.float32, np.float64]) c_weights = weights.__cuda_array_interface__['data'][0] weight_type = weights.dtype @@ -111,7 +111,7 @@ def jaccard(input_graph, weights_arr=None, vertex_pair=None): c_first_col, c_second_col, c_result_col) - + return df else: # error check performed in jaccard.py @@ -153,5 +153,5 @@ def jaccard(input_graph, weights_arr=None, vertex_pair=None): c_result_col) graph_double.get_source_indices(c_src_index_col) - + return df diff --git a/python/cugraph/link_prediction/overlap.pxd b/python/cugraph/link_prediction/overlap.pxd index 5f8c8ee8449..970032b56eb 100644 --- a/python/cugraph/link_prediction/overlap.pxd +++ b/python/cugraph/link_prediction/overlap.pxd @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * cdef extern from "algorithms.hpp" namespace "cugraph": @@ -25,7 +25,7 @@ cdef extern from "algorithms.hpp" namespace "cugraph": const 
GraphCSRView[VT,ET,WT] &graph, const WT *weights, WT *result) except + - + cdef void overlap_list[VT,ET,WT]( const GraphCSRView[VT,ET,WT] &graph, const WT *weights, diff --git a/python/cugraph/link_prediction/overlap_wrapper.pyx b/python/cugraph/link_prediction/overlap_wrapper.pyx index 61b04d0d315..9e2f3ba49d7 100644 --- a/python/cugraph/link_prediction/overlap_wrapper.pyx +++ b/python/cugraph/link_prediction/overlap_wrapper.pyx @@ -18,8 +18,8 @@ from cugraph.link_prediction.overlap cimport overlap as c_overlap from cugraph.link_prediction.overlap cimport overlap_list as c_overlap_list -from cugraph.structure.graph_new cimport * -from cugraph.structure import graph_new_wrapper +from cugraph.structure.graph_primtypes cimport * +from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t from cython cimport floating @@ -35,14 +35,14 @@ def overlap(input_graph, weights_arr=None, vertex_pair=None): if not input_graph.adjlist: input_graph.view_adj_list() - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) first = None second = None - + cdef uintptr_t c_result_col = NULL cdef uintptr_t c_first_col = NULL cdef uintptr_t c_second_col = NULL @@ -58,7 +58,7 @@ def overlap(input_graph, weights_arr=None, vertex_pair=None): weight_type = np.float32 if weights_arr is not None: - [weights] = graph_new_wrapper.datatype_cast([weights_arr], [np.float32, np.float64]) + [weights] = graph_primtypes_wrapper.datatype_cast([weights_arr], [np.float32, np.float64]) c_weights = weights.__cuda_array_interface__['data'][0] weight_type = weights.dtype @@ -69,7 +69,7 @@ def overlap(input_graph, weights_arr=None, vertex_pair=None): df = cudf.DataFrame() 
df['overlap_coeff'] = result - + first = vertex_pair['first'] second = vertex_pair['second'] @@ -97,7 +97,7 @@ def overlap(input_graph, weights_arr=None, vertex_pair=None): c_first_col, c_second_col, c_result_col) - + return df else: # error check performed in overlap.py @@ -139,5 +139,5 @@ def overlap(input_graph, weights_arr=None, vertex_pair=None): c_result_col) graph_double.get_source_indices(c_src_index_col) - + return df diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index c918cd44ae2..9361477941e 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -11,7 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -from cugraph.structure import graph_new_wrapper +from cugraph.structure import graph_primtypes_wrapper from cugraph.structure.symmetrize import symmetrize from cugraph.structure.number_map import NumberMap from cugraph.dask.common.input_utils import get_local_data @@ -526,7 +526,7 @@ def view_edge_list(self): raise Exception("Graph has no Edgelist.") return self.edgelist.edgelist_df if self.edgelist is None: - src, dst, weights = graph_new_wrapper.view_edge_list(self) + src, dst, weights = graph_primtypes_wrapper.view_edge_list(self) self.edgelist = self.EdgeList(src, dst, weights) edgelist_df = self.edgelist.edgelist_df @@ -696,7 +696,7 @@ def view_adj_list(self): self.transposedadjlist.weights, ) else: - off, ind, vals = graph_new_wrapper.view_adj_list(self) + off, ind, vals = graph_primtypes_wrapper.view_adj_list(self) self.adjlist = self.AdjList(off, ind, vals) if self.batch_enabled: @@ -739,9 +739,8 @@ def view_transposed_adj_list(self): self.adjlist.weights, ) else: - off, ind, vals = graph_new_wrapper.view_transposed_adj_list( - self - ) + off, ind, vals = \ + graph_primtypes_wrapper.view_transposed_adj_list(self) self.transposedadjlist = self.transposedAdjList(off, ind, vals) if self.batch_enabled: @@ -776,7 +775,7 @@ 
def get_two_hop_neighbors(self): """ if self.distributed: raise Exception("Not supported for distributed graph") - df = graph_new_wrapper.get_two_hop_neighbors(self) + df = graph_primtypes_wrapper.get_two_hop_neighbors(self) if self.renumbered is True: df = self.unrenumber(df, "first") df = self.unrenumber(df, "second") @@ -1003,9 +1002,8 @@ def degrees(self, vertex_subset=None): """ if self.distributed: raise Exception("Not supported for distributed graph") - vertex_col, in_degree_col, out_degree_col = graph_new_wrapper._degrees( - self - ) + vertex_col, in_degree_col, out_degree_col = \ + graph_primtypes_wrapper._degrees(self) df = cudf.DataFrame() df["vertex"] = vertex_col @@ -1021,7 +1019,7 @@ def degrees(self, vertex_subset=None): return df def _degree(self, vertex_subset, x=0): - vertex_col, degree_col = graph_new_wrapper._degree(self, x) + vertex_col, degree_col = graph_primtypes_wrapper._degree(self, x) df = cudf.DataFrame() df["vertex"] = vertex_col df["degree"] = degree_col diff --git a/python/cugraph/structure/graph_new.pxd b/python/cugraph/structure/graph_new.pxd deleted file mode 100644 index 2343a0604dc..00000000000 --- a/python/cugraph/structure/graph_new.pxd +++ /dev/null @@ -1,192 +0,0 @@ -# Copyright (c) 2019-2020, NVIDIA CORPORATION. -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -# cython: profile=False -# distutils: language = c++ -# cython: embedsignature = True -# cython: language_level = 3 - -from libcpp cimport bool -from libcpp.memory cimport unique_ptr - -from rmm._lib.device_buffer cimport device_buffer - -cdef extern from "raft/handle.hpp" namespace "raft": - cdef cppclass handle_t: - handle_t() except + - -cdef extern from "graph.hpp" namespace "cugraph": - - ctypedef enum PropType: - PROP_UNDEF "cugraph::PROP_UNDEF" - PROP_FALSE "cugraph::PROP_FALSE" - PROP_TRUE "cugraph::PROP_TRUE" - - ctypedef enum DegreeDirection: - DIRECTION_IN_PLUS_OUT "cugraph::DegreeDirection::IN_PLUS_OUT" - DIRECTION_IN "cugraph::DegreeDirection::IN" - DIRECTION_OUT "cugraph::DegreeDirection::OUT" - - struct GraphProperties: - bool directed - bool weighted - bool multigraph - bool bipartite - bool tree - PropType has_negative_edges - - cdef cppclass GraphViewBase[VT,ET,WT]: - WT *edge_data - handle_t *handle; - GraphProperties prop - VT number_of_vertices - ET number_of_edges - VT* local_vertices - ET* local_edges - VT* local_offsets - void set_handle(handle_t*) - void set_local_data(VT* local_vertices_, ET* local_edges_, VT* local_offsets_) - void get_vertex_identifiers(VT *) const - - GraphViewBase(WT*,VT,ET) - - cdef cppclass GraphCOOView[VT,ET,WT](GraphViewBase[VT,ET,WT]): - VT *src_indices - VT *dst_indices - - void degree(ET *,DegreeDirection) const - - GraphCOOView() - GraphCOOView(const VT *, const ET *, const WT *, size_t, size_t) - - cdef cppclass GraphCompressedSparseBaseView[VT,ET,WT](GraphViewBase[VT,ET,WT]): - ET *offsets - VT *indices - - void get_source_indices(VT *) const - void degree(ET *,DegreeDirection) const - - GraphCompressedSparseBaseView(const VT *, const ET *, const WT *, size_t, size_t) - - cdef cppclass GraphCSRView[VT,ET,WT](GraphCompressedSparseBaseView[VT,ET,WT]): - GraphCSRView() - GraphCSRView(const VT *, const ET *, const WT *, size_t, size_t) - - cdef cppclass 
GraphCSCView[VT,ET,WT](GraphCompressedSparseBaseView[VT,ET,WT]): - GraphCSCView() - GraphCSCView(const VT *, const ET *, const WT *, size_t, size_t) - - cdef cppclass GraphCOOContents[VT,ET,WT]: - VT number_of_vertices - ET number_of_edges - unique_ptr[device_buffer] src_indices - unique_ptr[device_buffer] dst_indices - unique_ptr[device_buffer] edge_data - - cdef cppclass GraphCOO[VT,ET,WT]: - GraphCOO( - VT nv, - ET ne, - bool has_data) except+ - GraphCOOContents[VT,ET,WT] release() - GraphCOOView[VT,ET,WT] view() - - cdef cppclass GraphSparseContents[VT,ET,WT]: - VT number_of_vertices - ET number_of_edges - unique_ptr[device_buffer] offsets - unique_ptr[device_buffer] indices - unique_ptr[device_buffer] edge_data - - cdef cppclass GraphCSC[VT,ET,WT]: - GraphCSC( - VT nv, - ET ne, - bool has_data) except+ - GraphSparseContents[VT,ET,WT] release() - GraphCSCView[VT,ET,WT] view() - - cdef cppclass GraphCSR[VT,ET,WT]: - GraphCSR( - VT nv, - ET ne, - bool has_data) except+ - GraphSparseContents[VT,ET,WT] release() - GraphCSRView[VT,ET,WT] view() - - - -cdef extern from "algorithms.hpp" namespace "cugraph": - - cdef unique_ptr[GraphCOO[VT, ET, WT]] get_two_hop_neighbors[VT,ET,WT]( - const GraphCSRView[VT, ET, WT] &graph) except + - -cdef extern from "functions.hpp" namespace "cugraph": - - cdef unique_ptr[device_buffer] renumber_vertices[VT_IN,VT_OUT,ET]( - ET number_of_edges, - const VT_IN *src, - const VT_IN *dst, - VT_OUT *src_renumbered, - VT_OUT *dst_renumbered, - ET *map_size) except + - - -cdef extern from "" namespace "std" nogil: - cdef unique_ptr[GraphCOO[int,int,float]] move(unique_ptr[GraphCOO[int,int,float]]) - cdef unique_ptr[GraphCOO[int,int,double]] move(unique_ptr[GraphCOO[int,int,double]]) - cdef GraphCOOContents[int,int,float] move(GraphCOOContents[int,int,float]) - cdef GraphCOOContents[int,int,double] move(GraphCOOContents[int,int,double]) - cdef device_buffer move(device_buffer) - cdef unique_ptr[device_buffer] move(unique_ptr[device_buffer]) - 
cdef unique_ptr[GraphCSR[int,int,float]] move(unique_ptr[GraphCSR[int,int,float]]) - cdef unique_ptr[GraphCSR[int,int,double]] move(unique_ptr[GraphCSR[int,int,double]]) - cdef GraphSparseContents[int,int,float] move(GraphSparseContents[int,int,float]) - cdef GraphSparseContents[int,int,double] move(GraphSparseContents[int,int,double]) - -ctypedef unique_ptr[GraphCOO[int,int,float]] GraphCOOPtrFloat -ctypedef unique_ptr[GraphCOO[int,int,double]] GraphCOOPtrDouble - -ctypedef fused GraphCOOPtrType: - GraphCOOPtrFloat - GraphCOOPtrDouble - -ctypedef unique_ptr[GraphCSR[int,int,float]] GraphCSRPtrFloat -ctypedef unique_ptr[GraphCSR[int,int,double]] GraphCSRPtrDouble - -ctypedef fused GraphCSRPtrType: - GraphCSRPtrFloat - GraphCSRPtrDouble - -ctypedef GraphCOOView[int,int,float] GraphCOOViewFloat -ctypedef GraphCOOView[int,int,double] GraphCOOViewDouble -ctypedef GraphCSRView[int,int,float] GraphCSRViewFloat -ctypedef GraphCSRView[int,int,double] GraphCSRViewDouble - -ctypedef fused GraphCOOViewType: - GraphCOOViewFloat - GraphCOOViewDouble - -ctypedef fused GraphCSRViewType: - GraphCSRViewFloat - GraphCSRViewDouble - -ctypedef fused GraphViewType: - GraphCOOViewFloat - GraphCOOViewDouble - GraphCSRViewFloat - GraphCSRViewDouble - -cdef coo_to_df(GraphCOOPtrType graph) -cdef csr_to_series(GraphCSRPtrType graph) -cdef GraphViewType get_graph_view(input_graph, bool weightless=*, GraphViewType* dummy=*) diff --git a/python/cugraph/structure/graph.pxd b/python/cugraph/structure/graph_primtypes.pxd similarity index 100% rename from python/cugraph/structure/graph.pxd rename to python/cugraph/structure/graph_primtypes.pxd diff --git a/python/cugraph/structure/graph_new.pyx b/python/cugraph/structure/graph_primtypes.pyx similarity index 100% rename from python/cugraph/structure/graph_new.pyx rename to python/cugraph/structure/graph_primtypes.pyx diff --git a/python/cugraph/structure/graph_new_wrapper.pyx b/python/cugraph/structure/graph_primtypes_wrapper.pyx similarity index 
96% rename from python/cugraph/structure/graph_new_wrapper.pyx rename to python/cugraph/structure/graph_primtypes_wrapper.pyx index da596bc988f..7bc62b9a1af 100644 --- a/python/cugraph/structure/graph_new_wrapper.pyx +++ b/python/cugraph/structure/graph_primtypes_wrapper.pyx @@ -16,9 +16,9 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * -from cugraph.structure.graph_new cimport get_two_hop_neighbors as c_get_two_hop_neighbors -from cugraph.structure.graph_new cimport renumber_vertices as c_renumber_vertices +from cugraph.structure.graph_primtypes cimport * +from cugraph.structure.graph_primtypes cimport get_two_hop_neighbors as c_get_two_hop_neighbors +from cugraph.structure.graph_primtypes cimport renumber_vertices as c_renumber_vertices from cugraph.structure.utils_wrapper import * from libcpp cimport bool from libc.stdint cimport uintptr_t @@ -47,7 +47,7 @@ def datatype_cast(cols, dtypes): def renumber(source_col, dest_col): num_edges = len(source_col) - + src_renumbered = cudf.Series(np.zeros(num_edges), dtype=np.int32) dst_renumbered = cudf.Series(np.zeros(num_edges), dtype=np.int32) @@ -74,15 +74,15 @@ def renumber(source_col, dest_col): c_src_renumbered, c_dst_renumbered, &map_size)) - - + + map = DeviceBuffer.c_from_unique_ptr(move(numbering_map)) map = Buffer(map) - + output_map = cudf.Series(data=map, dtype=source_col.dtype) return src_renumbered, dst_renumbered, output_map - + def view_adj_list(input_graph): @@ -213,7 +213,7 @@ def _degree_csr(offsets, indices, x=0): cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] graph = GraphCSRView[int,int,float](c_offsets, c_indices, NULL, num_verts, num_edges) - + graph.degree( c_degree, dir) graph.get_vertex_identifiers(c_vertex) @@ -224,7 +224,7 @@ def _degree(input_graph, x=0): transpose_x = { 0: 0, 2: 1, 1: 2 } - + if input_graph.adjlist is not None: return _degree_csr(input_graph.adjlist.offsets, input_graph.adjlist.indices, @@ 
-248,17 +248,17 @@ def _degree(input_graph, x=0): data.calculate_parts_to_sizes(comms) degree_ddf = [client.submit(_degree_coo, wf[1][0], 'src', 'dst', x, num_verts, comms.sessionId, workers=[wf[0]]) for idx, wf in enumerate(data.worker_to_parts.items())] wait(degree_ddf) - return degree_ddf[0].result() + return degree_ddf[0].result() return _degree_coo(input_graph.edgelist.edgelist_df, 'src', 'dst', x) - + raise Exception("input_graph not COO, CSR or CSC") - + def _degrees(input_graph): verts, indegrees = _degree(input_graph,1) verts, outdegrees = _degree(input_graph, 2) - + return verts, indegrees, outdegrees diff --git a/python/cugraph/structure/utils.pxd b/python/cugraph/structure/utils.pxd index 3f48e0fdd2d..0ec9c914347 100644 --- a/python/cugraph/structure/utils.pxd +++ b/python/cugraph/structure/utils.pxd @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * from libcpp.memory cimport unique_ptr cdef extern from "raft/handle.hpp" namespace "raft": diff --git a/python/cugraph/structure/utils_wrapper.pyx b/python/cugraph/structure/utils_wrapper.pyx index a847f74d73c..00af5813056 100644 --- a/python/cugraph/structure/utils_wrapper.pyx +++ b/python/cugraph/structure/utils_wrapper.pyx @@ -18,7 +18,7 @@ from libc.stdint cimport uintptr_t from cugraph.structure cimport utils as c_utils -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * from libc.stdint cimport uintptr_t import cudf diff --git a/python/cugraph/traversal/bfs.pxd b/python/cugraph/traversal/bfs.pxd index ea9f3e4a0e4..0502754c161 100644 --- a/python/cugraph/traversal/bfs.pxd +++ b/python/cugraph/traversal/bfs.pxd @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool diff --git 
a/python/cugraph/traversal/bfs_wrapper.pyx b/python/cugraph/traversal/bfs_wrapper.pyx index dbbda90b17e..c13e1eb58ee 100644 --- a/python/cugraph/traversal/bfs_wrapper.pyx +++ b/python/cugraph/traversal/bfs_wrapper.pyx @@ -17,8 +17,8 @@ # cython: language_level = 3 cimport cugraph.traversal.bfs as c_bfs -from cugraph.structure.graph_new cimport * -from cugraph.structure import graph_new_wrapper +from cugraph.structure.graph_primtypes cimport * +from cugraph.structure import graph_primtypes_wrapper from libcpp cimport bool from libc.stdint cimport uintptr_t from libc.float cimport FLT_MAX_EXP @@ -56,7 +56,7 @@ def bfs(input_graph, start, directed=True, # Step 3: Extract CSR offsets, indices, weights are not expected # - offsets: int (signed, 32-bit) # - indices: int (signed, 32-bit) - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) c_offsets_ptr = offsets.__cuda_array_interface__['data'][0] c_indices_ptr = indices.__cuda_array_interface__['data'][0] diff --git a/python/cugraph/traversal/sssp.pxd b/python/cugraph/traversal/sssp.pxd index 7067a5e983f..8f36ff12ae8 100644 --- a/python/cugraph/traversal/sssp.pxd +++ b/python/cugraph/traversal/sssp.pxd @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.structure.graph_new cimport * +from cugraph.structure.graph_primtypes cimport * cdef extern from "algorithms.hpp" namespace "cugraph": diff --git a/python/cugraph/traversal/sssp_wrapper.pyx b/python/cugraph/traversal/sssp_wrapper.pyx index ab844819291..1504eee53e1 100644 --- a/python/cugraph/traversal/sssp_wrapper.pyx +++ b/python/cugraph/traversal/sssp_wrapper.pyx @@ -18,8 +18,8 @@ cimport cugraph.traversal.sssp as c_sssp cimport cugraph.traversal.bfs as c_bfs -from cugraph.structure.graph_new cimport * -from cugraph.structure 
import graph_new_wrapper +from cugraph.structure.graph_primtypes cimport * +from cugraph.structure import graph_primtypes_wrapper from libcpp cimport bool from libc.stdint cimport uintptr_t @@ -60,8 +60,8 @@ def sssp(input_graph, source): # - indices: int (signed, 32-bit) # - weights: float / double # Extract data_type from weights (not None: float / double, None: signed int 32-bit) - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) - [weights] = graph_new_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) c_offsets_ptr = offsets.__cuda_array_interface__['data'][0] c_indices_ptr = indices.__cuda_array_interface__['data'][0] From 1c3cefa0f532c92305f17f483eac3c105e3c307b Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Thu, 10 Sep 2020 01:50:42 -0500 Subject: [PATCH 20/74] WIP for calling MG Louvain from Cython wrapper, calling Dask correctly, etc. 
--- python/cugraph/dask/community/louvain.pxd | 9 ++- python/cugraph/dask/community/louvain.py | 57 ++++++++++++++++--- .../dask/community/louvain_wrapper.pyx | 16 +++--- 3 files changed, 62 insertions(+), 20 deletions(-) diff --git a/python/cugraph/dask/community/louvain.pxd b/python/cugraph/dask/community/louvain.pxd index 76a752e5700..79c0d556502 100644 --- a/python/cugraph/dask/community/louvain.pxd +++ b/python/cugraph/dask/community/louvain.pxd @@ -16,17 +16,16 @@ # cython: embedsignature = True # cython: language_level = 3 +from libcpp.pair cimport pair -from cugraph.structure.graph cimport * +from cugraph.structure.graph_primtypes cimport * -# FIXME: need header for MG louvain cdef extern from "algorithms.hpp" namespace "cugraph": - cdef void louvain[vertex_t,edge_t,weight_t]( + cdef pair[int,weight_t] louvain[vertex_t,edge_t,weight_t]( + const handle_t &handle, const GraphCSRView[vertex_t,edge_t,weight_t] &graph, - weight_t *final_modularity, - int *num_level, vertex_t *louvain_parts, int max_level, weight_t resolution) except + diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py index 1b9ea373efa..f00310af74c 100644 --- a/python/cugraph/dask/community/louvain.py +++ b/python/cugraph/dask/community/louvain.py @@ -11,10 +11,27 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from cugraph.dask.community import louvain_wrapper +from dask.distributed import wait, default_client +import cudf +import cugraph.comms.comms as Comms +from cugraph.dask.common.input_utils import get_local_data -def louvain(input_graph, max_iter=100, resolution=1.): +from cugraph.dask.community import louvain_wrapper as c_mg_louvain + + +def call_louvain(sID, graph, max_iter, resolution): + wid = Comms.get_worker_id(sID) + handle = Comms.get_handle(sID) + return c_mg_louvain.louvain(graph, + wid, + handle, + start, + num_verts, + return_distances) + + +def louvain(graph, max_iter=100, resolution=1.): """ Compute the modularity optimizing partition of the input graph using the Louvain method on multiple GPUs @@ -40,16 +57,40 @@ def louvain(input_graph, max_iter=100, resolution=1.): # FIXME: dask methods to populate graphs from edgelists are only present on # DiGraph classes. Disable the Graph check for now and assume inputs are # symmetric DiGraphs. - # if type(input_graph) is not Graph: + # if type(graph) is not Graph: # raise Exception("input graph must be undirected") - parts, modularity_score = louvain_wrapper.louvain( - input_graph, max_iter, resolution - ) + client = default_client() + + if(graph.local_data is not None and + graph.local_data['by'] == 'src'): + data = graph.local_data['data'] + else: + data = get_local_data(graph, by='src', load_balance=load_balance) + + if graph.renumbered: + start = graph.lookup_internal_vertex_id(cudf.Series([start], + dtype='int32')).compute() + start = start.iloc[0] + + + result = dict([(data.worker_info[wf[0]]["rank"], + client.submit( + call_louvain, + Comms.get_session_id(), + wf[1], + data.local_data, + max_iter, + resolution, + workers=[wf[0]])) + for idx, wf in enumerate(data.worker_to_parts.items())]) + wait(result) + + (parts, modularity_score) = result[0].result() - if input_graph.renumbered: + if graph.renumbered: # MG renumbering is lazy, but it's safe to assume it's been called at # this point if 
renumbered=True - parts = input_graph.unrenumber(parts, "vertex") + parts = graph.unrenumber(parts, "vertex") return parts, modularity_score diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index 9787eada435..79c2a72c707 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -16,8 +16,9 @@ # cython: embedsignature = True # cython: language_level = 3 -#from cugraph.dask.community.louvain cimport louvain as c_louvain -#from cugraph.structure.graph cimport * +from cugraph.dask.community.louvain cimport louvain as c_louvain +from cugraph.structure.graph_primtypes cimport * +from cugraph.structure import graph_primtypes_wrapper import cudf import numpy as np @@ -35,19 +36,20 @@ def louvain(input_graph, max_iter, resolution): weights = None final_modularity = None - # FIXME: this needs to go here to stop circular import - from cugraph.structure import graph_new_wrapper - - [offsets, indices] = graph_new_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) num_verts = input_graph.number_of_vertices() num_edges = input_graph.number_of_edges(directed_edges=True) if input_graph.adjlist.weights is not None: - [weights] = graph_new_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) + [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) else: weights = cudf.Series(np.full(num_edges, 1.0, dtype=np.float32)) + #### + # FIXME: call louvain as declared in louvain.pxd here + #### + # Create the output dataframe df = cudf.DataFrame() df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) From 3cadbbbf3cbe525d5ea716a988bb45fbae997b6e Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Thu, 10 Sep 
2020 01:53:57 -0500 Subject: [PATCH 21/74] Added FIXME to test to describe an additional check that's needed --- python/cugraph/tests/dask/test_mg_louvain.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/python/cugraph/tests/dask/test_mg_louvain.py b/python/cugraph/tests/dask/test_mg_louvain.py index aa15c8df17d..f07b3f78419 100644 --- a/python/cugraph/tests/dask/test_mg_louvain.py +++ b/python/cugraph/tests/dask/test_mg_louvain.py @@ -73,7 +73,7 @@ def daskGraphFromDataset(request, client_connection): delimiter=' ', names=['src', 'dst', 'value'], dtype=['int32', 'int32', 'float32']) - #dg = cugraph.proto.Graph() + dg = cugraph.DiGraph() dg.from_dask_cudf_edgelist(ddf, 'src', 'dst') return dg @@ -86,3 +86,6 @@ def test_mg_louvain_with_edgevals(daskGraphFromDataset): # accepting. In the future, an MNMG symmeterize will need to be called to # create a Graph for Louvain. parts, mod = dcg.louvain(daskGraphFromDataset) + + # FIXME: either call Nx with the same dataset and compare results, or + # hardcode golden results to compare to. 
From e2b8a587af5a9d5691da8a4f6e953f9b2c45d847 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Thu, 10 Sep 2020 02:09:11 -0500 Subject: [PATCH 22/74] Temp fix for a circular import problem --- python/cugraph/dask/community/louvain_wrapper.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index 79c2a72c707..b5049cff733 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -18,7 +18,6 @@ from cugraph.dask.community.louvain cimport louvain as c_louvain from cugraph.structure.graph_primtypes cimport * -from cugraph.structure import graph_primtypes_wrapper import cudf import numpy as np @@ -36,6 +35,9 @@ def louvain(input_graph, max_iter, resolution): weights = None final_modularity = None + # FIXME: This must be imported here to prevent a circular import + from cugraph.structure import graph_primtypes_wrapper + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) num_verts = input_graph.number_of_vertices() From 43ff9ad31bc06dd9e785564983de4c43a5990b03 Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Thu, 10 Sep 2020 13:23:06 -0400 Subject: [PATCH 23/74] respond to PR comments --- cpp/include/algorithms.hpp | 26 ++--- cpp/src/community/ECG.cu | 12 +- cpp/src/community/leiden.cu | 44 +++----- cpp/src/community/leiden.cuh | 10 +- cpp/src/community/louvain.cu | 44 +++----- cpp/src/community/louvain.cuh | 12 +- cpp/src/community/triangles_counting.cu | 4 +- cpp/tests/community/leiden_test.cpp | 2 +- cpp/tests/community/louvain_test.cu | 140 +----------------------- python/cugraph/community/leiden.pxd | 4 +- python/cugraph/community/louvain.pxd | 4 +- 11 files changed, 71 insertions(+), 231 deletions(-) diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index f87a8bdc0cb..457aebb27a1 
100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -638,11 +638,11 @@ void bfs(raft::handle_t const &handle, * */ template -std::pair louvain(raft::handle_t const &handle, - GraphCSRView const &graph, - vertex_t *louvain_parts, - int max_iter = 100, - weight_t resolution = weight_t{1}); +std::pair louvain(raft::handle_t const &handle, + GraphCSRView const &graph, + vertex_t *clustering, + size_t max_iter = 100, + weight_t resolution = weight_t{1}); /** * @brief Leiden implementation @@ -679,11 +679,11 @@ std::pair louvain(raft::handle_t const &handle, * 2) modularity of the returned clustering */ template -std::pair leiden(raft::handle_t const &handle, - GraphCSRView const &graph, - vertex_t *leiden_parts, - int max_iter = 100, - weight_t resolution = weight_t{1}); +std::pair leiden(raft::handle_t const &handle, + GraphCSRView const &graph, + vertex_t *clustering, + size_t max_iter = 100, + weight_t resolution = weight_t{1}); /** * @brief Computes the ecg clustering of the given graph. 
@@ -707,15 +707,15 @@ std::pair leiden(raft::handle_t const &handle, * @param[in] graph_csr input graph object (CSR) * @param[in] min_weight The minimum weight parameter * @param[in] ensemble_size The ensemble size parameter - * @param[out] ecg_parts A device pointer to array where the partitioning should be + * @param[out] clustering A device pointer to array where the partitioning should be * written */ template void ecg(raft::handle_t const &handle, - GraphCSRView const &graph_csr, + GraphCSRView const &graph, weight_t min_weight, vertex_t ensemble_size, - vertex_t *ecg_parts); + vertex_t *clustering); namespace triangle { diff --git a/cpp/src/community/ECG.cu b/cpp/src/community/ECG.cu index 9e185110650..9d67a159dd4 100644 --- a/cpp/src/community/ECG.cu +++ b/cpp/src/community/ECG.cu @@ -112,10 +112,10 @@ void ecg(raft::handle_t const &handle, GraphCSRView const &graph, weight_t min_weight, vertex_t ensemble_size, - vertex_t *ecg_parts) + vertex_t *clustering) { CUGRAPH_EXPECTS(graph.edge_data != nullptr, "API error, louvain expects a weighted graph"); - CUGRAPH_EXPECTS(ecg_parts != nullptr, "Invalid API parameter: ecg_parts is NULL"); + CUGRAPH_EXPECTS(clustering != nullptr, "Invalid API parameter: clustering is NULL"); cudaStream_t stream{0}; @@ -143,7 +143,7 @@ void ecg(raft::handle_t const &handle, rmm::device_vector parts_v(size); vertex_t *d_parts = parts_v.data().get(); - cugraph::louvain(handle, permuted_graph->view(), d_parts, 1); + cugraph::louvain(handle, permuted_graph->view(), d_parts, size_t{1}); // For each edge in the graph determine whether the endpoints are in the same partition // Keep a sum for each edge of the total number of times its endpoints are in the same partition @@ -176,7 +176,7 @@ void ecg(raft::handle_t const &handle, louvain_graph.number_of_vertices = graph.number_of_vertices; louvain_graph.number_of_edges = graph.number_of_edges; - cugraph::louvain(handle, louvain_graph, ecg_parts, 100); + cugraph::louvain(handle, 
louvain_graph, clustering, size_t{100}); } // Explicit template instantiations. @@ -184,10 +184,10 @@ template void ecg(raft::handle_t const &, GraphCSRView const &graph, float min_weight, int32_t ensemble_size, - int32_t *ecg_parts); + int32_t *clustering); template void ecg(raft::handle_t const &, GraphCSRView const &graph, double min_weight, int32_t ensemble_size, - int32_t *ecg_parts); + int32_t *clustering); } // namespace cugraph diff --git a/cpp/src/community/leiden.cu b/cpp/src/community/leiden.cu index 8ab21098b07..24ec8fd36ac 100644 --- a/cpp/src/community/leiden.cu +++ b/cpp/src/community/leiden.cu @@ -17,42 +17,30 @@ #include namespace cugraph { -namespace detail { template -std::pair leiden(raft::handle_t const &handle, - GraphCSRView const &graph, - vertex_t *leiden_parts, - int max_level, - weight_t resolution, - cudaStream_t stream) +std::pair leiden(raft::handle_t const &handle, + GraphCSRView const &graph, + vertex_t *clustering, + size_t max_level, + weight_t resolution) { CUGRAPH_EXPECTS(graph.edge_data != nullptr, "API error, leiden expects a weighted graph"); - CUGRAPH_EXPECTS(leiden_parts != nullptr, "API error, leiden_parts is null"); + CUGRAPH_EXPECTS(clustering != nullptr, "API error, clustering is null"); - Leiden> runner(handle, graph, stream); + Leiden> runner(handle, graph); - return runner(leiden_parts, max_level, resolution); + return runner(clustering, max_level, resolution); } -} // namespace detail +// Explicit template instantations +template std::pair leiden( + raft::handle_t const &, GraphCSRView const &, int32_t *, size_t, float); -template -std::pair leiden(raft::handle_t const &handle, - GraphCSRView const &graph, - vertex_t *leiden_parts, - int max_level, - weight_t resolution) -{ - cudaStream_t stream{0}; - - return detail::leiden(handle, graph, leiden_parts, max_level, resolution, stream); -} - -template std::pair leiden( - raft::handle_t const &, GraphCSRView const &, int32_t *, int, float); - -template std::pair leiden( 
- raft::handle_t const &, GraphCSRView const &, int32_t *, int, double); +template std::pair leiden(raft::handle_t const &, + GraphCSRView const &, + int32_t *, + size_t, + double); } // namespace cugraph diff --git a/cpp/src/community/leiden.cuh b/cpp/src/community/leiden.cuh index b89dd25bc55..f2f84433284 100644 --- a/cpp/src/community/leiden.cuh +++ b/cpp/src/community/leiden.cuh @@ -27,8 +27,8 @@ class Leiden : public Louvain { using edge_t = typename graph_type::edge_type; using weight_t = typename graph_type::weight_type; - Leiden(raft::handle_t const &handle, graph_type const &graph, cudaStream_t stream) - : Louvain(handle, graph, stream), constraint_v_(graph.number_of_vertices) + Leiden(raft::handle_t const &handle, graph_type const &graph) + : Louvain(handle, graph), constraint_v_(graph.number_of_vertices) { } @@ -97,9 +97,11 @@ class Leiden : public Louvain { return cur_Q; } - std::pair operator()(vertex_t *d_cluster_vec, int max_level, weight_t resolution) + std::pair operator()(vertex_t *d_cluster_vec, + size_t max_level, + weight_t resolution) { - int num_level{0}; + size_t num_level{0}; weight_t total_edge_weight = thrust::reduce(rmm::exec_policy(this->stream_)->on(this->stream_), this->weights_v_.begin(), diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index 7da884daa92..b11f794194c 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -17,41 +17,29 @@ #include namespace cugraph { -namespace detail { template -std::pair louvain(raft::handle_t const &handle, - GraphCSRView const &graph, - vertex_t *louvain_parts, - int max_level, - weight_t resolution, - cudaStream_t stream) +std::pair louvain(raft::handle_t const &handle, + GraphCSRView const &graph, + vertex_t *clustering, + size_t max_level, + weight_t resolution) { CUGRAPH_EXPECTS(graph.edge_data != nullptr, "API error, louvain expects a weighted graph"); - CUGRAPH_EXPECTS(louvain_parts != nullptr, "API error, louvain_parts is null"); + 
CUGRAPH_EXPECTS(clustering != nullptr, "API error, clustering is null"); - Louvain> runner(handle, graph, stream); + Louvain> runner(handle, graph); - return runner(louvain_parts, max_level, resolution); + return runner(clustering, max_level, resolution); } -} // namespace detail - -template -std::pair louvain(raft::handle_t const &handle, - GraphCSRView const &graph, - vertex_t *louvain_parts, - int max_level, - weight_t resolution) -{ - cudaStream_t stream{0}; - - return detail::louvain(handle, graph, louvain_parts, max_level, resolution, stream); -} - -template std::pair louvain( - raft::handle_t const &, GraphCSRView const &, int32_t *, int, float); -template std::pair louvain( - raft::handle_t const &, GraphCSRView const &, int32_t *, int, double); +// Explicit template instantations +template std::pair louvain( + raft::handle_t const &, GraphCSRView const &, int32_t *, size_t, float); +template std::pair louvain(raft::handle_t const &, + GraphCSRView const &, + int32_t *, + size_t, + double); } // namespace cugraph diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index fa62b70474f..8cec3eccfe6 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -38,7 +38,7 @@ class Louvain { using edge_t = typename graph_type::edge_type; using weight_t = typename graph_type::weight_type; - Louvain(raft::handle_t const &handle, graph_type const &graph, cudaStream_t stream) + Louvain(raft::handle_t const &handle, graph_type const &graph) : #ifdef TIMING hr_timer_(), @@ -60,7 +60,7 @@ class Louvain { cluster_inverse_v_(graph.number_of_vertices), number_of_vertices_(graph.number_of_vertices), number_of_edges_(graph.number_of_edges), - stream_(stream) + stream_(handle.get_stream()) { } @@ -118,11 +118,11 @@ class Louvain { return Q; } - virtual std::pair operator()(vertex_t *d_cluster_vec, - int max_level, - weight_t resolution) + virtual std::pair operator()(vertex_t *d_cluster_vec, + size_t max_level, + weight_t resolution) 
{ - int num_level{0}; + size_t num_level{0}; weight_t total_edge_weight = thrust::reduce(rmm::exec_policy(stream_)->on(stream_), weights_v_.begin(), weights_v_.end()); diff --git a/cpp/src/community/triangles_counting.cu b/cpp/src/community/triangles_counting.cu index 265083d6ef4..f6670365652 100644 --- a/cpp/src/community/triangles_counting.cu +++ b/cpp/src/community/triangles_counting.cu @@ -826,8 +826,8 @@ void TrianglesCount::count() else if (mean_deg < DEG_THR2) tcount_wrp(); else { - const int shMinBlkXSM{6}; - if (int64_t{m_shared_mem_per_block * 8} < int64_t{m_mat.N * shMinBlkXSM}) + const int shMinBlkXSM = 6; + if (size_t{m_shared_mem_per_block * 8 / shMinBlkXSM} < (size_t)m_mat.N) tcount_b2b(); else tcount_bsh(); diff --git a/cpp/tests/community/leiden_test.cpp b/cpp/tests/community/leiden_test.cpp index c8b14ebd8a1..2d4acf3765d 100644 --- a/cpp/tests/community/leiden_test.cpp +++ b/cpp/tests/community/leiden_test.cpp @@ -57,7 +57,7 @@ TEST(leiden_karate, success) offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); float modularity{0.0}; - int num_level = 40; + size_t num_level = 40; raft::handle_t handle; std::tie(num_level, modularity) = cugraph::leiden(handle, G, result_v.data().get()); diff --git a/cpp/tests/community/louvain_test.cu b/cpp/tests/community/louvain_test.cu index d3f59161336..20fa7b1d3d9 100644 --- a/cpp/tests/community/louvain_test.cu +++ b/cpp/tests/community/louvain_test.cu @@ -55,7 +55,7 @@ TEST(louvain, success) offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); float modularity{0.0}; - int num_level = 40; + size_t num_level = 40; raft::handle_t handle; @@ -72,142 +72,4 @@ TEST(louvain, success) ASSERT_GE(modularity, 0.402777 * 0.95); } -#if 0 -TEST(louvain_modularity, simple) -{ - std::vector off_h = {0, 1, 4, 7, 10, 11, 12}; - std::vector src_ind_h = {0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 5}; - std::vector ind_h = {1, 0, 2, 3, 1, 3, 4, 1, 2, 5, 2, 
3}; - std::vector w_h = {1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; - std::vector v_weights_h = {1.0, 3.0, 3.0, 3.0, 1.0, 1.0}; - - // - // Initial cluster, everything on its own - // - std::vector cluster_h = {0, 1, 2, 3, 4, 5}; - std::vector cluster_weights_h = {1.0, 3.0, 3.0, 3.0, 1.0, 1.0}; - - std::vector cluster_hash_h = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - std::vector delta_Q_h = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; - std::vector tmp_size_V_h = {0.0, 0.0, 0.0, 0.0, 0.0, 0.0}; - - int num_verts = off_h.size() - 1; - int num_edges = ind_h.size(); - - float q{0.0}; - - rmm::device_vector offsets_v(off_h); - rmm::device_vector src_indices_v(src_ind_h); - rmm::device_vector indices_v(ind_h); - rmm::device_vector weights_v(w_h); - rmm::device_vector vertex_weights_v(v_weights_h); - rmm::device_vector cluster_v(cluster_h); - rmm::device_vector cluster_weights_v(cluster_weights_h); - rmm::device_vector cluster_hash_v(cluster_hash_h); - rmm::device_vector delta_Q_v(delta_Q_h); - rmm::device_vector tmp_size_V_v(tmp_size_V_h); - - cudaStream_t stream{0}; - - // - // Create graph - // - cugraph::GraphCSRView G( - offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); - - q = cugraph::detail::modularity(float{12}, float{1}, G, cluster_v.data().get(), stream); - - ASSERT_FLOAT_EQ(q, float{-30.0 / 144.0}); - - cugraph::detail::compute_delta_modularity(float{12}, - float{1}, - G, - src_indices_v, - vertex_weights_v, - cluster_weights_v, - cluster_v, - cluster_hash_v, - delta_Q_v, - tmp_size_V_v); - - CUDA_TRY(cudaMemcpy(cluster_hash_h.data(), - cluster_hash_v.data().get(), - sizeof(int) * num_edges, - cudaMemcpyDeviceToHost)); - CUDA_TRY(cudaMemcpy( - delta_Q_h.data(), delta_Q_v.data().get(), sizeof(float) * num_edges, cudaMemcpyDeviceToHost)); - - ASSERT_EQ(cluster_hash_h[0], 1); - ASSERT_EQ(cluster_hash_h[10], 2); - ASSERT_EQ(cluster_hash_h[11], 3); - ASSERT_FLOAT_EQ(delta_Q_h[0], 
float{1.0 / 8.0}); - ASSERT_FLOAT_EQ(delta_Q_h[10], float{1.0 / 8.0}); - ASSERT_FLOAT_EQ(delta_Q_h[11], float{1.0 / 8.0}); - - // - // Move vertex 0 into cluster 1 - // - cluster_h[0] = 1; - cluster_weights_h[0] = 0.0; - cluster_weights_h[1] = 4.0; - - CUDA_TRY(cudaMemcpy( - cluster_v.data().get(), cluster_h.data(), sizeof(int) * num_verts, cudaMemcpyHostToDevice)); - CUDA_TRY(cudaMemcpy(cluster_weights_v.data().get(), - cluster_weights_h.data(), - sizeof(float) * num_verts, - cudaMemcpyHostToDevice)); - - q = cugraph::detail::modularity(float{12}, float{1}, G, cluster_v.data().get()); - - ASSERT_FLOAT_EQ(q, float{-12.0 / 144.0}); - - cugraph::detail::compute_delta_modularity(float{12}, - float{1}, - G, - src_indices_v, - vertex_weights_v, - cluster_weights_v, - cluster_v, - cluster_hash_v, - delta_Q_v, - tmp_size_V_v); - - CUDA_TRY(cudaMemcpy(cluster_hash_h.data(), - cluster_hash_v.data().get(), - sizeof(int) * num_edges, - cudaMemcpyDeviceToHost)); - CUDA_TRY(cudaMemcpy( - delta_Q_h.data(), delta_Q_v.data().get(), sizeof(float) * num_edges, cudaMemcpyDeviceToHost)); - - ASSERT_EQ(cluster_hash_h[10], 2); - ASSERT_EQ(cluster_hash_h[11], 3); - ASSERT_FLOAT_EQ(delta_Q_h[10], float{1.0 / 8.0}); - ASSERT_FLOAT_EQ(delta_Q_h[11], float{1.0 / 8.0}); - - // - // Move vertex 1 into cluster 2. Not the optimal, in fact it will reduce - // modularity (so Louvain would never do this), but let's see if it reduces - // by the expected amount (-12/144). 
- // - ASSERT_EQ(cluster_hash_h[3], 2); - ASSERT_FLOAT_EQ(delta_Q_h[3], float{-12.0 / 144.0}); - - cluster_h[1] = 2; - cluster_weights_h[1] = 1.0; - cluster_weights_h[2] = 6.0; - - CUDA_TRY(cudaMemcpy( - cluster_v.data().get(), cluster_h.data(), sizeof(int) * num_verts, cudaMemcpyHostToDevice)); - CUDA_TRY(cudaMemcpy(cluster_weights_v.data().get(), - cluster_weights_h.data(), - sizeof(float) * num_verts, - cudaMemcpyHostToDevice)); - - q = cugraph::detail::modularity(float{12}, float{1}, G, cluster_v.data().get()); - - ASSERT_FLOAT_EQ(q, float{-24.0 / 144.0}); -} -#endif - CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/python/cugraph/community/leiden.pxd b/python/cugraph/community/leiden.pxd index 9238d845605..48b9d2b738a 100644 --- a/python/cugraph/community/leiden.pxd +++ b/python/cugraph/community/leiden.pxd @@ -22,9 +22,9 @@ from cugraph.structure.graph_new cimport * cdef extern from "algorithms.hpp" namespace "cugraph": - cdef pair[int, weight_t] leiden[vertex_t,edge_t,weight_t]( + cdef pair[size_t, weight_t] leiden[vertex_t,edge_t,weight_t]( const handle_t &handle, const GraphCSRView[vertex_t,edge_t,weight_t] &graph, vertex_t *leiden_parts, - int max_level, + size_t max_level, weight_t resolution) except + diff --git a/python/cugraph/community/louvain.pxd b/python/cugraph/community/louvain.pxd index bccd42d5501..a40bf8850ce 100644 --- a/python/cugraph/community/louvain.pxd +++ b/python/cugraph/community/louvain.pxd @@ -22,9 +22,9 @@ from cugraph.structure.graph_new cimport * cdef extern from "algorithms.hpp" namespace "cugraph": - cdef pair[int, weight_t] louvain[vertex_t,edge_t,weight_t]( + cdef pair[size_t, weight_t] louvain[vertex_t,edge_t,weight_t]( const handle_t &handle, const GraphCSRView[vertex_t,edge_t,weight_t] &graph, vertex_t *louvain_parts, - int max_level, + size_t max_level, weight_t resolution) except + From 3e41bfaa65915190e73bc9572ce064c16c605b9d Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Thu, 10 Sep 2020 13:26:19 -0400 Subject: [PATCH 
24/74] update error message prefix --- cpp/src/community/ECG.cu | 2 +- cpp/src/community/leiden.cu | 4 ++-- cpp/src/community/louvain.cu | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/cpp/src/community/ECG.cu b/cpp/src/community/ECG.cu index 9d67a159dd4..ce7e9dd1ad2 100644 --- a/cpp/src/community/ECG.cu +++ b/cpp/src/community/ECG.cu @@ -115,7 +115,7 @@ void ecg(raft::handle_t const &handle, vertex_t *clustering) { CUGRAPH_EXPECTS(graph.edge_data != nullptr, "API error, louvain expects a weighted graph"); - CUGRAPH_EXPECTS(clustering != nullptr, "Invalid API parameter: clustering is NULL"); + CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is NULL"); cudaStream_t stream{0}; diff --git a/cpp/src/community/leiden.cu b/cpp/src/community/leiden.cu index 24ec8fd36ac..d53e88413e4 100644 --- a/cpp/src/community/leiden.cu +++ b/cpp/src/community/leiden.cu @@ -25,8 +25,8 @@ std::pair leiden(raft::handle_t const &handle, size_t max_level, weight_t resolution) { - CUGRAPH_EXPECTS(graph.edge_data != nullptr, "API error, leiden expects a weighted graph"); - CUGRAPH_EXPECTS(clustering != nullptr, "API error, clustering is null"); + CUGRAPH_EXPECTS(graph.edge_data != nullptr, "Invalid input argument: leiden expects a weighted graph"); + CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); Leiden> runner(handle, graph); diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index b11f794194c..3f9d4753315 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -25,8 +25,8 @@ std::pair louvain(raft::handle_t const &handle, size_t max_level, weight_t resolution) { - CUGRAPH_EXPECTS(graph.edge_data != nullptr, "API error, louvain expects a weighted graph"); - CUGRAPH_EXPECTS(clustering != nullptr, "API error, clustering is null"); + CUGRAPH_EXPECTS(graph.edge_data != nullptr, "Invalid input argument: louvain expects a weighted graph"); + 
CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); Louvain> runner(handle, graph); From 175be02a1aa0a124c622a54d84a4041606064d02 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Thu, 10 Sep 2020 12:27:34 -0500 Subject: [PATCH 25/74] Further updates to Louvain MG cython/wrapper code. No longer builds at the moment since it's not calling the Louvain C++ API which has not been merged yet. --- python/cugraph/dask/community/louvain.pxd | 13 ++-- python/cugraph/dask/community/louvain.py | 42 ++++++------ .../dask/community/louvain_wrapper.pyx | 67 +++++++++++++++---- 3 files changed, 83 insertions(+), 39 deletions(-) diff --git a/python/cugraph/dask/community/louvain.pxd b/python/cugraph/dask/community/louvain.pxd index 79c0d556502..4b1b49a5a04 100644 --- a/python/cugraph/dask/community/louvain.pxd +++ b/python/cugraph/dask/community/louvain.pxd @@ -23,9 +23,10 @@ from cugraph.structure.graph_primtypes cimport * cdef extern from "algorithms.hpp" namespace "cugraph": - cdef pair[int,weight_t] louvain[vertex_t,edge_t,weight_t]( - const handle_t &handle, - const GraphCSRView[vertex_t,edge_t,weight_t] &graph, - vertex_t *louvain_parts, - int max_level, - weight_t resolution) except + + cdef pair[int, weight_t] \ + louvain[vertex_t, edge_t, weight_t]( + const handle_t &handle, + const GraphCSRView[vertex_t,edge_t,weight_t] &graph, + vertex_t *louvain_parts, + int max_level, + weight_t resolution) except + diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py index f00310af74c..eccb7510a20 100644 --- a/python/cugraph/dask/community/louvain.py +++ b/python/cugraph/dask/community/louvain.py @@ -20,10 +20,11 @@ from cugraph.dask.community import louvain_wrapper as c_mg_louvain -def call_louvain(sID, graph, max_iter, resolution): +def call_louvain(sID, data, local_data, max_iter, resolution): wid = Comms.get_worker_id(sID) handle = Comms.get_handle(sID) - return c_mg_louvain.louvain(graph, + return 
c_mg_louvain.louvain(data[0], + local_data, wid, handle, start, @@ -31,7 +32,7 @@ def call_louvain(sID, graph, max_iter, resolution): return_distances) -def louvain(graph, max_iter=100, resolution=1.): +def louvain(input_graph, max_iter=100, resolution=1.0, load_balance=True): """ Compute the modularity optimizing partition of the input graph using the Louvain method on multiple GPUs @@ -50,6 +51,8 @@ def louvain(graph, max_iter=100, resolution=1.): edge_attr='value') >>> parts, modularity_score = dcg.louvain(dg) """ + # FIXME: finish docstring: describe parameters, etc. + # FIXME: import here to prevent circular import: cugraph->louvain # wrapper->cugraph/structure->cugraph/dask->dask/louvain->cugraph/structure # from cugraph.structure.graph import Graph @@ -62,35 +65,34 @@ def louvain(graph, max_iter=100, resolution=1.): client = default_client() - if(graph.local_data is not None and - graph.local_data['by'] == 'src'): - data = graph.local_data['data'] + if(input_graph.local_data is not None and + input_graph.local_data['by'] == 'src'): + data = input_graph.local_data['data'] else: - data = get_local_data(graph, by='src', load_balance=load_balance) + data = get_local_data(input_graph, by='src', load_balance=load_balance) - if graph.renumbered: - start = graph.lookup_internal_vertex_id(cudf.Series([start], + if input_graph.renumbered: + start = input_graph.lookup_internal_vertex_id(cudf.Series([start], dtype='int32')).compute() start = start.iloc[0] - result = dict([(data.worker_info[wf[0]]["rank"], client.submit( - call_louvain, - Comms.get_session_id(), - wf[1], - data.local_data, - max_iter, - resolution, - workers=[wf[0]])) - for idx, wf in enumerate(data.worker_to_parts.items())]) + call_louvain, + Comms.get_session_id(), + wf[1], + data.local_data, + max_iter, + resolution, + workers=[wf[0]])) + for idx, wf in enumerate(data.worker_to_parts.items())]) wait(result) (parts, modularity_score) = result[0].result() - if graph.renumbered: + if 
input_graph.renumbered: # MG renumbering is lazy, but it's safe to assume it's been called at # this point if renumbered=True - parts = graph.unrenumber(parts, "vertex") + parts = input_graph.unrenumber(parts, "vertex") return parts, modularity_score diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index b5049cff733..c807f5ccbca 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -16,21 +16,28 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.dask.community.louvain cimport louvain as c_louvain +from libc.stdint cimport uintptr_t +from libcpp.pair cimport pair + +from cugraph.dask.community cimport louvain as c_louvain from cugraph.structure.graph_primtypes cimport * import cudf import numpy as np -def louvain(input_graph, max_iter, resolution): +def louvain(input_df, local_data, wid, handle, max_level, resolution): """ Call MG Louvain """ + + cdef size_t handle_size_t = handle.getHandle() + handle_ = handle_size_t + # FIXME: view_adj_list() is not supported for a distributed graph but should # still be done? 
- # if not input_graph.adjlist: - # input_graph.view_adj_list() + # if not input_df.adjlist: + # input_df.view_adj_list() weights = None final_modularity = None @@ -38,24 +45,58 @@ def louvain(input_graph, max_iter, resolution): # FIXME: This must be imported here to prevent a circular import from cugraph.structure import graph_primtypes_wrapper - [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.offsets, input_graph.adjlist.indices], [np.int32]) + [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_df.adjlist.offsets, input_df.adjlist.indices], [np.int32]) - num_verts = input_graph.number_of_vertices() - num_edges = input_graph.number_of_edges(directed_edges=True) + num_verts = input_df.number_of_vertices() + num_edges = input_df.number_of_edges(directed_edges=True) - if input_graph.adjlist.weights is not None: - [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.adjlist.weights], [np.float32, np.float64]) + # FIXME: assuming adjlist is not present because of view_adj_list() FIXME above. 
+ #if input_df.adjlist.weights is not None: + if input_df.adjlist and input_df.adjlist.weights is not None: + [weights] = graph_primtypes_wrapper.datatype_cast([input_df.adjlist.weights], [np.float32, np.float64]) else: weights = cudf.Series(np.full(num_edges, 1.0, dtype=np.float32)) - #### - # FIXME: call louvain as declared in louvain.pxd here - #### - # Create the output dataframe df = cudf.DataFrame() df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) df['partition'] = cudf.Series(np.zeros(num_verts,dtype=np.int32)) + cdef uintptr_t c_offsets = offsets.__cuda_array_interface__['data'][0] + cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] + cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0] + cdef uintptr_t c_partition = df['partition'].__cuda_array_interface__['data'][0] + cdef uintptr_t c_weights = weights.__cuda_array_interface__['data'][0] + + cdef GraphCSRView[int,int,float] graph_float + cdef GraphCSRView[int,int,double] graph_double + + # FIXME: figure out parts + cdef uintptr_t parts = 0 + + cdef float final_modularity_float = 1.0 + cdef double final_modularity_double = 1.0 + cdef int num_level = 0 + + cdef pair[int,float] resultpair_float + cdef pair[int,double] resultpair_double + + if weights.dtype == np.float32: + graph_float = GraphCSRView[int,int,float](c_offsets, c_indices, + c_weights, num_verts, num_edges) + + graph_float.get_vertex_identifiers(c_identifier) + resultpair_float = c_louvain.louvain[int,int,float](handle_[0], graph_float, parts, max_level, resolution) + + final_modularity = resultpair_float.second + + else: + graph_double = GraphCSRView[int,int,double](c_offsets, c_indices, + c_weights, num_verts, num_edges) + + graph_double.get_vertex_identifiers(c_identifier) + resultpair_double = c_louvain.louvain[int,int,double](handle_[0], graph_double, parts, max_level, resolution) + + final_modularity = resultpair_double.second return df, final_modularity From 
6ac279bf094ada405e86d6263e2f9ddeaabf2a7b Mon Sep 17 00:00:00 2001 From: Chuck Hastings Date: Thu, 10 Sep 2020 13:30:06 -0400 Subject: [PATCH 26/74] fix clang errors --- cpp/src/community/leiden.cu | 3 ++- cpp/src/community/louvain.cu | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/src/community/leiden.cu b/cpp/src/community/leiden.cu index d53e88413e4..9e5a847cdf0 100644 --- a/cpp/src/community/leiden.cu +++ b/cpp/src/community/leiden.cu @@ -25,7 +25,8 @@ std::pair leiden(raft::handle_t const &handle, size_t max_level, weight_t resolution) { - CUGRAPH_EXPECTS(graph.edge_data != nullptr, "Invalid input argument: leiden expects a weighted graph"); + CUGRAPH_EXPECTS(graph.edge_data != nullptr, + "Invalid input argument: leiden expects a weighted graph"); CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); Leiden> runner(handle, graph); diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index 3f9d4753315..1f193e9a2f9 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -25,7 +25,8 @@ std::pair louvain(raft::handle_t const &handle, size_t max_level, weight_t resolution) { - CUGRAPH_EXPECTS(graph.edge_data != nullptr, "Invalid input argument: louvain expects a weighted graph"); + CUGRAPH_EXPECTS(graph.edge_data != nullptr, + "Invalid input argument: louvain expects a weighted graph"); CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); Louvain> runner(handle, graph); From 7714425f4bd62bb81528256b124cb12cdb674f5d Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Thu, 10 Sep 2020 16:04:59 -0400 Subject: [PATCH 27/74] fixed doc error of target than destination --- python/cugraph/structure/convert_matrix.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/structure/convert_matrix.py b/python/cugraph/structure/convert_matrix.py index 0266a158bb1..015ae54dee5 100644 --- 
a/python/cugraph/structure/convert_matrix.py +++ b/python/cugraph/structure/convert_matrix.py @@ -33,7 +33,7 @@ def from_cudf_edgelist(df, source='source', destination='destination', (optional) weights. source : string or integer This is used to index the source column. - target : string or integer + destination : string or integer This is used to index the destination (or target following NetworkX's terminology) column. weight : string or integer, optional From 4f5290342b7947534f5b8ad379f6ab868d05dc09 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Thu, 10 Sep 2020 16:24:42 -0400 Subject: [PATCH 28/74] doc updates --- python/cugraph/structure/convert_matrix.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/python/cugraph/structure/convert_matrix.py b/python/cugraph/structure/convert_matrix.py index 015ae54dee5..1ef30cde583 100644 --- a/python/cugraph/structure/convert_matrix.py +++ b/python/cugraph/structure/convert_matrix.py @@ -23,7 +23,8 @@ def from_cudf_edgelist(df, source='source', destination='destination', """ Return a new graph created from the edge list representaion. This function is added for NetworkX compatibility (this function is a RAPIDS version of - NetworkX's from_pandas_edge_list()). + NetworkX's from_pandas_edge_list()). This function does not support multiple + source or destination columns. But does support renumbering Parameters ---------- @@ -36,9 +37,14 @@ def from_cudf_edgelist(df, source='source', destination='destination', destination : string or integer This is used to index the destination (or target following NetworkX's terminology) column. - weight : string or integer, optional + edge_attr : string or integer, optional This pointer can be ``None``. If not, this is used to index the weight column. + create_using : cuGraph.Graph + Specify the type of Graph to create. 
Default is cugraph.Graph + renumber : bool + If source and destination indices are not in range 0 to V where V + is number of vertices, renumber argument should be True. Examples -------- From 4f67ecec4edfb6c2c5c5f850c3490fc79a71bcf5 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Fri, 11 Sep 2020 15:50:33 -0500 Subject: [PATCH 29/74] Updated to properly call new Louvain C++ API from cython wrapper, test seems to be running and returning expected results now (based on comparison to non-dask SG test). --- python/cugraph/dask/community/louvain.py | 12 +-- .../dask/community/louvain_wrapper.pyx | 86 +++++++++++-------- python/cugraph/tests/dask/test_mg_louvain.py | 6 +- 3 files changed, 59 insertions(+), 45 deletions(-) diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py index eccb7510a20..73cc79a867e 100644 --- a/python/cugraph/dask/community/louvain.py +++ b/python/cugraph/dask/community/louvain.py @@ -20,16 +20,15 @@ from cugraph.dask.community import louvain_wrapper as c_mg_louvain -def call_louvain(sID, data, local_data, max_iter, resolution): +def call_louvain(sID, data, local_data, max_level, resolution): wid = Comms.get_worker_id(sID) handle = Comms.get_handle(sID) return c_mg_louvain.louvain(data[0], local_data, wid, handle, - start, - num_verts, - return_distances) + max_level, + resolution) def louvain(input_graph, max_iter=100, resolution=1.0, load_balance=True): @@ -71,11 +70,6 @@ def louvain(input_graph, max_iter=100, resolution=1.0, load_balance=True): else: data = get_local_data(input_graph, by='src', load_balance=load_balance) - if input_graph.renumbered: - start = input_graph.lookup_internal_vertex_id(cudf.Series([start], - dtype='int32')).compute() - start = start.iloc[0] - result = dict([(data.worker_info[wf[0]]["rank"], client.submit( call_louvain, diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index c807f5ccbca..7cf72e20fba 100644 --- 
a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -26,77 +26,93 @@ import cudf import numpy as np -def louvain(input_df, local_data, wid, handle, max_level, resolution): +def louvain(input_df, local_data, rank, handle, max_level, resolution): """ Call MG Louvain """ + # FIXME: This must be imported here to prevent a circular import + from cugraph.structure import graph_primtypes_wrapper cdef size_t handle_size_t = handle.getHandle() handle_ = handle_size_t - # FIXME: view_adj_list() is not supported for a distributed graph but should - # still be done? - # if not input_df.adjlist: - # input_df.view_adj_list() - - weights = None final_modularity = None - # FIXME: This must be imported here to prevent a circular import - from cugraph.structure import graph_primtypes_wrapper - - [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_df.adjlist.offsets, input_df.adjlist.indices], [np.int32]) - - num_verts = input_df.number_of_vertices() - num_edges = input_df.number_of_edges(directed_edges=True) - - # FIXME: assuming adjlist is not present because of view_adj_list() FIXME above. 
- #if input_df.adjlist.weights is not None: - if input_df.adjlist and input_df.adjlist.weights is not None: - [weights] = graph_primtypes_wrapper.datatype_cast([input_df.adjlist.weights], [np.float32, np.float64]) + src = input_df['src'] + dst = input_df['dst'] + if "value" in input_df.columns: + weights = input_df['value'] + else: + weights = None + + num_verts = local_data['verts'].sum() + num_edges = local_data['edges'].sum() + + local_offset = local_data['offsets'][rank] + dst = dst - local_offset + num_local_verts = local_data['verts'][rank] + num_local_edges = len(src) + + cdef uintptr_t c_local_verts = local_data['verts'].__array_interface__['data'][0] + cdef uintptr_t c_local_edges = local_data['edges'].__array_interface__['data'][0] + cdef uintptr_t c_local_offsets = local_data['offsets'].__array_interface__['data'][0] + + [src, dst] = graph_primtypes_wrapper.datatype_cast([src, dst], [np.int32]) + if weights is not None: + if weights.dtype == np.float32: + [weights] = graph_primtypes_wrapper.datatype_cast([weights], [np.float32]) + elif weights.dtype == np.double: + [weights] = graph_primtypes_wrapper.datatype_cast([weights], [np.double]) + else: + raise TypeError(f"unsupported type {weights.dtype} for weights") + + _offsets, indices, weights = graph_primtypes_wrapper.coo2csr(dst, src, weights) else: - weights = cudf.Series(np.full(num_edges, 1.0, dtype=np.float32)) + _offsets, indices, weights = graph_primtypes_wrapper.coo2csr(dst, src, None) + + offsets = _offsets[:num_local_verts + 1] + del _offsets # Create the output dataframe df = cudf.DataFrame() df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - df['partition'] = cudf.Series(np.zeros(num_verts,dtype=np.int32)) + df['partition'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) cdef uintptr_t c_offsets = offsets.__cuda_array_interface__['data'][0] cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] + cdef uintptr_t c_weights = NULL + if weights is not None: + 
c_weights = weights.__cuda_array_interface__['data'][0] cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0] cdef uintptr_t c_partition = df['partition'].__cuda_array_interface__['data'][0] - cdef uintptr_t c_weights = weights.__cuda_array_interface__['data'][0] cdef GraphCSRView[int,int,float] graph_float cdef GraphCSRView[int,int,double] graph_double - # FIXME: figure out parts - cdef uintptr_t parts = 0 - cdef float final_modularity_float = 1.0 cdef double final_modularity_double = 1.0 cdef int num_level = 0 - cdef pair[int,float] resultpair_float - cdef pair[int,double] resultpair_double - if weights.dtype == np.float32: graph_float = GraphCSRView[int,int,float](c_offsets, c_indices, - c_weights, num_verts, num_edges) - + c_weights, num_verts, num_local_edges) + graph_float.set_local_data(c_local_verts, c_local_edges, c_local_offsets) + graph_float.set_handle(handle_) + num_level, final_modularity_float = \ + c_louvain.louvain[int,int,float](handle_[0], graph_float, c_partition, max_level, resolution) graph_float.get_vertex_identifiers(c_identifier) - resultpair_float = c_louvain.louvain[int,int,float](handle_[0], graph_float, parts, max_level, resolution) - final_modularity = resultpair_float.second + final_modularity = final_modularity_float else: graph_double = GraphCSRView[int,int,double](c_offsets, c_indices, c_weights, num_verts, num_edges) - + graph_double.set_local_data(c_local_verts, c_local_edges, c_local_offsets) + graph_double.set_handle(handle_) + num_level, final_modularity_double = \ + c_louvain.louvain[int,int,double](handle_[0], graph_double, c_partition, max_level, resolution) graph_double.get_vertex_identifiers(c_identifier) - resultpair_double = c_louvain.louvain[int,int,double](handle_[0], graph_double, parts, max_level, resolution) - final_modularity = resultpair_double.second + final_modularity = final_modularity_double return df, final_modularity diff --git a/python/cugraph/tests/dask/test_mg_louvain.py 
b/python/cugraph/tests/dask/test_mg_louvain.py index f07b3f78419..0a4fe3b6661 100644 --- a/python/cugraph/tests/dask/test_mg_louvain.py +++ b/python/cugraph/tests/dask/test_mg_louvain.py @@ -58,7 +58,7 @@ def client_connection(): @pytest.fixture(scope="module", - params=utils.DATASETS) + params=utils.DATASETS_UNDIRECTED) def daskGraphFromDataset(request, client_connection): """ Returns a new dask dataframe created from the dataset file param. @@ -89,3 +89,7 @@ def test_mg_louvain_with_edgevals(daskGraphFromDataset): # FIXME: either call Nx with the same dataset and compare results, or # hadcode golden results to compare to. + print() + print(parts.compute()) + print(mod) + print() From 35e1524212e5e04660028c9633cac751653e85b1 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Mon, 14 Sep 2020 12:38:43 -0500 Subject: [PATCH 30/74] Added C++ graph container factory function and changed Louvain wrappers to call it. --- cpp/CMakeLists.txt | 1 + cpp/include/utilities/cython.hpp | 70 ++++++++++ cpp/src/utilities/cython.cpp | 122 ++++++++++++++++++ python/cugraph/dask/community/louvain.pxd | 18 +-- python/cugraph/dask/community/louvain.py | 1 - .../dask/community/louvain_wrapper.pyx | 49 +++---- python/cugraph/structure/graph_primtypes.pxd | 21 +++ python/cugraph/tests/dask/test_mg_louvain.py | 8 +- 8 files changed, 244 insertions(+), 46 deletions(-) create mode 100644 cpp/include/utilities/cython.hpp create mode 100644 cpp/src/utilities/cython.cpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3c1d0de4420..c867f286ae3 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -311,6 +311,7 @@ add_library(cugraph SHARED src/db/db_parser_integration_test.cu src/db/db_operators.cu src/utilities/spmv_1D.cu + src/utilities/cython.cpp src/structure/graph.cu src/link_analysis/pagerank.cu src/link_analysis/pagerank_1D.cu diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp new file mode 100644 index 00000000000..5d06a2dcca5 --- /dev/null 
+++ b/cpp/include/utilities/cython.hpp @@ -0,0 +1,70 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +// #include +#include + +namespace cugraph { +namespace cython { + +// FIXME: use std::variant instead of a union if possible +// FIXME: add both CSRView and graph_type_t objects for easier testing during +// the transition +union graphUnion { + graphUnion() {} + GraphCSRView GraphCSRViewFloat; + GraphCSRView GraphCSRViewDouble; +}; + +enum weightTypeEnum { floatType = 0, doubleType = 1 }; + +// FIXME: Add comments describing this struct, where it's used, etc. +struct graph_container_t { + graph_container_t() {} + graphUnion graph; + // FIXME: cython issues using an enum so just using an int for now. + // weightTypeEnum wType; + int wType; +}; + +// Factory function for creating graph containers from basic types +// FIXME: This should accept void* for offsets and indices as well and take a +// dtype directly for each instead of the enum/int. 
+graph_container_t create_graph_t(raft::handle_t const& handle, + int* offsets, + int* indices, + void* weights, + int weightType, + int num_vertices, + int num_edges, + int* local_vertices, + int* local_edges, + int* local_offsets, + bool transposed, + bool multi_gpu); + +// Wrapper for calling Louvain using a graph container +template +weight_t call_louvain(raft::handle_t const& handle, + graph_container_t graph_container, + int* parts, + size_t max_level, + weight_t resolution); + +} // namespace cython +} // namespace cugraph diff --git a/cpp/src/utilities/cython.cpp b/cpp/src/utilities/cython.cpp new file mode 100644 index 00000000000..6baf7c3fb9c --- /dev/null +++ b/cpp/src/utilities/cython.cpp @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +namespace cugraph { +namespace cython { + +// Factory function for creating graph containers from basic types +// FIXME: This should accept void* for offsets and indices as well and take a +// dtype directly for each instead of the enum/int. 
+graph_container_t create_graph_t(raft::handle_t const& handle, + int* offsets, + int* indices, + void* weights, + int weightType, + int num_vertices, + int num_edges, + int* local_vertices, + int* local_edges, + int* local_offsets, + bool transposed, + bool multi_gpu) +{ + graph_container_t graph_container{}; + graph_container.wType = weightType; + + if (weightType == floatType) { + graph_container.graph.GraphCSRViewFloat = GraphCSRView( + offsets, indices, reinterpret_cast(weights), num_vertices, num_edges); + graph_container.graph.GraphCSRViewFloat.set_local_data( + local_vertices, local_edges, local_offsets); + graph_container.graph.GraphCSRViewFloat.set_handle(const_cast(&handle)); + + } else { + graph_container.graph.GraphCSRViewDouble = GraphCSRView( + offsets, indices, reinterpret_cast(weights), num_vertices, num_edges); + graph_container.graph.GraphCSRViewDouble.set_local_data( + local_vertices, local_edges, local_offsets); + graph_container.graph.GraphCSRViewDouble.set_handle(const_cast(&handle)); + } + + return std::move(graph_container); + + // FIXME: instantiate graph_type_t instead when ready, add conditionals for + // properly instantiating MG or not based on multi_gpu, etc. 
+ /* + auto graph = graph_view_t( + handle, + offsets_vect, + indices_vect, + weights_vect, + vertex_partition_segment_offsets_vect, + partition, + num_vertices, + num_edges, + properties, + sorted_by_global_degree_within_vertex_partition, + do_expensive_check); + */ +} + +// Wrapper for calling Louvain using a graph container +template +weight_t call_louvain(raft::handle_t const& handle, + graph_container_t graph_container, + int* parts, + size_t max_level, + weight_t resolution) +{ + weight_t final_modularity; + + if (graph_container.wType == floatType) { + std::pair results = louvain(handle, + graph_container.graph.GraphCSRViewFloat, + parts, + max_level, + static_cast(resolution)); + final_modularity = results.second; + } else { + std::pair results = louvain(handle, + graph_container.graph.GraphCSRViewDouble, + parts, + max_level, + static_cast(resolution)); + final_modularity = results.second; + } + + return final_modularity; +} + +// Explicit instantiations +template float call_louvain(raft::handle_t const& handle, + graph_container_t graph_container, + int* parts, + size_t max_level, + float resolution); + +template double call_louvain(raft::handle_t const& handle, + graph_container_t graph_container, + int* parts, + size_t max_level, + double resolution); + +} // namespace cython +} // namespace cugraph diff --git a/python/cugraph/dask/community/louvain.pxd b/python/cugraph/dask/community/louvain.pxd index 4b1b49a5a04..eb3ec9d77ae 100644 --- a/python/cugraph/dask/community/louvain.pxd +++ b/python/cugraph/dask/community/louvain.pxd @@ -16,17 +16,13 @@ # cython: embedsignature = True # cython: language_level = 3 -from libcpp.pair cimport pair - from cugraph.structure.graph_primtypes cimport * +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": -cdef extern from "algorithms.hpp" namespace "cugraph": - - cdef pair[int, weight_t] \ - louvain[vertex_t, edge_t, weight_t]( - const handle_t &handle, - const GraphCSRView[vertex_t,edge_t,weight_t] 
&graph, - vertex_t *louvain_parts, - int max_level, - weight_t resolution) except + + cdef weight_t call_louvain[weight_t]( + const handle_t &handle, + graph_container_t g, + int *parts, + size_t max_level, + weight_t resolution) except + diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py index 73cc79a867e..c183d54e85a 100644 --- a/python/cugraph/dask/community/louvain.py +++ b/python/cugraph/dask/community/louvain.py @@ -12,7 +12,6 @@ # limitations under the License. from dask.distributed import wait, default_client -import cudf import cugraph.comms.comms as Comms from cugraph.dask.common.input_utils import get_local_data diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index 7cf72e20fba..4229e3f108b 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -34,7 +34,7 @@ def louvain(input_df, local_data, rank, handle, max_level, resolution): from cugraph.structure import graph_primtypes_wrapper cdef size_t handle_size_t = handle.getHandle() - handle_ = handle_size_t + handle_ = handle_size_t final_modularity = None @@ -59,17 +59,12 @@ def louvain(input_df, local_data, rank, handle, max_level, resolution): [src, dst] = graph_primtypes_wrapper.datatype_cast([src, dst], [np.int32]) if weights is not None: - if weights.dtype == np.float32: - [weights] = graph_primtypes_wrapper.datatype_cast([weights], [np.float32]) - elif weights.dtype == np.double: - [weights] = graph_primtypes_wrapper.datatype_cast([weights], [np.double]) + if weights.dtype in [np.float32, np.double]: + [weights] = graph_primtypes_wrapper.datatype_cast([weights], [weights.dtype]) else: raise TypeError(f"unsupported type {weights.dtype} for weights") - _offsets, indices, weights = graph_primtypes_wrapper.coo2csr(dst, src, weights) - else: - _offsets, indices, weights = graph_primtypes_wrapper.coo2csr(dst, src, None) - + 
_offsets, indices, weights = graph_primtypes_wrapper.coo2csr(dst, src, weights) offsets = _offsets[:num_local_verts + 1] del _offsets @@ -86,33 +81,31 @@ def louvain(input_df, local_data, rank, handle, max_level, resolution): cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0] cdef uintptr_t c_partition = df['partition'].__cuda_array_interface__['data'][0] - cdef GraphCSRView[int,int,float] graph_float - cdef GraphCSRView[int,int,double] graph_double - cdef float final_modularity_float = 1.0 cdef double final_modularity_double = 1.0 cdef int num_level = 0 - if weights.dtype == np.float32: - graph_float = GraphCSRView[int,int,float](c_offsets, c_indices, - c_weights, num_verts, num_local_edges) - graph_float.set_local_data(c_local_verts, c_local_edges, c_local_offsets) - graph_float.set_handle(handle_) - num_level, final_modularity_float = \ - c_louvain.louvain[int,int,float](handle_[0], graph_float, c_partition, max_level, resolution) - graph_float.get_vertex_identifiers(c_identifier) + cdef graph_container_t graph_container + # FIXME: This dict should not be needed, instead update create_graph_t() to + # take weights.dtype directly + # FIXME: offsets and indices should also be void*, and have corresponding + # dtypes passed to create_graph_t() + weightTypeMap = {np.dtype("float32"):0, np.dtype("double"):1} + graph_container = create_graph_t(handle_[0], c_offsets, c_indices, + c_weights, weightTypeMap[weights.dtype], + num_verts, num_local_edges, + c_local_verts, c_local_edges, c_local_offsets, + False, True) # store_transposed, multi_gpu + + if weights.dtype == np.float32: + final_modularity_float = c_louvain.call_louvain[float]( + handle_[0], graph_container, c_partition, max_level, resolution) final_modularity = final_modularity_float else: - graph_double = GraphCSRView[int,int,double](c_offsets, c_indices, - c_weights, num_verts, num_edges) - graph_double.set_local_data(c_local_verts, c_local_edges, c_local_offsets) - 
graph_double.set_handle(handle_) - num_level, final_modularity_double = \ - c_louvain.louvain[int,int,double](handle_[0], graph_double, c_partition, max_level, resolution) - graph_double.get_vertex_identifiers(c_identifier) - + final_modularity_double = c_louvain.call_louvain[double]( + handle_[0], graph_container, c_partition, max_level, resolution) final_modularity = final_modularity_double return df, final_modularity diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index 2343a0604dc..5ae8e9f6ee3 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -190,3 +190,24 @@ ctypedef fused GraphViewType: cdef coo_to_df(GraphCOOPtrType graph) cdef csr_to_series(GraphCSRPtrType graph) cdef GraphViewType get_graph_view(input_graph, bool weightless=*, GraphViewType* dummy=*) + + +# C++ graph factory function and container type +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef struct graph_container_t: + pass + + cdef graph_container_t create_graph_t( + const handle_t &handle, + int *offsets, + int *indices, + void *weights, + int weightType, + int num_vertices, + int num_edges, + int *local_vertices, + int *local_edges, + int *local_offsets, + bool transposed, + bool multi_gpu) except + diff --git a/python/cugraph/tests/dask/test_mg_louvain.py b/python/cugraph/tests/dask/test_mg_louvain.py index 0a4fe3b6661..23210596df9 100644 --- a/python/cugraph/tests/dask/test_mg_louvain.py +++ b/python/cugraph/tests/dask/test_mg_louvain.py @@ -13,19 +13,13 @@ import pytest -import pandas -import numpy as np - import cugraph.dask as dcg import cugraph.comms as Comms from dask.distributed import Client import cugraph import dask_cudf -import dask -import cudf from dask_cuda import LocalCUDACluster from cugraph.tests import utils -from cugraph.structure.number_map import NumberMap try: from rapids_pytest_benchmark import setFixtureParamNames @@ 
-35,11 +29,13 @@ # if rapids_pytest_benchmark is not available, just perfrom time-only # benchmarking and replace the util functions with nops + import pytest_benchmark gpubenchmark = pytest_benchmark.plugin.benchmark def setFixtureParamNames(*args, **kwargs): pass + ############################################################################### # Fixtures @pytest.fixture(scope="module") From 0630aaee0af8ce97fe24811123ffea4beb8294f8 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Mon, 14 Sep 2020 12:42:47 -0500 Subject: [PATCH 31/74] Added PR 1139 to CHANGELOG.md --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4f09a79c322..1af44817078 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ - PR 1135 SG Updates to Louvain et. al. - PR 1132 Upgrade Thrust to latest commit - PR #1129 Refactored test to use common dataset and added additional doc pages +- PR #1139 MNMG Louvain Python updates, Cython cleanup ## Bug Fixes - PR #1131 Show style checker errors with set +e From 9d3bce9780e3303c086b2ad4168d5c095a87e89b Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Mon, 14 Sep 2020 14:37:12 -0400 Subject: [PATCH 32/74] updated docs --- .../centrality/betweenness_centrality.py | 26 +++++++++++++------ python/cugraph/community/ktruss_subgraph.py | 4 +-- .../cugraph/community/subgraph_extraction.py | 4 +-- python/cugraph/community/triangle_count.py | 6 ++--- python/cugraph/structure/graph.py | 13 +++++++--- 5 files changed, 34 insertions(+), 19 deletions(-) diff --git a/python/cugraph/centrality/betweenness_centrality.py b/python/cugraph/centrality/betweenness_centrality.py index 92bc5a7b3e0..9eb5eea8052 100644 --- a/python/cugraph/centrality/betweenness_centrality.py +++ b/python/cugraph/centrality/betweenness_centrality.py @@ -30,8 +30,13 @@ def betweenness_centrality( result_dtype=np.float64, ): """ - Compute the betweenness centrality for all nodes of the graph G from a - sample of 'k' sources. 
+ Compute the betweenness centrality for all vertices of the graph G. + Betweenness centrality is a measure of the number of shortest paths that + pass through a vertex. A vertex with a high betweenness centrality score + has more paths passing through it and is therefore believed to be more + important. Rather than doing an all-pair shortest path, a sample of k + starting vertices can be used. + CuGraph does not currently support the 'endpoints' and 'weight' parameters as seen in the corresponding networkX call. @@ -99,10 +104,10 @@ def betweenness_centrality( Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(M, source='0', destination='1') + >>> G.from_cudf_edgelist(gdf, source='0', destination='1') >>> bc = cugraph.betweenness_centrality(G) """ # vertices is intended to be a cuDF series that contains a sampling of @@ -136,8 +141,13 @@ def edge_betweenness_centrality( G, k=None, normalized=True, weight=None, seed=None, result_dtype=np.float64 ): """ - Compute the edge betweenness centrality for all edges of the graph G from a - sample of 'k' sources. + Compute the betweenness centrality for all edges of the graph G. + Betweenness centrality is a measure of the number of shortest paths + that pass over an edge. An edge with a high betweenness centrality + score has more paths passing over it and is therefore believed to be + more important. Rather than doing an all-pair shortest path, a sample + of k starting vertices can be used. + CuGraph does not currently support the 'weight' parameter as seen in the corresponding networkX call. 
@@ -211,10 +221,10 @@ def edge_betweenness_centrality( Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(M, source='0', destination='1') + >>> G.from_cudf_edgelist(gdf, source='0', destination='1') >>> ebc = cugraph.edge_betweenness_centrality(G) """ diff --git a/python/cugraph/community/ktruss_subgraph.py b/python/cugraph/community/ktruss_subgraph.py index 74fc343c097..891dd0a7625 100644 --- a/python/cugraph/community/ktruss_subgraph.py +++ b/python/cugraph/community/ktruss_subgraph.py @@ -69,10 +69,10 @@ def ktruss_subgraph(G, k, use_weights=True): Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(M, source='0', destination='1') + >>> G.from_cudf_edgelist(gdf, source='0', destination='1') >>> k_subgraph = cugraph.ktruss_subgraph(G, 3) """ diff --git a/python/cugraph/community/subgraph_extraction.py b/python/cugraph/community/subgraph_extraction.py index 6a17061db92..70b49906184 100644 --- a/python/cugraph/community/subgraph_extraction.py +++ b/python/cugraph/community/subgraph_extraction.py @@ -36,12 +36,12 @@ def subgraph(G, vertices): Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', + >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter = ' ', dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(M, source='0', destination='1') + >>> G.from_cudf_edgelist(gdf, source='0', destination='1') >>> verts = numpy.zeros(3, dtype=numpy.int32) >>> verts[0] = 0 >>> verts[1] = 1 diff --git a/python/cugraph/community/triangle_count.py b/python/cugraph/community/triangle_count.py index 52193c74a3e..7606c4dab13 100644 
--- a/python/cugraph/community/triangle_count.py +++ b/python/cugraph/community/triangle_count.py @@ -17,7 +17,7 @@ def triangles(G): """ - Compute the triangle (number of cycles of length three) count of the + Compute the number of triangle (cycles of length three) in the input graph. Parameters @@ -34,12 +34,12 @@ def triangles(G): Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', + >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter = ' ', dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(M, source='0', destination='1') + >>> G.from_cudf_edgelist(gdf, source='0', destination='1') >>> count = cugraph.triangles(G) """ diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index c918cd44ae2..83183e82809 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -337,10 +337,10 @@ def from_cudf_edgelist( Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> df = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(M, source='0', destination='1', edge_attr='2', + >>> G.from_cudf_edgelist(df, source='0', destination='1', edge_attr='2', renumber=False) """ @@ -586,9 +586,9 @@ def from_cudf_adjlist(self, offset_col, index_col, value_col=None): Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> M = M.to_pandas() + >>> M = gdf.to_pandas() >>> M = scipy.sparse.coo_matrix((M['2'],(M['0'],M['1']))) >>> M = M.tocsr() >>> offsets = cudf.Series(M.indptr) @@ -984,6 +984,11 @@ def degrees(self, vertex_subset=None): Returns ------- df : cudf.DataFrame + GPU DataFrame of size N (the default) or the size of the given + vertices (vertex_subset) containing the degrees. 
The ordering is + relative to the adjacency list, or that given by the specified + vertex_subset. + df['vertex'] : cudf.Series The vertex IDs (will be identical to vertex_subset if specified). From e44009171f31b9c8568b842a20dfee25f42efd0c Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Mon, 14 Sep 2020 14:37:26 -0400 Subject: [PATCH 33/74] added jaccard_coefficient for Nx compatability --- python/cugraph/link_prediction/jaccard.py | 46 +++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/python/cugraph/link_prediction/jaccard.py b/python/cugraph/link_prediction/jaccard.py index e2160a0a803..8acc0e2e44e 100644 --- a/python/cugraph/link_prediction/jaccard.py +++ b/python/cugraph/link_prediction/jaccard.py @@ -127,3 +127,49 @@ def jaccard(input_graph, vertex_pair=None): df = input_graph.unrenumber(df, "destination") return df + +def jaccard_coefficient(G, ebunch=None): + """ + For NetworkX Compatability. See `jaccard` + + Parameters + ---------- + graph : cugraph.Graph + cuGraph graph descriptor, should contain the connectivity information + as an edge list (edge weights are not used for this algorithm). The + graph should be undirected where an undirected edge is represented by a + directed edge in both direction. The adjacency list will be computed if + not already present. + ebunch : cudf.DataFrame + A GPU dataframe consisting of two columns representing pairs of + vertices. If provided, the jaccard coefficient is computed for the + given vertex pairs. If the vertex_pair is not provided then the + current implementation computes the jaccard coefficient for all + adjacent vertices in the graph. + + Returns + ------- + df : cudf.DataFrame + GPU data frame of size E (the default) or the size of the given pairs + (first, second) containing the Jaccard weights. The ordering is + relative to the adjacency list, or that given by the specified vertex + pairs. 
+ + df['source'] : cudf.Series + The source vertex ID (will be identical to first if specified) + df['destination'] : cudf.Series + The destination vertex ID (will be identical to second if + specified) + df['jaccard_coeff'] : cudf.Series + The computed Jaccard coefficient between the source and destination + vertices + + Examples + -------- + >>> gdf = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(gdf, source='0', destination='1') + >>> df = cugraph.jaccard_coefficient(G) + """ + return jaccard(G,ebunch) From 39f2830cdae3609717506f84dc0deff6f1386f35 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Mon, 14 Sep 2020 14:48:41 -0400 Subject: [PATCH 34/74] jaccard_coefficient --- python/cugraph/__init__.py | 8 +++++++- python/cugraph/link_prediction/__init__.py | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index 6f40641eddc..f228e691bd1 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -47,7 +47,13 @@ ) from cugraph.link_analysis import pagerank, hits -from cugraph.link_prediction import jaccard, overlap, jaccard_w, overlap_w +from cugraph.link_prediction import { + jaccard, + jaccard_coefficient, + overlap, + jaccard_w, + overlap_w, +} from cugraph.traversal import bfs, sssp, filter_unreachable from cugraph.utilities import utils diff --git a/python/cugraph/link_prediction/__init__.py b/python/cugraph/link_prediction/__init__.py index d0912c73751..70e55591639 100644 --- a/python/cugraph/link_prediction/__init__.py +++ b/python/cugraph/link_prediction/__init__.py @@ -12,6 +12,7 @@ # limitations under the License. 
from cugraph.link_prediction.jaccard import jaccard +from cugraph.link_prediction.jaccard import jaccard_coefficient from cugraph.link_prediction.overlap import overlap from cugraph.link_prediction.wjaccard import jaccard_w from cugraph.link_prediction.woverlap import overlap_w From 51d760c061ca1eb4b2f331d85665f7a634869a74 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Mon, 14 Sep 2020 16:56:19 -0400 Subject: [PATCH 35/74] fixed typo --- python/cugraph/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index f228e691bd1..d0d33e1baca 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -45,15 +45,17 @@ weakly_connected_components, strongly_connected_components, ) + from cugraph.link_analysis import pagerank, hits -from cugraph.link_prediction import { +from cugraph.link_prediction import ( jaccard, jaccard_coefficient, overlap, jaccard_w, overlap_w, -} +) + from cugraph.traversal import bfs, sssp, filter_unreachable from cugraph.utilities import utils From c4d3e467308f18777304648c434b311d8406b9e1 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Tue, 15 Sep 2020 12:36:03 -0400 Subject: [PATCH 36/74] flake8 fixes --- python/cugraph/centrality/betweenness_centrality.py | 6 +++--- python/cugraph/link_prediction/jaccard.py | 3 ++- python/cugraph/structure/convert_matrix.py | 4 ++-- python/cugraph/structure/graph.py | 4 ++-- 4 files changed, 9 insertions(+), 8 deletions(-) diff --git a/python/cugraph/centrality/betweenness_centrality.py b/python/cugraph/centrality/betweenness_centrality.py index 9eb5eea8052..667ca3c9615 100644 --- a/python/cugraph/centrality/betweenness_centrality.py +++ b/python/cugraph/centrality/betweenness_centrality.py @@ -35,7 +35,7 @@ def betweenness_centrality( pass through a vertex. A vertex with a high betweenness centrality score has more paths passing through it and is therefore believed to be more important. 
Rather than doing an all-pair shortest path, a sample of k - starting vertices can be used. + starting vertices can be used. CuGraph does not currently support the 'endpoints' and 'weight' parameters as seen in the corresponding networkX call. @@ -146,8 +146,8 @@ def edge_betweenness_centrality( that pass over an edge. An edge with a high betweenness centrality score has more paths passing over it and is therefore believed to be more important. Rather than doing an all-pair shortest path, a sample - of k starting vertices can be used. - + of k starting vertices can be used. + CuGraph does not currently support the 'weight' parameter as seen in the corresponding networkX call. diff --git a/python/cugraph/link_prediction/jaccard.py b/python/cugraph/link_prediction/jaccard.py index 8acc0e2e44e..27d3b1458a5 100644 --- a/python/cugraph/link_prediction/jaccard.py +++ b/python/cugraph/link_prediction/jaccard.py @@ -128,6 +128,7 @@ def jaccard(input_graph, vertex_pair=None): return df + def jaccard_coefficient(G, ebunch=None): """ For NetworkX Compatability. See `jaccard` @@ -172,4 +173,4 @@ def jaccard_coefficient(G, ebunch=None): >>> G.from_cudf_edgelist(gdf, source='0', destination='1') >>> df = cugraph.jaccard_coefficient(G) """ - return jaccard(G,ebunch) + return jaccard(G, ebunch) diff --git a/python/cugraph/structure/convert_matrix.py b/python/cugraph/structure/convert_matrix.py index 1ef30cde583..56bb9086380 100644 --- a/python/cugraph/structure/convert_matrix.py +++ b/python/cugraph/structure/convert_matrix.py @@ -23,8 +23,8 @@ def from_cudf_edgelist(df, source='source', destination='destination', """ Return a new graph created from the edge list representaion. This function is added for NetworkX compatibility (this function is a RAPIDS version of - NetworkX's from_pandas_edge_list()). This function does not support multiple - source or destination columns. But does support renumbering + NetworkX's from_pandas_edge_list()). 
This function does not support + multiple source or destination columns. But does support renumbering Parameters ---------- diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index 83183e82809..01b6b1f47a4 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -340,8 +340,8 @@ def from_cudf_edgelist( >>> df = cudf.read_csv('datasets/karate.csv', delimiter=' ', >>> dtype=['int32', 'int32', 'float32'], header=None) >>> G = cugraph.Graph() - >>> G.from_cudf_edgelist(df, source='0', destination='1', edge_attr='2', - renumber=False) + >>> G.from_cudf_edgelist(df, source='0', destination='1', + edge_attr='2', renumber=False) """ if self.edgelist is not None or self.adjlist is not None: From ea428de8900c28ceb80169a13e60f2509fc55f2a Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Tue, 15 Sep 2020 12:59:06 -0400 Subject: [PATCH 37/74] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 44b9c097774..cb601701763 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - PR 1081 MNMG Renumbering - sort partitions by degree - PR 1115 Replace deprecated rmm::mr::get_default_resource with rmm::mr::get_current_device_resource - PR #1129 Refactored test to use common dataset and added additional doc pages +- PR #1144 updated documentation and APIs ## Bug Fixes - PR #1131 Show style checker errors with set +e From d8a8d9a13e552dec4418421445bf951b25ab6714 Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Tue, 15 Sep 2020 14:56:52 -0400 Subject: [PATCH 38/74] updated test to say "edge betweenness centrality" --- python/cugraph/centrality/betweenness_centrality.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/centrality/betweenness_centrality.py b/python/cugraph/centrality/betweenness_centrality.py index 667ca3c9615..bd5ebbcc935 100644 --- a/python/cugraph/centrality/betweenness_centrality.py +++ 
b/python/cugraph/centrality/betweenness_centrality.py @@ -141,7 +141,7 @@ def edge_betweenness_centrality( G, k=None, normalized=True, weight=None, seed=None, result_dtype=np.float64 ): """ - Compute the betweenness centrality for all edges of the graph G. + Compute the edge betweenness centrality for all edges of the graph G. Betweenness centrality is a measure of the number of shortest paths that pass over an edge. An edge with a high betweenness centrality score has more paths passing over it and is therefore believed to be From 3fca967c6a3ab6c2498a70c8ef59565989f9f1cc Mon Sep 17 00:00:00 2001 From: BradReesWork Date: Tue, 15 Sep 2020 15:06:44 -0400 Subject: [PATCH 39/74] made triangle plural --- python/cugraph/community/triangle_count.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/community/triangle_count.py b/python/cugraph/community/triangle_count.py index 7606c4dab13..586d16bb20b 100644 --- a/python/cugraph/community/triangle_count.py +++ b/python/cugraph/community/triangle_count.py @@ -17,7 +17,7 @@ def triangles(G): """ - Compute the number of triangle (cycles of length three) in the + Compute the number of triangles (cycles of length three) in the input graph. Parameters From f2686d1f7e3447052126ce32d8d1bd2a5129a70a Mon Sep 17 00:00:00 2001 From: Andrei Schaffer Date: Tue, 15 Sep 2020 16:17:07 -0500 Subject: [PATCH 40/74] Addressed code review request on removing partition_manager_t class. 
--- cpp/include/partition_manager.hpp | 58 ------------------------------- 1 file changed, 58 deletions(-) diff --git a/cpp/include/partition_manager.hpp b/cpp/include/partition_manager.hpp index 8d848196406..b817931e770 100644 --- a/cpp/include/partition_manager.hpp +++ b/cpp/include/partition_manager.hpp @@ -36,64 +36,6 @@ std::string to_string(from_t const& value) return ss.str(); } -// class responsible for creating 2D partition of workers: -// responsible with finding appropriate P_ROW x P_COL -// 2D partition and initializing the raft::handle_t communicator -// -// (this might be removed; or, it might exist already) -// -template -class partition_manager_t { - public: - partition_manager_t(raft::handle_t& handle, size_type p_row_size, size_type p_col_size) - : handle_(handle), p_row_size_(p_row_size), p_col_size_(p_col_size) - { - init_communicator(); - } - - partition_manager_t(raft::handle_t const& handle, size_type p_size) : handle_(handle) - { - partition2d(p_size); - init_communicator(); - } - - virtual ~partition_manager_t(void) {} - - protected: - virtual void partition2d(size_type p_size) - { - auto sqr = static_cast(std::sqrt(p_size)); - - // find divisor of p_size - // nearest to sqr; - // - p_row_size_ = nearest_divisor(sqr, p_size); - p_col_size_ = p_size / p_row_size_; - - assert(p_row_size_ > 1 && p_col_size_ > 1); - } - - virtual void init_communicator(void) - { - // TODO: init's handle's communicator (singleton?) 
- } - - private: - raft::handle_t& handle_; - size_type p_row_size_; - size_type p_col_size_; - - static decltype(auto) nearest_divisor(size_type sqr, size_type p_size) - { - assert(sqr > 0); - - for (size_type div = sqr; div > 0; --div) { - auto p_div = p_size % div; - if (p_div == 0) return div; - } - } -}; - // default key-naming mechanism: // struct key_naming_t { From 628b8285dfc76ba5d42810c981a461a6ddff4a67 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Tue, 15 Sep 2020 16:43:50 -0500 Subject: [PATCH 41/74] Updates from review feedback: added/updated FIXMEs, using a proper scoped enum, removed dead code, removed unneeded std::move() --- cpp/include/utilities/cython.hpp | 18 ++++++------ cpp/src/utilities/cython.cpp | 28 +++++-------------- .../dask/community/louvain_wrapper.pyx | 15 ++++++++-- python/cugraph/structure/graph_primtypes.pxd | 8 ++++-- 4 files changed, 35 insertions(+), 34 deletions(-) diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index 5d06a2dcca5..b3383f9c46a 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -16,13 +16,13 @@ #pragma once #include -// #include #include namespace cugraph { namespace cython { -// FIXME: use std::variant instead of a union if possible +// FIXME: use std::variant (or a better alternative, ie. type erasure?) instead +// of a union if possible // FIXME: add both CSRView and graph_type_t objects for easier testing during // the transition union graphUnion { @@ -31,15 +31,17 @@ union graphUnion { GraphCSRView GraphCSRViewDouble; }; -enum weightTypeEnum { floatType = 0, doubleType = 1 }; +enum class weightTypeEnum : int { floatType, doubleType }; -// FIXME: Add comments describing this struct, where it's used, etc. +// "container" for a graph type instance which insulates the owner from the +// specifics of the actual graph type. This is intended to be used in Cython +// code that only needs to pass a graph object to another wrapped C++ API. 
This +// simplifies the Cython code greatly since it only needs to define the +// container and not the various individual graph types in Cython. struct graph_container_t { graph_container_t() {} graphUnion graph; - // FIXME: cython issues using an enum so just using an int for now. - // weightTypeEnum wType; - int wType; + weightTypeEnum wType; }; // Factory function for creating graph containers from basic types @@ -49,7 +51,7 @@ graph_container_t create_graph_t(raft::handle_t const& handle, int* offsets, int* indices, void* weights, - int weightType, + weightTypeEnum weightType, int num_vertices, int num_edges, int* local_vertices, diff --git a/cpp/src/utilities/cython.cpp b/cpp/src/utilities/cython.cpp index 6baf7c3fb9c..70195057f74 100644 --- a/cpp/src/utilities/cython.cpp +++ b/cpp/src/utilities/cython.cpp @@ -29,7 +29,7 @@ graph_container_t create_graph_t(raft::handle_t const& handle, int* offsets, int* indices, void* weights, - int weightType, + weightTypeEnum weightType, int num_vertices, int num_edges, int* local_vertices, @@ -41,7 +41,10 @@ graph_container_t create_graph_t(raft::handle_t const& handle, graph_container_t graph_container{}; graph_container.wType = weightType; - if (weightType == floatType) { + // FIXME: instantiate graph_type_t instead when ready, add conditionals for + // properly instantiating MG or not based on multi_gpu, etc. + + if (weightType == weightTypeEnum::floatType) { graph_container.graph.GraphCSRViewFloat = GraphCSRView( offsets, indices, reinterpret_cast(weights), num_vertices, num_edges); graph_container.graph.GraphCSRViewFloat.set_local_data( @@ -56,24 +59,7 @@ graph_container_t create_graph_t(raft::handle_t const& handle, graph_container.graph.GraphCSRViewDouble.set_handle(const_cast(&handle)); } - return std::move(graph_container); - - // FIXME: instantiate graph_type_t instead when ready, add conditionals for - // properly instantiating MG or not based on multi_gpu, etc. 
- /* - auto graph = graph_view_t( - handle, - offsets_vect, - indices_vect, - weights_vect, - vertex_partition_segment_offsets_vect, - partition, - num_vertices, - num_edges, - properties, - sorted_by_global_degree_within_vertex_partition, - do_expensive_check); - */ + return graph_container; } // Wrapper for calling Louvain using a graph container @@ -86,7 +72,7 @@ weight_t call_louvain(raft::handle_t const& handle, { weight_t final_modularity; - if (graph_container.wType == floatType) { + if (graph_container.wType == weightTypeEnum::floatType) { std::pair results = louvain(handle, graph_container.graph.GraphCSRViewFloat, parts, diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index 4229e3f108b..c6ee9ca489d 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -38,6 +38,10 @@ def louvain(input_df, local_data, rank, handle, max_level, resolution): final_modularity = None + # FIXME: much of this code is common to other algo wrappers, consider adding + # this to a shared utility as well (extracting pointers from + # dataframes, handling local_data, etc.) + src = input_df['src'] dst = input_df['dst'] if "value" in input_df.columns: @@ -89,11 +93,16 @@ def louvain(input_df, local_data, rank, handle, max_level, resolution): # FIXME: This dict should not be needed, instead update create_graph_t() to # take weights.dtype directly - # FIXME: offsets and indices should also be void*, and have corresponding + # FIXME: Offsets and indices should also be void*, and have corresponding # dtypes passed to create_graph_t() - weightTypeMap = {np.dtype("float32"):0, np.dtype("double"):1} + # FIXME: The excessive casting for the enum arg is needed to make cython + # understand how to pass the enum value (this is the same pattern + # used by cudf). 
This will not be needed with Cython 3.0 + weightTypeMap = {np.dtype("float32") : weightTypeEnum.floatType, + np.dtype("double") : weightTypeEnum.doubleType} + graph_container = create_graph_t(handle_[0], c_offsets, c_indices, - c_weights, weightTypeMap[weights.dtype], + c_weights, ((weightTypeMap[weights.dtype])), num_verts, num_local_edges, c_local_verts, c_local_edges, c_local_offsets, False, True) # store_transposed, multi_gpu diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index 5ae8e9f6ee3..62af91d2af1 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -192,9 +192,13 @@ cdef csr_to_series(GraphCSRPtrType graph) cdef GraphViewType get_graph_view(input_graph, bool weightless=*, GraphViewType* dummy=*) -# C++ graph factory function and container type +# C++ utilities specifically for Cython cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + ctypedef enum weightTypeEnum: + floatType "cugraph::cython::weightTypeEnum::floatType" + doubleType "cugraph::cython::weightTypeEnum::doubleType" + cdef struct graph_container_t: pass @@ -203,7 +207,7 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": int *offsets, int *indices, void *weights, - int weightType, + weightTypeEnum weightType, int num_vertices, int num_edges, int *local_vertices, From 905485585d899184f0a67dd95bce0362ef691a8a Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Tue, 15 Sep 2020 23:55:27 -0500 Subject: [PATCH 42/74] Chnaged union to hold pointers, but having problem with a double free. 
--- cpp/include/experimental/graph_view.hpp | 10 +- cpp/include/utilities/cython.hpp | 114 ++++++++++++++---- cpp/src/utilities/cython.cpp | 76 ++++++------ .../dask/community/louvain_wrapper.pyx | 19 +-- python/cugraph/structure/graph_primtypes.pxd | 31 +++-- 5 files changed, 176 insertions(+), 74 deletions(-) diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp index b3b899a5068..1e6d8898343 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/experimental/graph_view.hpp @@ -75,6 +75,8 @@ std::string const comm_p_col_key = "comm_p_key"; template class partition_t { public: + partition_t() {} + partition_t(std::vector const& vertex_partition_offsets, bool hypergraph_partitioned, int comm_p_row_size, @@ -183,6 +185,8 @@ size_t constexpr num_segments_per_vertex_partition{3}; template class graph_base_t { public: + graph_base_t() {} + graph_base_t(raft::handle_t const& handle, vertex_t number_of_vertices, edge_t number_of_edges, @@ -242,6 +246,8 @@ class graph_view_t const& adj_matrix_partition_offsets, std::vector const& adj_matrix_partition_indices, @@ -324,6 +330,8 @@ class graph_view_t +#include #include namespace cugraph { @@ -23,15 +24,36 @@ namespace cython { // FIXME: use std::variant (or a better alternative, ie. type erasure?) 
instead // of a union if possible -// FIXME: add both CSRView and graph_type_t objects for easier testing during -// the transition -union graphUnion { - graphUnion() {} - GraphCSRView GraphCSRViewFloat; - GraphCSRView GraphCSRViewDouble; +union graphPtrUnion { + void* null; + GraphCSRView* GraphCSRViewFloatPtr; + GraphCSRView* GraphCSRViewDoublePtr; + experimental::graph_view_t* graph_view_t_float_ptr; + experimental::graph_view_t* graph_view_t_double_ptr; + experimental::graph_view_t* graph_view_t_float_mg_ptr; + experimental::graph_view_t* graph_view_t_double_mg_ptr; + experimental::graph_view_t* graph_view_t_float_transposed_ptr; + experimental::graph_view_t* graph_view_t_double_transposed_ptr; + experimental::graph_view_t* graph_view_t_float_mg_transposed_ptr; + experimental::graph_view_t* graph_view_t_double_mg_transposed_ptr; }; -enum class weightTypeEnum : int { floatType, doubleType }; +enum class numberTypeEnum : int { intType, + floatType, + doubleType +}; +enum class graphTypeEnum : int { null, + GraphCSRViewFloat, + GraphCSRViewDouble, + graph_view_t_float, + graph_view_t_double, + graph_view_t_float_mg, + graph_view_t_double_mg, + graph_view_t_float_transposed, + graph_view_t_double_transposed, + graph_view_t_float_mg_transposed, + graph_view_t_double_mg_transposed +}; // "container" for a graph type instance which insulates the owner from the // specifics of the actual graph type. This is intended to be used in Cython @@ -39,26 +61,70 @@ enum class weightTypeEnum : int { floatType, doubleType }; // simplifies the Cython code greatly since it only needs to define the // container and not the various individual graph types in Cython. 
struct graph_container_t { - graph_container_t() {} - graphUnion graph; - weightTypeEnum wType; + inline graph_container_t() : + graph_ptr{nullptr}, + graph_ptr_type{graphTypeEnum::null} {} + /* + inline ~graph_container_t() { + switch(graph_ptr_type) { + case graphTypeEnum::GraphCSRViewFloat : + delete graph_ptr.GraphCSRViewFloatPtr; + std::cout << "DELETED GraphCSRViewFloatPtr" << std::endl; + break; + case graphTypeEnum::GraphCSRViewDouble : + delete graph_ptr.GraphCSRViewDoublePtr; + break; + case graphTypeEnum::graph_view_t_float : + delete graph_ptr.graph_view_t_float_ptr; + break; + case graphTypeEnum::graph_view_t_double : + delete graph_ptr.graph_view_t_double_ptr; + break; + case graphTypeEnum::graph_view_t_float_mg : + delete graph_ptr.graph_view_t_float_mg_ptr; + break; + case graphTypeEnum::graph_view_t_double_mg : + delete graph_ptr.graph_view_t_double_mg_ptr; + break; + case graphTypeEnum::graph_view_t_float_transposed : + delete graph_ptr.graph_view_t_float_transposed_ptr; + break; + case graphTypeEnum::graph_view_t_double_transposed : + delete graph_ptr.graph_view_t_double_transposed_ptr; + break; + case graphTypeEnum::graph_view_t_float_mg_transposed : + delete graph_ptr.graph_view_t_float_mg_transposed_ptr; + break; + case graphTypeEnum::graph_view_t_double_mg_transposed : + delete graph_ptr.graph_view_t_double_mg_transposed_ptr; + break; + default : + break; + } + graph_ptr_type = graphTypeEnum::null; + } + */ + graphPtrUnion graph_ptr; + graphTypeEnum graph_ptr_type; }; // Factory function for creating graph containers from basic types -// FIXME: This should accept void* for offsets and indices as well and take a -// dtype directly for each instead of the enum/int. 
-graph_container_t create_graph_t(raft::handle_t const& handle, - int* offsets, - int* indices, - void* weights, - weightTypeEnum weightType, - int num_vertices, - int num_edges, - int* local_vertices, - int* local_edges, - int* local_offsets, - bool transposed, - bool multi_gpu); +// FIXME: Should local_* values be void* as well? +void create_graph_t(graph_container_t& graph_container, + raft::handle_t const& handle, + void* offsets, + void* indices, + void* weights, + numberTypeEnum offsetType, + numberTypeEnum indexType, + numberTypeEnum weightType, + int num_vertices, + int num_edges, + int* local_vertices, + int* local_edges, + int* local_offsets, + bool transposed, + bool multi_gpu); // Wrapper for calling Louvain using a graph container template diff --git a/cpp/src/utilities/cython.cpp b/cpp/src/utilities/cython.cpp index 70195057f74..46361e48684 100644 --- a/cpp/src/utilities/cython.cpp +++ b/cpp/src/utilities/cython.cpp @@ -22,44 +22,52 @@ namespace cugraph { namespace cython { -// Factory function for creating graph containers from basic types -// FIXME: This should accept void* for offsets and indices as well and take a -// dtype directly for each instead of the enum/int. -graph_container_t create_graph_t(raft::handle_t const& handle, - int* offsets, - int* indices, - void* weights, - weightTypeEnum weightType, - int num_vertices, - int num_edges, - int* local_vertices, - int* local_edges, - int* local_offsets, - bool transposed, - bool multi_gpu) +// Populates a graph_container_t with a pointer to a new graph object and sets +// the meta-data accordingly. The graph container owns the pointer and it is +// assumed it will delete it on destruction. +// +// FIXME: Should local_* values be void* as well? 
+void create_graph_t(graph_container_t& graph_container, + raft::handle_t const& handle, + void* offsets, + void* indices, + void* weights, + numberTypeEnum offsetType, + numberTypeEnum indexType, + numberTypeEnum weightType, + int num_vertices, + int num_edges, + int* local_vertices, + int* local_edges, + int* local_offsets, + bool transposed, + bool multi_gpu) { - graph_container_t graph_container{}; - graph_container.wType = weightType; - // FIXME: instantiate graph_type_t instead when ready, add conditionals for - // properly instantiating MG or not based on multi_gpu, etc. - - if (weightType == weightTypeEnum::floatType) { - graph_container.graph.GraphCSRViewFloat = GraphCSRView( - offsets, indices, reinterpret_cast(weights), num_vertices, num_edges); - graph_container.graph.GraphCSRViewFloat.set_local_data( + if (weightType == numberTypeEnum::floatType) { + graph_container.graph_ptr.GraphCSRViewFloatPtr = new GraphCSRView( + reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_vertices, + num_edges); + graph_container.graph_ptr.GraphCSRViewFloatPtr->set_local_data( local_vertices, local_edges, local_offsets); - graph_container.graph.GraphCSRViewFloat.set_handle(const_cast(&handle)); + graph_container.graph_ptr.GraphCSRViewFloatPtr->set_handle(const_cast(&handle)); + graph_container.graph_ptr_type = graphTypeEnum::GraphCSRViewFloat; } else { - graph_container.graph.GraphCSRViewDouble = GraphCSRView( - offsets, indices, reinterpret_cast(weights), num_vertices, num_edges); - graph_container.graph.GraphCSRViewDouble.set_local_data( + graph_container.graph_ptr.GraphCSRViewDoublePtr = new GraphCSRView( + reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_vertices, + num_edges); + graph_container.graph_ptr.GraphCSRViewDoublePtr->set_local_data( local_vertices, local_edges, local_offsets); - graph_container.graph.GraphCSRViewDouble.set_handle(const_cast(&handle)); + 
graph_container.graph_ptr.GraphCSRViewDoublePtr->set_handle(const_cast(&handle)); + graph_container.graph_ptr_type = graphTypeEnum::GraphCSRViewDouble; } - - return graph_container; } // Wrapper for calling Louvain using a graph container @@ -72,16 +80,16 @@ weight_t call_louvain(raft::handle_t const& handle, { weight_t final_modularity; - if (graph_container.wType == weightTypeEnum::floatType) { + if (graph_container.graph_ptr_type == graphTypeEnum::GraphCSRViewFloat) { std::pair results = louvain(handle, - graph_container.graph.GraphCSRViewFloat, + *(graph_container.graph_ptr.GraphCSRViewFloatPtr), parts, max_level, static_cast(resolution)); final_modularity = results.second; } else { std::pair results = louvain(handle, - graph_container.graph.GraphCSRViewDouble, + *(graph_container.graph_ptr.GraphCSRViewDoublePtr), parts, max_level, static_cast(resolution)); diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index c6ee9ca489d..c3588d9bda7 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -98,14 +98,17 @@ def louvain(input_df, local_data, rank, handle, max_level, resolution): # FIXME: The excessive casting for the enum arg is needed to make cython # understand how to pass the enum value (this is the same pattern # used by cudf). 
This will not be needed with Cython 3.0 - weightTypeMap = {np.dtype("float32") : weightTypeEnum.floatType, - np.dtype("double") : weightTypeEnum.doubleType} - - graph_container = create_graph_t(handle_[0], c_offsets, c_indices, - c_weights, ((weightTypeMap[weights.dtype])), - num_verts, num_local_edges, - c_local_verts, c_local_edges, c_local_offsets, - False, True) # store_transposed, multi_gpu + weightTypeMap = {np.dtype("float32") : numberTypeEnum.floatType, + np.dtype("double") : numberTypeEnum.doubleType} + + create_graph_t(graph_container, handle_[0], + c_offsets, c_indices, c_weights, + ((numberTypeEnum.intType)), + ((numberTypeEnum.intType)), + ((weightTypeMap[weights.dtype])), + num_verts, num_local_edges, + c_local_verts, c_local_edges, c_local_offsets, + False, True) # store_transposed, multi_gpu if weights.dtype == np.float32: final_modularity_float = c_louvain.call_louvain[float]( diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index 62af91d2af1..bf057ce1f55 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -195,19 +195,36 @@ cdef GraphViewType get_graph_view(input_graph, bool weightless=*, GraphViewType* # C++ utilities specifically for Cython cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - ctypedef enum weightTypeEnum: - floatType "cugraph::cython::weightTypeEnum::floatType" - doubleType "cugraph::cython::weightTypeEnum::doubleType" + ctypedef enum numberTypeEnum: + intType "cugraph::cython::numberTypeEnum::intType" + floatType "cugraph::cython::numberTypeEnum::floatType" + doubleType "cugraph::cython::numberTypeEnum::doubleType" + + # ctypedef enum graphTypeEnum: + # null "cugraph::cython::graphTypeEnum::null" + # GraphCSRViewFloat "cugraph::cython::graphTypeEnum::GraphCSRViewFloat" + # GraphCSRViewDouble "cugraph::cython::graphTypeEnum::GraphCSRViewDouble" + # graph_view_t_float 
"cugraph::cython::graphTypeEnum::graph_view_t_float" + # graph_view_t_double "cugraph::cython::graphTypeEnum::graph_view_t_double" + # graph_view_t_float_mg "cugraph::cython::graphTypeEnum::graph_view_t_float_mg" + # graph_view_t_double_mg "cugraph::cython::graphTypeEnum::graph_view_t_double_mg" + # graph_view_t_float_transposed "cugraph::cython::graphTypeEnum::graph_view_t_float_transposed" + # graph_view_t_double_transposed "cugraph::cython::graphTypeEnum::graph_view_t_double_transposed" + # graph_view_t_float_mg_transposed "cugraph::cython::graphTypeEnum::graph_view_t_float_mg_transposed" + # graph_view_t_double_mg_transposed "cugraph::cython::graphTypeEnum::graph_view_t_double_mg_transposed" cdef struct graph_container_t: pass - cdef graph_container_t create_graph_t( + cdef void create_graph_t( + graph_container_t &graph_container, const handle_t &handle, - int *offsets, - int *indices, + void *offsets, + void *indices, void *weights, - weightTypeEnum weightType, + numberTypeEnum offsetType, + numberTypeEnum indexType, + numberTypeEnum weightType, int num_vertices, int num_edges, int *local_vertices, From 71aac7bd444ef7881d0b3579f56f15054c9025f1 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Wed, 16 Sep 2020 00:22:03 -0500 Subject: [PATCH 43/74] Fixed double free by passing a reference to a graph_container_t instead of a value which gets copied. --- cpp/include/utilities/cython.hpp | 64 +++++++++++------------ cpp/src/utilities/cython.cpp | 26 ++++----- python/cugraph/dask/community/louvain.pxd | 2 +- 3 files changed, 46 insertions(+), 46 deletions(-) diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index 91124a74d89..2caff16d59c 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -22,22 +22,6 @@ namespace cugraph { namespace cython { -// FIXME: use std::variant (or a better alternative, ie. type erasure?) 
instead -// of a union if possible -union graphPtrUnion { - void* null; - GraphCSRView* GraphCSRViewFloatPtr; - GraphCSRView* GraphCSRViewDoublePtr; - experimental::graph_view_t* graph_view_t_float_ptr; - experimental::graph_view_t* graph_view_t_double_ptr; - experimental::graph_view_t* graph_view_t_float_mg_ptr; - experimental::graph_view_t* graph_view_t_double_mg_ptr; - experimental::graph_view_t* graph_view_t_float_transposed_ptr; - experimental::graph_view_t* graph_view_t_double_transposed_ptr; - experimental::graph_view_t* graph_view_t_float_mg_transposed_ptr; - experimental::graph_view_t* graph_view_t_double_mg_transposed_ptr; -}; - enum class numberTypeEnum : int { intType, floatType, doubleType @@ -61,50 +45,66 @@ enum class graphTypeEnum : int { null, // simplifies the Cython code greatly since it only needs to define the // container and not the various individual graph types in Cython. struct graph_container_t { + + // FIXME: use std::variant (or a better alternative, ie. type erasure?) 
instead + // of a union if possible + union graphPtrUnion { + void* null; + GraphCSRView* GraphCSRViewFloatPtr; + GraphCSRView* GraphCSRViewDoublePtr; + experimental::graph_view_t* graph_view_t_float_ptr; + experimental::graph_view_t* graph_view_t_double_ptr; + experimental::graph_view_t* graph_view_t_float_mg_ptr; + experimental::graph_view_t* graph_view_t_double_mg_ptr; + experimental::graph_view_t* graph_view_t_float_transposed_ptr; + experimental::graph_view_t* graph_view_t_double_transposed_ptr; + experimental::graph_view_t* graph_view_t_float_mg_transposed_ptr; + experimental::graph_view_t* graph_view_t_double_mg_transposed_ptr; + }; + inline graph_container_t() : - graph_ptr{nullptr}, + graph_ptr_union{nullptr}, graph_ptr_type{graphTypeEnum::null} {} - /* + inline ~graph_container_t() { switch(graph_ptr_type) { case graphTypeEnum::GraphCSRViewFloat : - delete graph_ptr.GraphCSRViewFloatPtr; - std::cout << "DELETED GraphCSRViewFloatPtr" << std::endl; + delete graph_ptr_union.GraphCSRViewFloatPtr; break; case graphTypeEnum::GraphCSRViewDouble : - delete graph_ptr.GraphCSRViewDoublePtr; + delete graph_ptr_union.GraphCSRViewDoublePtr; break; case graphTypeEnum::graph_view_t_float : - delete graph_ptr.graph_view_t_float_ptr; + delete graph_ptr_union.graph_view_t_float_ptr; break; case graphTypeEnum::graph_view_t_double : - delete graph_ptr.graph_view_t_double_ptr; + delete graph_ptr_union.graph_view_t_double_ptr; break; case graphTypeEnum::graph_view_t_float_mg : - delete graph_ptr.graph_view_t_float_mg_ptr; + delete graph_ptr_union.graph_view_t_float_mg_ptr; break; case graphTypeEnum::graph_view_t_double_mg : - delete graph_ptr.graph_view_t_double_mg_ptr; + delete graph_ptr_union.graph_view_t_double_mg_ptr; break; case graphTypeEnum::graph_view_t_float_transposed : - delete graph_ptr.graph_view_t_float_transposed_ptr; + delete graph_ptr_union.graph_view_t_float_transposed_ptr; break; case graphTypeEnum::graph_view_t_double_transposed : - delete 
graph_ptr.graph_view_t_double_transposed_ptr; + delete graph_ptr_union.graph_view_t_double_transposed_ptr; break; case graphTypeEnum::graph_view_t_float_mg_transposed : - delete graph_ptr.graph_view_t_float_mg_transposed_ptr; + delete graph_ptr_union.graph_view_t_float_mg_transposed_ptr; break; case graphTypeEnum::graph_view_t_double_mg_transposed : - delete graph_ptr.graph_view_t_double_mg_transposed_ptr; + delete graph_ptr_union.graph_view_t_double_mg_transposed_ptr; break; default : break; } graph_ptr_type = graphTypeEnum::null; } - */ - graphPtrUnion graph_ptr; + + graphPtrUnion graph_ptr_union; graphTypeEnum graph_ptr_type; }; @@ -129,7 +129,7 @@ void create_graph_t(graph_container_t& graph_container, // Wrapper for calling Louvain using a graph container template weight_t call_louvain(raft::handle_t const& handle, - graph_container_t graph_container, + graph_container_t& graph_container, int* parts, size_t max_level, weight_t resolution); diff --git a/cpp/src/utilities/cython.cpp b/cpp/src/utilities/cython.cpp index 46361e48684..70a53f78803 100644 --- a/cpp/src/utilities/cython.cpp +++ b/cpp/src/utilities/cython.cpp @@ -45,35 +45,35 @@ void create_graph_t(graph_container_t& graph_container, { if (weightType == numberTypeEnum::floatType) { - graph_container.graph_ptr.GraphCSRViewFloatPtr = new GraphCSRView( + graph_container.graph_ptr_union.GraphCSRViewFloatPtr = new GraphCSRView( reinterpret_cast(offsets), reinterpret_cast(indices), reinterpret_cast(weights), num_vertices, num_edges); - graph_container.graph_ptr.GraphCSRViewFloatPtr->set_local_data( - local_vertices, local_edges, local_offsets); - graph_container.graph_ptr.GraphCSRViewFloatPtr->set_handle(const_cast(&handle)); graph_container.graph_ptr_type = graphTypeEnum::GraphCSRViewFloat; + graph_container.graph_ptr_union.GraphCSRViewFloatPtr->set_local_data( + local_vertices, local_edges, local_offsets); + graph_container.graph_ptr_union.GraphCSRViewFloatPtr->set_handle(const_cast(&handle)); } else { - 
graph_container.graph_ptr.GraphCSRViewDoublePtr = new GraphCSRView( + graph_container.graph_ptr_union.GraphCSRViewDoublePtr = new GraphCSRView( reinterpret_cast(offsets), reinterpret_cast(indices), reinterpret_cast(weights), num_vertices, num_edges); - graph_container.graph_ptr.GraphCSRViewDoublePtr->set_local_data( - local_vertices, local_edges, local_offsets); - graph_container.graph_ptr.GraphCSRViewDoublePtr->set_handle(const_cast(&handle)); graph_container.graph_ptr_type = graphTypeEnum::GraphCSRViewDouble; + graph_container.graph_ptr_union.GraphCSRViewDoublePtr->set_local_data( + local_vertices, local_edges, local_offsets); + graph_container.graph_ptr_union.GraphCSRViewDoublePtr->set_handle(const_cast(&handle)); } } // Wrapper for calling Louvain using a graph container template weight_t call_louvain(raft::handle_t const& handle, - graph_container_t graph_container, + graph_container_t& graph_container, int* parts, size_t max_level, weight_t resolution) @@ -82,14 +82,14 @@ weight_t call_louvain(raft::handle_t const& handle, if (graph_container.graph_ptr_type == graphTypeEnum::GraphCSRViewFloat) { std::pair results = louvain(handle, - *(graph_container.graph_ptr.GraphCSRViewFloatPtr), + *(graph_container.graph_ptr_union.GraphCSRViewFloatPtr), parts, max_level, static_cast(resolution)); final_modularity = results.second; } else { std::pair results = louvain(handle, - *(graph_container.graph_ptr.GraphCSRViewDoublePtr), + *(graph_container.graph_ptr_union.GraphCSRViewDoublePtr), parts, max_level, static_cast(resolution)); @@ -101,13 +101,13 @@ weight_t call_louvain(raft::handle_t const& handle, // Explicit instantiations template float call_louvain(raft::handle_t const& handle, - graph_container_t graph_container, + graph_container_t& graph_container, int* parts, size_t max_level, float resolution); template double call_louvain(raft::handle_t const& handle, - graph_container_t graph_container, + graph_container_t& graph_container, int* parts, size_t max_level, 
double resolution); diff --git a/python/cugraph/dask/community/louvain.pxd b/python/cugraph/dask/community/louvain.pxd index eb3ec9d77ae..e86942eedb1 100644 --- a/python/cugraph/dask/community/louvain.pxd +++ b/python/cugraph/dask/community/louvain.pxd @@ -22,7 +22,7 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": cdef weight_t call_louvain[weight_t]( const handle_t &handle, - graph_container_t g, + graph_container_t &g, int *parts, size_t max_level, weight_t resolution) except + From 915a56ee0dc234935177e2d6b505b12686323103 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Wed, 16 Sep 2020 02:14:27 -0500 Subject: [PATCH 44/74] Started adding new graph_t class instantiations, not done yet and not yet enabled. --- cpp/include/experimental/graph_view.hpp | 8 -- cpp/include/utilities/cython.hpp | 7 +- cpp/src/utilities/cython.cpp | 108 +++++++++++++++++-- python/cugraph/structure/graph_primtypes.pxd | 13 --- 4 files changed, 104 insertions(+), 32 deletions(-) diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp index 1e6d8898343..1d37858dfa4 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/experimental/graph_view.hpp @@ -75,8 +75,6 @@ std::string const comm_p_col_key = "comm_p_key"; template class partition_t { public: - partition_t() {} - partition_t(std::vector const& vertex_partition_offsets, bool hypergraph_partitioned, int comm_p_row_size, @@ -185,8 +183,6 @@ size_t constexpr num_segments_per_vertex_partition{3}; template class graph_base_t { public: - graph_base_t() {} - graph_base_t(raft::handle_t const& handle, vertex_t number_of_vertices, edge_t number_of_edges, @@ -246,8 +242,6 @@ class graph_view_t const& adj_matrix_partition_offsets, std::vector const& adj_matrix_partition_indices, @@ -330,8 +324,6 @@ class graph_view_t #include - +#include +#include #include namespace cugraph { @@ -44,29 +45,118 @@ void create_graph_t(graph_container_t& graph_container, bool multi_gpu) 
{ + // FIXME: This is soon-to-be legacy code left in place until the new graph_t + // class is supported everywhere else. Remove everything down to the comment + // line after the return stmnt. + // Keep new code below return stmnt enabled to ensure it builds. if (weightType == numberTypeEnum::floatType) { - graph_container.graph_ptr_union.GraphCSRViewFloatPtr = new GraphCSRView( + auto g = new GraphCSRView( reinterpret_cast(offsets), reinterpret_cast(indices), reinterpret_cast(weights), num_vertices, num_edges); + graph_container.graph_ptr_union.GraphCSRViewFloatPtr = g; graph_container.graph_ptr_type = graphTypeEnum::GraphCSRViewFloat; - graph_container.graph_ptr_union.GraphCSRViewFloatPtr->set_local_data( - local_vertices, local_edges, local_offsets); - graph_container.graph_ptr_union.GraphCSRViewFloatPtr->set_handle(const_cast(&handle)); + g->set_local_data(local_vertices, local_edges, local_offsets); + g->set_handle(const_cast(&handle)); } else { - graph_container.graph_ptr_union.GraphCSRViewDoublePtr = new GraphCSRView( + auto g = new GraphCSRView( reinterpret_cast(offsets), reinterpret_cast(indices), reinterpret_cast(weights), num_vertices, num_edges); + graph_container.graph_ptr_union.GraphCSRViewDoublePtr = g; graph_container.graph_ptr_type = graphTypeEnum::GraphCSRViewDouble; - graph_container.graph_ptr_union.GraphCSRViewDoublePtr->set_local_data( - local_vertices, local_edges, local_offsets); - graph_container.graph_ptr_union.GraphCSRViewDoublePtr->set_handle(const_cast(&handle)); + g->set_local_data(local_vertices, local_edges, local_offsets); + g->set_handle(const_cast(&handle)); + } + + return; + //////////////////////////////////////////////////////////////////////////////////// + + bool do_expensive_check{false}; + bool sorted_by_global_degree_within_vertex_partition{false}; + experimental::graph_properties_t graph_props{.is_symmetric=false, .is_multigraph=false}; + + if (multi_gpu) { + std::vector adjmatrix_partition_offsets_vect; + std::vector 
adjmatrix_partition_indices_vect; + std::vector vertex_partition_segment_offsets_vect; + std::vector vertex_partition_offsets; + experimental::partition_t partition(vertex_partition_offsets, false, 0, 0, 0, 0); + + if (weightType == numberTypeEnum::floatType) { + std::vector adjmatrix_partition_weights_vect; + auto g = new experimental::graph_view_t(handle, + adjmatrix_partition_offsets_vect, + adjmatrix_partition_indices_vect, + adjmatrix_partition_weights_vect, + vertex_partition_segment_offsets_vect, + partition, + num_vertices, + num_edges, + graph_props, + sorted_by_global_degree_within_vertex_partition, + do_expensive_check); + graph_container.graph_ptr_union.graph_view_t_float_mg_ptr = g; + graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_float_mg; + + } else { + std::vector adjmatrix_partition_weights_vect; + auto g = new experimental::graph_view_t(handle, + adjmatrix_partition_offsets_vect, + adjmatrix_partition_indices_vect, + adjmatrix_partition_weights_vect, + vertex_partition_segment_offsets_vect, + partition, + num_vertices, + num_edges, + graph_props, + sorted_by_global_degree_within_vertex_partition, + do_expensive_check); + graph_container.graph_ptr_union.graph_view_t_double_mg_ptr = g; + graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_double_mg; + + } + + } else { + auto offsets_array = reinterpret_cast(offsets); + auto indices_array = reinterpret_cast(indices); + std::vector segment_offsets; + + if (weightType == numberTypeEnum::floatType) { + auto weights_array = reinterpret_cast(weights); + auto g = new experimental::graph_view_t(handle, + offsets_array, + indices_array, + weights_array, + segment_offsets, + num_vertices, + num_edges, + graph_props, + sorted_by_global_degree_within_vertex_partition, + do_expensive_check); + graph_container.graph_ptr_union.graph_view_t_float_ptr = g; + graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_float; + + } else { + auto weights_array = reinterpret_cast(weights); + auto 
g = new experimental::graph_view_t(handle, + offsets_array, + indices_array, + weights_array, + segment_offsets, + num_vertices, + num_edges, + graph_props, + sorted_by_global_degree_within_vertex_partition, + do_expensive_check); + graph_container.graph_ptr_union.graph_view_t_double_ptr = g; + graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_double; + } } } diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index bf057ce1f55..f82d7a3683b 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -200,19 +200,6 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": floatType "cugraph::cython::numberTypeEnum::floatType" doubleType "cugraph::cython::numberTypeEnum::doubleType" - # ctypedef enum graphTypeEnum: - # null "cugraph::cython::graphTypeEnum::null" - # GraphCSRViewFloat "cugraph::cython::graphTypeEnum::GraphCSRViewFloat" - # GraphCSRViewDouble "cugraph::cython::graphTypeEnum::GraphCSRViewDouble" - # graph_view_t_float "cugraph::cython::graphTypeEnum::graph_view_t_float" - # graph_view_t_double "cugraph::cython::graphTypeEnum::graph_view_t_double" - # graph_view_t_float_mg "cugraph::cython::graphTypeEnum::graph_view_t_float_mg" - # graph_view_t_double_mg "cugraph::cython::graphTypeEnum::graph_view_t_double_mg" - # graph_view_t_float_transposed "cugraph::cython::graphTypeEnum::graph_view_t_float_transposed" - # graph_view_t_double_transposed "cugraph::cython::graphTypeEnum::graph_view_t_double_transposed" - # graph_view_t_float_mg_transposed "cugraph::cython::graphTypeEnum::graph_view_t_float_mg_transposed" - # graph_view_t_double_mg_transposed "cugraph::cython::graphTypeEnum::graph_view_t_double_mg_transposed" - cdef struct graph_container_t: pass From 427da31e7dace80a42bb91fde0fd77a769f259c2 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Wed, 16 Sep 2020 02:46:03 -0500 Subject: [PATCH 45/74] C++ style fixes --- 
cpp/include/utilities/cython.hpp | 69 ++++++-------- cpp/src/utilities/cython.cpp | 154 ++++++++++++++++--------------- 2 files changed, 107 insertions(+), 116 deletions(-) diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index 06e9912d8ce..8f3e8e7ee39 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -15,31 +15,29 @@ */ #pragma once -#include #include +#include #include namespace cugraph { namespace cython { -enum class numberTypeEnum : int { intType, - floatType, - doubleType -}; +enum class numberTypeEnum : int { intType, floatType, doubleType }; // FIXME: The GraphCSRView* types are not in use! Those are left in place in // case a legacy GraphCSRView class is needed, but these should be removed ASAP. -enum class graphTypeEnum : int { null, - GraphCSRViewFloat, - GraphCSRViewDouble, - graph_view_t_float, - graph_view_t_double, - graph_view_t_float_mg, - graph_view_t_double_mg, - graph_view_t_float_transposed, - graph_view_t_double_transposed, - graph_view_t_float_mg_transposed, - graph_view_t_double_mg_transposed +enum class graphTypeEnum : int { + null, + GraphCSRViewFloat, + GraphCSRViewDouble, + graph_view_t_float, + graph_view_t_double, + graph_view_t_float_mg, + graph_view_t_double_mg, + graph_view_t_float_transposed, + graph_view_t_double_transposed, + graph_view_t_float_mg_transposed, + graph_view_t_double_mg_transposed }; // "container" for a graph type instance which insulates the owner from the @@ -48,7 +46,6 @@ enum class graphTypeEnum : int { null, // greatly simplifies the Cython code since the Cython definition only needs to // define the container and not the various individual graph types in Cython. struct graph_container_t { - // FIXME: use std::variant (or a better alternative, ie. type erasure?) 
instead // of a union if possible union graphPtrUnion { @@ -65,44 +62,36 @@ struct graph_container_t { experimental::graph_view_t* graph_view_t_double_mg_transposed_ptr; }; - inline graph_container_t() : - graph_ptr_union{nullptr}, - graph_ptr_type{graphTypeEnum::null} {} + inline graph_container_t() : graph_ptr_union{nullptr}, graph_ptr_type{graphTypeEnum::null} {} - inline ~graph_container_t() { - switch(graph_ptr_type) { - case graphTypeEnum::GraphCSRViewFloat : - delete graph_ptr_union.GraphCSRViewFloatPtr; - break; - case graphTypeEnum::GraphCSRViewDouble : - delete graph_ptr_union.GraphCSRViewDoublePtr; - break; - case graphTypeEnum::graph_view_t_float : - delete graph_ptr_union.graph_view_t_float_ptr; - break; - case graphTypeEnum::graph_view_t_double : + inline ~graph_container_t() + { + switch (graph_ptr_type) { + case graphTypeEnum::GraphCSRViewFloat: delete graph_ptr_union.GraphCSRViewFloatPtr; break; + case graphTypeEnum::GraphCSRViewDouble: delete graph_ptr_union.GraphCSRViewDoublePtr; break; + case graphTypeEnum::graph_view_t_float: delete graph_ptr_union.graph_view_t_float_ptr; break; + case graphTypeEnum::graph_view_t_double: delete graph_ptr_union.graph_view_t_double_ptr; break; - case graphTypeEnum::graph_view_t_float_mg : + case graphTypeEnum::graph_view_t_float_mg: delete graph_ptr_union.graph_view_t_float_mg_ptr; break; - case graphTypeEnum::graph_view_t_double_mg : + case graphTypeEnum::graph_view_t_double_mg: delete graph_ptr_union.graph_view_t_double_mg_ptr; break; - case graphTypeEnum::graph_view_t_float_transposed : + case graphTypeEnum::graph_view_t_float_transposed: delete graph_ptr_union.graph_view_t_float_transposed_ptr; break; - case graphTypeEnum::graph_view_t_double_transposed : + case graphTypeEnum::graph_view_t_double_transposed: delete graph_ptr_union.graph_view_t_double_transposed_ptr; break; - case graphTypeEnum::graph_view_t_float_mg_transposed : + case graphTypeEnum::graph_view_t_float_mg_transposed: delete 
graph_ptr_union.graph_view_t_float_mg_transposed_ptr; break; - case graphTypeEnum::graph_view_t_double_mg_transposed : + case graphTypeEnum::graph_view_t_double_mg_transposed: delete graph_ptr_union.graph_view_t_double_mg_transposed_ptr; break; - default : - break; + default: break; } graph_ptr_type = graphTypeEnum::null; } diff --git a/cpp/src/utilities/cython.cpp b/cpp/src/utilities/cython.cpp index 8b9960464fe..ca6cf1687da 100644 --- a/cpp/src/utilities/cython.cpp +++ b/cpp/src/utilities/cython.cpp @@ -15,10 +15,10 @@ */ #include -#include -#include #include +#include #include +#include namespace cugraph { namespace cython { @@ -44,32 +44,29 @@ void create_graph_t(graph_container_t& graph_container, bool transposed, bool multi_gpu) { - // FIXME: This is soon-to-be legacy code left in place until the new graph_t // class is supported everywhere else. Remove everything down to the comment // line after the return stmnt. // Keep new code below return stmnt enabled to ensure it builds. if (weightType == numberTypeEnum::floatType) { - auto g = new GraphCSRView( - reinterpret_cast(offsets), - reinterpret_cast(indices), - reinterpret_cast(weights), - num_vertices, - num_edges); + auto g = new GraphCSRView(reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_vertices, + num_edges); graph_container.graph_ptr_union.GraphCSRViewFloatPtr = g; - graph_container.graph_ptr_type = graphTypeEnum::GraphCSRViewFloat; + graph_container.graph_ptr_type = graphTypeEnum::GraphCSRViewFloat; g->set_local_data(local_vertices, local_edges, local_offsets); g->set_handle(const_cast(&handle)); } else { - auto g = new GraphCSRView( - reinterpret_cast(offsets), - reinterpret_cast(indices), - reinterpret_cast(weights), - num_vertices, - num_edges); + auto g = new GraphCSRView(reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_vertices, + num_edges); graph_container.graph_ptr_union.GraphCSRViewDoublePtr = g; - 
graph_container.graph_ptr_type = graphTypeEnum::GraphCSRViewDouble; + graph_container.graph_ptr_type = graphTypeEnum::GraphCSRViewDouble; g->set_local_data(local_vertices, local_edges, local_offsets); g->set_handle(const_cast(&handle)); } @@ -79,7 +76,7 @@ void create_graph_t(graph_container_t& graph_container, bool do_expensive_check{false}; bool sorted_by_global_degree_within_vertex_partition{false}; - experimental::graph_properties_t graph_props{.is_symmetric=false, .is_multigraph=false}; + experimental::graph_properties_t graph_props{.is_symmetric = false, .is_multigraph = false}; if (multi_gpu) { std::vector adjmatrix_partition_offsets_vect; @@ -90,36 +87,37 @@ void create_graph_t(graph_container_t& graph_container, if (weightType == numberTypeEnum::floatType) { std::vector adjmatrix_partition_weights_vect; - auto g = new experimental::graph_view_t(handle, - adjmatrix_partition_offsets_vect, - adjmatrix_partition_indices_vect, - adjmatrix_partition_weights_vect, - vertex_partition_segment_offsets_vect, - partition, - num_vertices, - num_edges, - graph_props, - sorted_by_global_degree_within_vertex_partition, - do_expensive_check); + auto g = new experimental::graph_view_t( + handle, + adjmatrix_partition_offsets_vect, + adjmatrix_partition_indices_vect, + adjmatrix_partition_weights_vect, + vertex_partition_segment_offsets_vect, + partition, + num_vertices, + num_edges, + graph_props, + sorted_by_global_degree_within_vertex_partition, + do_expensive_check); graph_container.graph_ptr_union.graph_view_t_float_mg_ptr = g; graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_float_mg; } else { std::vector adjmatrix_partition_weights_vect; - auto g = new experimental::graph_view_t(handle, - adjmatrix_partition_offsets_vect, - adjmatrix_partition_indices_vect, - adjmatrix_partition_weights_vect, - vertex_partition_segment_offsets_vect, - partition, - num_vertices, - num_edges, - graph_props, - sorted_by_global_degree_within_vertex_partition, - 
do_expensive_check); + auto g = new experimental::graph_view_t( + handle, + adjmatrix_partition_offsets_vect, + adjmatrix_partition_indices_vect, + adjmatrix_partition_weights_vect, + vertex_partition_segment_offsets_vect, + partition, + num_vertices, + num_edges, + graph_props, + sorted_by_global_degree_within_vertex_partition, + do_expensive_check); graph_container.graph_ptr_union.graph_view_t_double_mg_ptr = g; graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_double_mg; - } } else { @@ -128,34 +126,36 @@ void create_graph_t(graph_container_t& graph_container, std::vector segment_offsets; if (weightType == numberTypeEnum::floatType) { - auto weights_array = reinterpret_cast(weights); - auto g = new experimental::graph_view_t(handle, - offsets_array, - indices_array, - weights_array, - segment_offsets, - num_vertices, - num_edges, - graph_props, - sorted_by_global_degree_within_vertex_partition, - do_expensive_check); + auto weights_array = reinterpret_cast(weights); + auto g = new experimental::graph_view_t( + handle, + offsets_array, + indices_array, + weights_array, + segment_offsets, + num_vertices, + num_edges, + graph_props, + sorted_by_global_degree_within_vertex_partition, + do_expensive_check); graph_container.graph_ptr_union.graph_view_t_float_ptr = g; - graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_float; + graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_float; } else { auto weights_array = reinterpret_cast(weights); - auto g = new experimental::graph_view_t(handle, - offsets_array, - indices_array, - weights_array, - segment_offsets, - num_vertices, - num_edges, - graph_props, - sorted_by_global_degree_within_vertex_partition, - do_expensive_check); + auto g = new experimental::graph_view_t( + handle, + offsets_array, + indices_array, + weights_array, + segment_offsets, + num_vertices, + num_edges, + graph_props, + sorted_by_global_degree_within_vertex_partition, + do_expensive_check); 
graph_container.graph_ptr_union.graph_view_t_double_ptr = g; - graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_double; + graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_double; } } } @@ -171,19 +171,21 @@ weight_t call_louvain(raft::handle_t const& handle, weight_t final_modularity; if (graph_container.graph_ptr_type == graphTypeEnum::GraphCSRViewFloat) { - std::pair results = louvain(handle, - *(graph_container.graph_ptr_union.GraphCSRViewFloatPtr), - parts, - max_level, - static_cast(resolution)); - final_modularity = results.second; + std::pair results = + louvain(handle, + *(graph_container.graph_ptr_union.GraphCSRViewFloatPtr), + parts, + max_level, + static_cast(resolution)); + final_modularity = results.second; } else { - std::pair results = louvain(handle, - *(graph_container.graph_ptr_union.GraphCSRViewDoublePtr), - parts, - max_level, - static_cast(resolution)); - final_modularity = results.second; + std::pair results = + louvain(handle, + *(graph_container.graph_ptr_union.GraphCSRViewDoublePtr), + parts, + max_level, + static_cast(resolution)); + final_modularity = results.second; } return final_modularity; From 9ba67f6b47da95c4e548c0278b34810442bcf077 Mon Sep 17 00:00:00 2001 From: Andrei Schaffer Date: Wed, 16 Sep 2020 14:45:41 -0500 Subject: [PATCH 46/74] Addressed code reviews on key_naming_t. 
--- cpp/include/partition_manager.hpp | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) diff --git a/cpp/include/partition_manager.hpp b/cpp/include/partition_manager.hpp index b817931e770..91b8b6b87a6 100644 --- a/cpp/include/partition_manager.hpp +++ b/cpp/include/partition_manager.hpp @@ -41,24 +41,11 @@ std::string to_string(from_t const& value) struct key_naming_t { // simplified key (one per all row subcomms / one per all column sub-comms): // - key_naming_t(std::string const& row_suffix = std::string("_p_row"), - std::string const& col_suffix = std::string("_p_col"), - std::string const& prefix = std::string("comm")) - : row_suffix_(row_suffix), col_suffix_(col_suffix), prefix_(prefix), name_(prefix_) - { - } - - // more involved key naming, using row/col indices: - // - key_naming_t(int row_indx, - int col_indx, - std::string const& row_suffix = std::string("_p_row"), - std::string const& col_suffix = std::string("_p_col"), - std::string const& prefix = std::string("comm")) - : row_suffix_(row_suffix), - col_suffix_(col_suffix), - prefix_(prefix), - name_(prefix_ + "_" + to_string(row_indx) + "_" + to_string(col_indx)) + key_naming_t(void) + : row_suffix_(std::string("_p_row")), + col_suffix_(std::string("_p_col")), + prefix_(std::string("comm")), + name_(prefix_) { } From 9b948db870372960d869b112c78fb3b3882871ab Mon Sep 17 00:00:00 2001 From: Andrei Schaffer Date: Wed, 16 Sep 2020 15:06:36 -0500 Subject: [PATCH 47/74] Addressed review on prefix_ redundancy in key_naming_t. 
--- cpp/include/partition_manager.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/include/partition_manager.hpp b/cpp/include/partition_manager.hpp index 91b8b6b87a6..738d58af6a3 100644 --- a/cpp/include/partition_manager.hpp +++ b/cpp/include/partition_manager.hpp @@ -44,8 +44,7 @@ struct key_naming_t { key_naming_t(void) : row_suffix_(std::string("_p_row")), col_suffix_(std::string("_p_col")), - prefix_(std::string("comm")), - name_(prefix_) + name_(std::string("comm")) { } @@ -56,7 +55,6 @@ struct key_naming_t { private: std::string const row_suffix_; std::string const col_suffix_; - std::string const prefix_; std::string name_; }; From 50446dd79d073a13fbe66ea9b266f60238116a12 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Wed, 16 Sep 2020 15:12:07 -0500 Subject: [PATCH 48/74] Addressed review feedback: passing const references for the container, added documentation, added deleted copy and assignment operators, added more legacy graph types for future expansion during the transition. --- cpp/include/utilities/cython.hpp | 109 ++++++++++++++---- cpp/src/utilities/cython.cpp | 59 ++++++---- python/cugraph/dask/community/louvain.pxd | 4 +- .../dask/community/louvain_wrapper.pyx | 32 +++-- python/cugraph/structure/graph_primtypes.pxd | 2 +- 5 files changed, 143 insertions(+), 63 deletions(-) diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index 8f3e8e7ee39..c2c70c2f2fe 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -24,12 +24,17 @@ namespace cython { enum class numberTypeEnum : int { intType, floatType, doubleType }; -// FIXME: The GraphCSRView* types are not in use! Those are left in place in -// case a legacy GraphCSRView class is needed, but these should be removed ASAP. +// FIXME: The GraphC??View* types will not be used in the near future. Those are +// left in place as cython wrappers transition from the GraphC* classes to +// graph_* classes. 
Remove GraphC* classes once the transition is complete. enum class graphTypeEnum : int { null, GraphCSRViewFloat, GraphCSRViewDouble, + GraphCSCViewFloat, + GraphCSCViewDouble, + GraphCOOViewFloat, + GraphCOOViewDouble, graph_view_t_float, graph_view_t_double, graph_view_t_float_mg, @@ -52,6 +57,10 @@ struct graph_container_t { void* null; GraphCSRView* GraphCSRViewFloatPtr; GraphCSRView* GraphCSRViewDoublePtr; + GraphCSCView* GraphCSCViewFloatPtr; + GraphCSCView* GraphCSCViewDoublePtr; + GraphCOOView* GraphCOOViewFloatPtr; + GraphCOOView* GraphCOOViewDoublePtr; experimental::graph_view_t* graph_view_t_float_ptr; experimental::graph_view_t* graph_view_t_double_ptr; experimental::graph_view_t* graph_view_t_float_mg_ptr; @@ -64,11 +73,24 @@ struct graph_container_t { inline graph_container_t() : graph_ptr_union{nullptr}, graph_ptr_type{graphTypeEnum::null} {} + // The expected usage of a graph_container_t is for it to be created as part + // of a cython wrapper simply for passing a templated instantiation of a + // particular graph class from one call to another, and not to exist outside + // of the individual wrapper function (deleted when the instance goes out of + // scope once the wrapper function returns). Therefore, copys and assignments + // to an instance are not supported and these methods are deleted. 
+ graph_container_t(const graph_container_t&) = delete; + graph_container_t& operator=(const graph_container_t&) = delete; + inline ~graph_container_t() { switch (graph_ptr_type) { case graphTypeEnum::GraphCSRViewFloat: delete graph_ptr_union.GraphCSRViewFloatPtr; break; case graphTypeEnum::GraphCSRViewDouble: delete graph_ptr_union.GraphCSRViewDoublePtr; break; + case graphTypeEnum::GraphCSCViewFloat: delete graph_ptr_union.GraphCSCViewFloatPtr; break; + case graphTypeEnum::GraphCSCViewDouble: delete graph_ptr_union.GraphCSCViewDoublePtr; break; + case graphTypeEnum::GraphCOOViewFloat: delete graph_ptr_union.GraphCOOViewFloatPtr; break; + case graphTypeEnum::GraphCOOViewDouble: delete graph_ptr_union.GraphCOOViewDoublePtr; break; case graphTypeEnum::graph_view_t_float: delete graph_ptr_union.graph_view_t_float_ptr; break; case graphTypeEnum::graph_view_t_double: delete graph_ptr_union.graph_view_t_double_ptr; @@ -100,29 +122,76 @@ struct graph_container_t { graphTypeEnum graph_ptr_type; }; -// Factory function for creating graph containers from basic types +// Factory function for populating an empty graph container with a new graph +// object from basic types, and sets the corresponding meta-data. Args are: +// +// graph_container_t& graph_container +// Reference to the graph_container_t instance to +// populate. populate_graph_container() can only be called on an "empty" +// container (ie. a container that has not been previously populated by +// populate_graph_container()) +// +// raft::handle_t const& handle +// Raft handle to be set on the new graph instance in the container +// +// void* offsets, indices, weights +// Pointer to an array of values representing offsets, indices, and weights +// respectively. 
The value types of the array are specified using +// numberTypeEnum values separately (see below) +// +// numberTypeEnum offsetType, indexType, weightType +// numberTypeEnum enum value describing the data type for the offsets, +// indices, and weights arrays respectively. These enum values are used to +// instantiate the proper templated graph type and for casting the arrays +// accordingly. +// +// int num_vertices, num_edges +// The number of vertices and edges respectively in the graph represented by +// the above arrays. +// +// int* local_vertices, local_edges +// Arrays containing the subset of vertices and edges respectively, used when +// the resulting graph object is applied to a distributed/MG algorithm. +// NOTE: these parameters are only needed for legacy GraphC??View* classes and +// may not be present in future versions. +// +// int* local_offsets +// Array containing the offsets between the local_* arrays and those for the +// full graph, allowing the array to start at position zero yet still be +// mapped to a position in the full array. +// NOTE: this parameter is only needed for legacy GraphC??View* classes and +// may not be present in future versions. +// +// bool transposed +// true if the resulting graph object should store a transposed adjacency +// matrix +// +// bool multi_gpu +// true if the resulting graph object is to be used for a multi-gpu +// application +// // FIXME: Should local_* values be void* as well? 
-void create_graph_t(graph_container_t& graph_container, - raft::handle_t const& handle, - void* offsets, - void* indices, - void* weights, - numberTypeEnum offsetType, - numberTypeEnum indexType, - numberTypeEnum weightType, - int num_vertices, - int num_edges, - int* local_vertices, - int* local_edges, - int* local_offsets, - bool transposed, - bool multi_gpu); +void populate_graph_container(graph_container_t& graph_container, + raft::handle_t const& handle, + void* offsets, + void* indices, + void* weights, + numberTypeEnum offsetType, + numberTypeEnum indexType, + numberTypeEnum weightType, + int num_vertices, + int num_edges, + int* local_vertices, + int* local_edges, + int* local_offsets, + bool transposed, + bool multi_gpu); // Wrapper for calling Louvain using a graph container template weight_t call_louvain(raft::handle_t const& handle, - graph_container_t& graph_container, - int* parts, + graph_container_t const& graph_container, + void* parts, size_t max_level, weight_t resolution); diff --git a/cpp/src/utilities/cython.cpp b/cpp/src/utilities/cython.cpp index ca6cf1687da..551863f4b56 100644 --- a/cpp/src/utilities/cython.cpp +++ b/cpp/src/utilities/cython.cpp @@ -19,6 +19,7 @@ #include #include #include +#include namespace cugraph { namespace cython { @@ -28,26 +29,34 @@ namespace cython { // assumed it will delete it on destruction. // // FIXME: Should local_* values be void* as well? 
-void create_graph_t(graph_container_t& graph_container, - raft::handle_t const& handle, - void* offsets, - void* indices, - void* weights, - numberTypeEnum offsetType, - numberTypeEnum indexType, - numberTypeEnum weightType, - int num_vertices, - int num_edges, - int* local_vertices, - int* local_edges, - int* local_offsets, - bool transposed, - bool multi_gpu) +void populate_graph_container(graph_container_t& graph_container, + raft::handle_t const& handle, + void* offsets, + void* indices, + void* weights, + numberTypeEnum offsetType, + numberTypeEnum indexType, + numberTypeEnum weightType, + int num_vertices, + int num_edges, + int* local_vertices, + int* local_edges, + int* local_offsets, + bool transposed, + bool multi_gpu) { + + CUGRAPH_EXPECTS(graph_container.graph_ptr_type == graphTypeEnum::null, + "populate_graph_container() can only be called on an empty container."); + // FIXME: This is soon-to-be legacy code left in place until the new graph_t // class is supported everywhere else. Remove everything down to the comment // line after the return stmnt. // Keep new code below return stmnt enabled to ensure it builds. + // + // FIXME: This is hardcoded to crete CSR types. Consider passing an additional + // arg (enum?) to this function to allow the caller to specify CSC or COO + // types as well when needed. if (weightType == numberTypeEnum::floatType) { auto g = new GraphCSRView(reinterpret_cast(offsets), reinterpret_cast(indices), @@ -163,18 +172,22 @@ void create_graph_t(graph_container_t& graph_container, // Wrapper for calling Louvain using a graph container template weight_t call_louvain(raft::handle_t const& handle, - graph_container_t& graph_container, - int* parts, + graph_container_t const& graph_container, + void* parts, size_t max_level, weight_t resolution) { weight_t final_modularity; + // FIXME: the only graph types currently in the container have ints for + // vertex_t and edge_t types. 
In the future, additional types for vertices and + // edges will be available, and when that happens, additional castings will be + // needed for the 'parts' arg in particular. For now, it is hardcoded to int. if (graph_container.graph_ptr_type == graphTypeEnum::GraphCSRViewFloat) { std::pair results = louvain(handle, *(graph_container.graph_ptr_union.GraphCSRViewFloatPtr), - parts, + reinterpret_cast(parts), max_level, static_cast(resolution)); final_modularity = results.second; @@ -182,7 +195,7 @@ weight_t call_louvain(raft::handle_t const& handle, std::pair results = louvain(handle, *(graph_container.graph_ptr_union.GraphCSRViewDoublePtr), - parts, + reinterpret_cast(parts), max_level, static_cast(resolution)); final_modularity = results.second; @@ -193,14 +206,14 @@ weight_t call_louvain(raft::handle_t const& handle, // Explicit instantiations template float call_louvain(raft::handle_t const& handle, - graph_container_t& graph_container, - int* parts, + graph_container_t const& graph_container, + void* parts, size_t max_level, float resolution); template double call_louvain(raft::handle_t const& handle, - graph_container_t& graph_container, - int* parts, + graph_container_t const& graph_container, + void* parts, size_t max_level, double resolution); diff --git a/python/cugraph/dask/community/louvain.pxd b/python/cugraph/dask/community/louvain.pxd index e86942eedb1..13f6dcc8434 100644 --- a/python/cugraph/dask/community/louvain.pxd +++ b/python/cugraph/dask/community/louvain.pxd @@ -22,7 +22,7 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": cdef weight_t call_louvain[weight_t]( const handle_t &handle, - graph_container_t &g, - int *parts, + const graph_container_t &g, + void *parts, size_t max_level, weight_t resolution) except + diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index c3588d9bda7..b59682b85c4 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ 
b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -89,35 +89,33 @@ def louvain(input_df, local_data, rank, handle, max_level, resolution): cdef double final_modularity_double = 1.0 cdef int num_level = 0 + # FIXME: Offsets and indices are currently hardcoded to int, but this may + # not be acceptable in the future. + weightTypeMap = {np.dtype("float32") : numberTypeEnum.floatType, + np.dtype("double") : numberTypeEnum.doubleType} + cdef graph_container_t graph_container - # FIXME: This dict should not be needed, instead update create_graph_t() to - # take weights.dtype directly - # FIXME: Offsets and indices should also be void*, and have corresponding - # dtypes passed to create_graph_t() # FIXME: The excessive casting for the enum arg is needed to make cython # understand how to pass the enum value (this is the same pattern # used by cudf). This will not be needed with Cython 3.0 - weightTypeMap = {np.dtype("float32") : numberTypeEnum.floatType, - np.dtype("double") : numberTypeEnum.doubleType} - - create_graph_t(graph_container, handle_[0], - c_offsets, c_indices, c_weights, - ((numberTypeEnum.intType)), - ((numberTypeEnum.intType)), - ((weightTypeMap[weights.dtype])), - num_verts, num_local_edges, - c_local_verts, c_local_edges, c_local_offsets, - False, True) # store_transposed, multi_gpu + populate_graph_container(graph_container, handle_[0], + c_offsets, c_indices, c_weights, + ((numberTypeEnum.intType)), + ((numberTypeEnum.intType)), + ((weightTypeMap[weights.dtype])), + num_verts, num_local_edges, + c_local_verts, c_local_edges, c_local_offsets, + False, True) # store_transposed, multi_gpu if weights.dtype == np.float32: final_modularity_float = c_louvain.call_louvain[float]( - handle_[0], graph_container, c_partition, max_level, resolution) + handle_[0], graph_container, c_partition, max_level, resolution) final_modularity = final_modularity_float else: final_modularity_double = c_louvain.call_louvain[double]( - handle_[0], graph_container, 
c_partition, max_level, resolution) + handle_[0], graph_container, c_partition, max_level, resolution) final_modularity = final_modularity_double return df, final_modularity diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index f82d7a3683b..66e06e26b62 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -203,7 +203,7 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": cdef struct graph_container_t: pass - cdef void create_graph_t( + cdef void populate_graph_container( graph_container_t &graph_container, const handle_t &handle, void *offsets, From 7228ab27346268c8b6a040554b324a0f9690877c Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Wed, 16 Sep 2020 15:33:27 -0500 Subject: [PATCH 49/74] C++ style check fix --- cpp/src/utilities/cython.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/cpp/src/utilities/cython.cpp b/cpp/src/utilities/cython.cpp index 551863f4b56..ce36414e079 100644 --- a/cpp/src/utilities/cython.cpp +++ b/cpp/src/utilities/cython.cpp @@ -45,7 +45,6 @@ void populate_graph_container(graph_container_t& graph_container, bool transposed, bool multi_gpu) { - CUGRAPH_EXPECTS(graph_container.graph_ptr_type == graphTypeEnum::null, "populate_graph_container() can only be called on an empty container."); From 82d2a3d6f1a6f0c8300652973b39601454e727c0 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Wed, 16 Sep 2020 16:20:20 -0500 Subject: [PATCH 50/74] Fixed description of local_* parameters. --- cpp/include/utilities/cython.hpp | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index c2c70c2f2fe..4b219bd2fca 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -150,15 +150,17 @@ struct graph_container_t { // the above arrays. 
// // int* local_vertices, local_edges -// Arrays containing the subset of vertices and edges respectively, used when -// the resulting graph object is applied to a distributed/MG algorithm. +// Arrays containing the number of vertices and number of edges, +// respectively. For example, if there are a total of 7 vertices, 16 edges, +// and the algorithm is distributed over 3 GPUs, the local_vertices may contain +// [2,2,3] and local_edges may contain [5,5,6]. // NOTE: these parameters are only needed for legacy GraphC??View* classes and // may not be present in future versions. // // int* local_offsets // Array containing the offsets between the local_* arrays and those for the -// full graph, allowing the array to start at position zero yet still be -// mapped to a position in the full array. +// global graph, allowing the array to start at position zero yet still be +// mapped to a position in the global array. // NOTE: this parameter is only needed for legacy GraphC??View* classes and // may not be present in future versions. 
// From 239481a05e2a41bd83018c368d697f732b332a83 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Fri, 18 Sep 2020 14:37:22 -0400 Subject: [PATCH 51/74] [REVIEW] update RAFT git tag (#1150) * update RAFT version * update change log --- CHANGELOG.md | 1 + cpp/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 682c3607f7b..b28724a1add 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -16,6 +16,7 @@ ## Bug Fixes - PR #1131 Show style checker errors with set +e +- PR #1150 Update RAFT git tag # cuGraph 0.15.0 (26 Aug 2020) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c867f286ae3..533fc54ec0a 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -284,7 +284,7 @@ else(DEFINED ENV{RAFT_PATH}) ExternalProject_Add(raft GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG 099e2b874b05555a78bed1666fa2d22f784e56a7 + GIT_TAG 516106e3b515b25c863776fcc51fb12df6c0a186 PREFIX ${RAFT_DIR} CONFIGURE_COMMAND "" BUILD_COMMAND "" From d03fab7454dadb9a19d254e1a99d39707439f922 Mon Sep 17 00:00:00 2001 From: Keith Kraus Date: Mon, 21 Sep 2020 16:35:23 -0400 Subject: [PATCH 52/74] [REVIEW] Remove RMM library dependency and CXX11 ABI handling (#1155) * remove RMM_LIBRARY and CXX11 ABI handling * changelog * formatting fix --- CHANGELOG.md | 2 ++ SOURCEBUILD.md | 17 --------- build.sh | 4 +-- cpp/CMakeLists.txt | 40 ++------------------- cpp/cmake/Modules/ConfigureArrow.cmake | 8 ----- cpp/cmake/Modules/ConfigureGoogleTest.cmake | 10 ------ cpp/tests/CMakeLists.txt | 6 ++-- cpp/tests/community/leiden_test.cpp | 2 -- 8 files changed, 9 insertions(+), 80 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b28724a1add..5cc739964b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,6 +17,8 @@ ## Bug Fixes - PR #1131 Show style checker errors with set +e - PR #1150 Update RAFT git tag +- PR #1155 Remove RMM library dependency and CXX11 ABI handling + 
# cuGraph 0.15.0 (26 Aug 2020) diff --git a/SOURCEBUILD.md b/SOURCEBUILD.md index 29aa20ad522..8acd90c4f7f 100644 --- a/SOURCEBUILD.md +++ b/SOURCEBUILD.md @@ -244,22 +244,5 @@ unset LD_LIBRARY_PATH Python API documentation can be generated from [docs](docs) directory. -## C++ ABI issues - -cuGraph builds with C++14 features. By default, we build cuGraph with the latest ABI (the ABI changed with C++11). The version of cuDF pointed to in -the conda installation above is build with the new ABI. - -If you see link errors indicating trouble finding functions that use C++ strings when trying to build cuGraph you may have an ABI incompatibility. - -There are a couple of complications that may make this a problem: -* if you need to link in a library built with the old ABI, you may need to build the entire tool chain from source using the old ABI. -* if you build cudf from source (for whatever reason), the default behavior for cudf (at least through version 0.5.x) is to build using the old ABI. You can build with the new ABI, but you need to follow the instructions in CUDF to explicitly turn that on. - -If you must build cugraph with the old ABI, you can use the following command (instead of the cmake call above): - -```bash -cmake .. -DCMAKE_INSTALL_PREFIX=$CONDA_PREFIX -DCMAKE_CXX11_ABI=OFF -``` - ## Attribution Portions adopted from https://github.com/pytorch/pytorch/blob/master/CONTRIBUTING.md diff --git a/build.sh b/build.sh index e0557344384..ae3ad575227 100755 --- a/build.sh +++ b/build.sh @@ -105,7 +105,6 @@ if (( ${NUMARGS} == 0 )) || hasArg libcugraph; then mkdir -p ${LIBCUGRAPH_BUILD_DIR} cd ${LIBCUGRAPH_BUILD_DIR} cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ - -DCMAKE_CXX11_ABI=${BUILD_ABI} \ -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} .. 
make -j${PARALLEL_LEVEL} VERBOSE=${VERBOSE} ${INSTALL_TARGET} @@ -131,8 +130,7 @@ if (( ${NUMARGS} == 0 )) || hasArg docs; then mkdir -p ${LIBCUGRAPH_BUILD_DIR} cd ${LIBCUGRAPH_BUILD_DIR} cmake -DCMAKE_INSTALL_PREFIX=${INSTALL_PREFIX} \ - -DCMAKE_CXX11_ABI=${BUILD_ABI} \ - -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ + -DDISABLE_DEPRECATION_WARNING=${BUILD_DISABLE_DEPRECATION_WARNING} \ -DCMAKE_BUILD_TYPE=${BUILD_TYPE} .. fi diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 533fc54ec0a..52faf34be01 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -46,29 +46,6 @@ set(CMAKE_CUDA_STANDARD_REQUIRED ON) if(CMAKE_COMPILER_IS_GNUCXX) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-error=deprecated-declarations") - -################################################################################################### -### C++ ABI changes. -### -### By default, cugraph builds with the new C++ ABI. In order to insure that thirdparty -### applications build with the properly setting (specifically RMM) we need to set -### the CMAKE_CXX11_ABI flag appropriately. -### -### If a user wants to build with the OLD ABI, then they need to define CMAKE_CXX11_ABI -### to be OFF (typically on the cmake command line). -### -### This block of code will configure the old ABI if the flag is set to OFF and -### do nothing (the default behavior of the C++14 compiler). 
-### - option(CMAKE_CXX11_ABI "Enable the GLIBCXX11 ABI" ON) - if(CMAKE_CXX11_ABI) - message(STATUS "CUGRAPH: Enabling the GLIBCXX11 ABI") - else() - message(STATUS "CUGRAPH: Disabling the GLIBCXX11 ABI") - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -D_GLIBCXX_USE_CXX11_ABI=0") - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -D_GLIBCXX_USE_CXX11_ABI=0") - endif(CMAKE_CXX11_ABI) endif(CMAKE_COMPILER_IS_GNUCXX) set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_60,code=sm_60") @@ -172,19 +149,8 @@ find_path(RMM_INCLUDE "rmm" "$ENV{CONDA_PREFIX}/include/rmm" "$ENV{CONDA_PREFIX}/include") -find_library(RMM_LIBRARY "rmm" - HINTS - "$ENV{RMM_ROOT}/lib" - "$ENV{CONDA_PREFIX}/lib") - -message(STATUS "RMM: RMM_LIBRARY set to ${RMM_LIBRARY}") message(STATUS "RMM: RMM_INCLUDE set to ${RMM_INCLUDE}") -add_library(rmm SHARED IMPORTED ${RMM_LIBRARY}) -if (RMM_INCLUDE AND RMM_LIBRARY) - set_target_properties(rmm PROPERTIES IMPORTED_LOCATION ${RMM_LIBRARY}) -endif (RMM_INCLUDE AND RMM_LIBRARY) - ################################################################################################### # - Fetch Content --------------------------------------------------------------------------------- include(FetchContent) @@ -372,7 +338,7 @@ target_include_directories(cugraph # - link libraries -------------------------------------------------------------------------------- target_link_libraries(cugraph PRIVATE - ${RMM_LIBRARY} gunrock cublas cusparse curand cusolver cudart cuda ${LIBCYPHERPARSER_LIBRARY} ${MPI_CXX_LIBRARIES} ${NCCL_LIBRARIES}) + gunrock cublas cusparse curand cusolver cudart cuda ${LIBCYPHERPARSER_LIBRARY} ${MPI_CXX_LIBRARIES} ${NCCL_LIBRARIES}) if(OpenMP_CXX_FOUND) target_link_libraries(cugraph PRIVATE @@ -385,7 +351,7 @@ target_link_libraries(cugraph PRIVATE ### ... ### ### libgomp.so is included in the conda base environment and copied to every new conda -### environment. 
If a full file path is provided (e.g ${RMM_LIBRARY}), cmake +### environment. If a full file path is provided (e.g ${NCCL_LIBRARIES}), cmake ### extracts the directory path and adds the directory path to BUILD_RPATH (if BUILD_RPATH is not ### disabled). ### @@ -398,7 +364,7 @@ target_link_libraries(cugraph PRIVATE ### If a full path to libgomp.so is provided (which is the case with OpenMP::OpenMP_CXX), cmake ### checks whether there is any other libgomp.so with the different full path (after resolving ### soft links) in the search paths (implicit directoires + BUILD_RAPTH). There is one in the -### path included in BUILD_RPATH when ${RMM_LIBRARY} are added; this one can +### path included in BUILD_RPATH when ${NCCL_LIBRARIES} are added; this one can ### potentially hide the one in the provided full path and cmake generates a warning (and RPATH ### is searched before the directories in /etc/ld.so/conf; ld.so.conf does not coincide but ### overlaps with implicit directories). diff --git a/cpp/cmake/Modules/ConfigureArrow.cmake b/cpp/cmake/Modules/ConfigureArrow.cmake index 647f335959e..b27e53dd415 100644 --- a/cpp/cmake/Modules/ConfigureArrow.cmake +++ b/cpp/cmake/Modules/ConfigureArrow.cmake @@ -21,14 +21,6 @@ set(ARROW_CMAKE_ARGS " -DARROW_WITH_LZ4=OFF" " -DARROW_USE_GLOG=OFF" " -DCMAKE_VERBOSE_MAKEFILE=ON") -if(NOT CMAKE_CXX11_ABI) - message(STATUS "ARROW: Disabling the GLIBCXX11 ABI") - list(APPEND ARROW_CMAKE_ARGS " -DARROW_TENSORFLOW=ON") -elseif(CMAKE_CXX11_ABI) - message(STATUS "ARROW: Enabling the GLIBCXX11 ABI") - list(APPEND ARROW_CMAKE_ARGS " -DARROW_TENSORFLOW=OFF") -endif(NOT CMAKE_CXX11_ABI) - configure_file("${CMAKE_SOURCE_DIR}/cmake/Templates/Arrow.CMakeLists.txt.cmake" "${ARROW_ROOT}/CMakeLists.txt") diff --git a/cpp/cmake/Modules/ConfigureGoogleTest.cmake b/cpp/cmake/Modules/ConfigureGoogleTest.cmake index d62bee2b198..9fac40f4649 100644 --- a/cpp/cmake/Modules/ConfigureGoogleTest.cmake +++ b/cpp/cmake/Modules/ConfigureGoogleTest.cmake @@ -4,16 
+4,6 @@ set(GTEST_CMAKE_ARGS "") #" -Dgtest_build_samples=ON" #" -DCMAKE_VERBOSE_MAKEFILE=ON") -if(NOT CMAKE_CXX11_ABI) - message(STATUS "GTEST: Disabling the GLIBCXX11 ABI") - list(APPEND GTEST_CMAKE_ARGS " -DCMAKE_C_FLAGS=-D_GLIBCXX_USE_CXX11_ABI=0") - list(APPEND GTEST_CMAKE_ARGS " -DCMAKE_CXX_FLAGS=-D_GLIBCXX_USE_CXX11_ABI=0") -elseif(CMAKE_CXX11_ABI) - message(STATUS "GTEST: Enabling the GLIBCXX11 ABI") - list(APPEND GTEST_CMAKE_ARGS " -DCMAKE_C_FLAGS=-D_GLIBCXX_USE_CXX11_ABI=1") - list(APPEND GTEST_CMAKE_ARGS " -DCMAKE_CXX_FLAGS=-D_GLIBCXX_USE_CXX11_ABI=1") -endif(NOT CMAKE_CXX11_ABI) - configure_file("${CMAKE_SOURCE_DIR}/cmake/Templates/GoogleTest.CMakeLists.txt.cmake" "${GTEST_ROOT}/CMakeLists.txt") diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 6c5803dc008..1758dce30c3 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -46,7 +46,7 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC CMAKE_EXTRA_LIBS) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE - gtest gmock_main gmock cugraph ${CUDF_LIBRARY} ${RMM_LIBRARY} ${CMAKE_EXTRA_LIBS} ${NCCL_LIBRARIES} cudart cuda cublas cusparse cusolver curand) + gtest gmock_main gmock cugraph ${CUDF_LIBRARY} ${CMAKE_EXTRA_LIBS} ${NCCL_LIBRARIES} cudart cuda cublas cusparse cusolver curand) if(OpenMP_CXX_FOUND) target_link_libraries(${CMAKE_TEST_NAME} PRIVATE @@ -59,7 +59,7 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC CMAKE_EXTRA_LIBS) ### ... ### ### libgomp.so is included in the conda base environment and copied to every new conda -### environment. If a full file path is provided (e.g ${CUDF_LIBRARY} and ${RMM_LIBRARY}), cmake +### environment. If a full file path is provided (e.g ${CUDF_LIBRARY}), cmake ### extracts the directory path and adds the directory path to BUILD_RPATH (if BUILD_RPATH is not ### disabled). 
### @@ -72,7 +72,7 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC CMAKE_EXTRA_LIBS) ### If a full path to libgomp.so is provided (which is the case with OpenMP::OpenMP_CXX), cmake ### checks whether there is any other libgomp.so with the different full path (after resolving ### soft links) in the search paths (implicit directoires + BUILD_RAPTH). There is one in the -### path included in BUILD_RPATH when ${CUDF_LIBRARY} and ${RMM_LIBRARY} are added; this one can +### path included in BUILD_RPATH when ${CUDF_LIBRARY} is added; this one can ### potentially hide the one in the provided full path and cmake generates a warning (and RPATH ### is searched before the directories in /etc/ld.so/conf; ld.so.conf does not coincide but ### overlaps with implicit directories). diff --git a/cpp/tests/community/leiden_test.cpp b/cpp/tests/community/leiden_test.cpp index 2d4acf3765d..764ab8bf6cb 100644 --- a/cpp/tests/community/leiden_test.cpp +++ b/cpp/tests/community/leiden_test.cpp @@ -17,8 +17,6 @@ #include -#include - TEST(leiden_karate, success) { std::vector off_h = {0, 16, 25, 35, 41, 44, 48, 52, 56, 61, 63, 66, From 20236eecd35f498e64810372db80be5458be37b3 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Tue, 22 Sep 2020 07:33:44 -0500 Subject: [PATCH 53/74] [ENG] Graph container cleanup, added arg for instantiating legacy types and switch statements to factory function (#1152) * Minor update to comment to describe array sizes. * Changed graph container to use smart pointers, added arg for instantiating legacy types and switch statements for it to factory function. * Added PR 1152 to CHANGELOG.md * Removing unnecessary .get() call on unique_ptr instance * Using make_unique() instead of new * Updated to call drop() correctly after cudf API update. 
Co-authored-by: Rick Ratzel --- CHANGELOG.md | 1 + cpp/include/utilities/cython.hpp | 91 ++++++------- cpp/src/utilities/cython.cpp | 122 +++++++++++++----- .../community/subgraph_extraction_wrapper.pyx | 4 +- .../dask/community/louvain_wrapper.pyx | 4 +- python/cugraph/structure/graph_primtypes.pxd | 6 + python/cugraph/structure/hypergraph.py | 6 +- 7 files changed, 147 insertions(+), 87 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5cc739964b1..383867e2a6c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ - PR #1129 Refactored test to use common dataset and added additional doc pages - PR #1144 updated documentation and APIs - PR #1139 MNMG Louvain Python updates, Cython cleanup +- PR #1152 graph container cleanup, added arg for instantiating legacy types and switch statements to factory function ## Bug Fixes - PR #1131 Show style checker errors with set +e diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index 4b219bd2fca..cda8c52c1d8 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -45,6 +45,9 @@ enum class graphTypeEnum : int { graph_view_t_double_mg_transposed }; +// Enum for the high-level type of GraphC??View* class to instantiate. +enum class legacyGraphTypeEnum : int { CSR, CSC, COO }; + // "container" for a graph type instance which insulates the owner from the // specifics of the actual graph type. This is intended to be used in Cython // code that only needs to pass a graph object to another wrapped C++ API. This @@ -54,24 +57,35 @@ struct graph_container_t { // FIXME: use std::variant (or a better alternative, ie. type erasure?) 
instead // of a union if possible union graphPtrUnion { + ~graphPtrUnion() {} + void* null; - GraphCSRView* GraphCSRViewFloatPtr; - GraphCSRView* GraphCSRViewDoublePtr; - GraphCSCView* GraphCSCViewFloatPtr; - GraphCSCView* GraphCSCViewDoublePtr; - GraphCOOView* GraphCOOViewFloatPtr; - GraphCOOView* GraphCOOViewDoublePtr; - experimental::graph_view_t* graph_view_t_float_ptr; - experimental::graph_view_t* graph_view_t_double_ptr; - experimental::graph_view_t* graph_view_t_float_mg_ptr; - experimental::graph_view_t* graph_view_t_double_mg_ptr; - experimental::graph_view_t* graph_view_t_float_transposed_ptr; - experimental::graph_view_t* graph_view_t_double_transposed_ptr; - experimental::graph_view_t* graph_view_t_float_mg_transposed_ptr; - experimental::graph_view_t* graph_view_t_double_mg_transposed_ptr; + std::unique_ptr> GraphCSRViewFloatPtr; + std::unique_ptr> GraphCSRViewDoublePtr; + std::unique_ptr> GraphCSCViewFloatPtr; + std::unique_ptr> GraphCSCViewDoublePtr; + std::unique_ptr> GraphCOOViewFloatPtr; + std::unique_ptr> GraphCOOViewDoublePtr; + std::unique_ptr> + graph_view_t_float_ptr; + std::unique_ptr> + graph_view_t_double_ptr; + std::unique_ptr> + graph_view_t_float_mg_ptr; + std::unique_ptr> + graph_view_t_double_mg_ptr; + std::unique_ptr> + graph_view_t_float_transposed_ptr; + std::unique_ptr> + graph_view_t_double_transposed_ptr; + std::unique_ptr> + graph_view_t_float_mg_transposed_ptr; + std::unique_ptr> + graph_view_t_double_mg_transposed_ptr; }; - inline graph_container_t() : graph_ptr_union{nullptr}, graph_ptr_type{graphTypeEnum::null} {} + graph_container_t() : graph_ptr_union{nullptr}, graph_ptr_type{graphTypeEnum::null} {} + ~graph_container_t() {} // The expected usage of a graph_container_t is for it to be created as part // of a cython wrapper simply for passing a templated instantiation of a @@ -82,42 +96,6 @@ struct graph_container_t { graph_container_t(const graph_container_t&) = delete; graph_container_t& operator=(const 
graph_container_t&) = delete; - inline ~graph_container_t() - { - switch (graph_ptr_type) { - case graphTypeEnum::GraphCSRViewFloat: delete graph_ptr_union.GraphCSRViewFloatPtr; break; - case graphTypeEnum::GraphCSRViewDouble: delete graph_ptr_union.GraphCSRViewDoublePtr; break; - case graphTypeEnum::GraphCSCViewFloat: delete graph_ptr_union.GraphCSCViewFloatPtr; break; - case graphTypeEnum::GraphCSCViewDouble: delete graph_ptr_union.GraphCSCViewDoublePtr; break; - case graphTypeEnum::GraphCOOViewFloat: delete graph_ptr_union.GraphCOOViewFloatPtr; break; - case graphTypeEnum::GraphCOOViewDouble: delete graph_ptr_union.GraphCOOViewDoublePtr; break; - case graphTypeEnum::graph_view_t_float: delete graph_ptr_union.graph_view_t_float_ptr; break; - case graphTypeEnum::graph_view_t_double: - delete graph_ptr_union.graph_view_t_double_ptr; - break; - case graphTypeEnum::graph_view_t_float_mg: - delete graph_ptr_union.graph_view_t_float_mg_ptr; - break; - case graphTypeEnum::graph_view_t_double_mg: - delete graph_ptr_union.graph_view_t_double_mg_ptr; - break; - case graphTypeEnum::graph_view_t_float_transposed: - delete graph_ptr_union.graph_view_t_float_transposed_ptr; - break; - case graphTypeEnum::graph_view_t_double_transposed: - delete graph_ptr_union.graph_view_t_double_transposed_ptr; - break; - case graphTypeEnum::graph_view_t_float_mg_transposed: - delete graph_ptr_union.graph_view_t_float_mg_transposed_ptr; - break; - case graphTypeEnum::graph_view_t_double_mg_transposed: - delete graph_ptr_union.graph_view_t_double_mg_transposed_ptr; - break; - default: break; - } - graph_ptr_type = graphTypeEnum::null; - } - graphPtrUnion graph_ptr_union; graphTypeEnum graph_ptr_type; }; @@ -131,13 +109,21 @@ struct graph_container_t { // container (ie. a container that has not been previously populated by // populate_graph_container()) // +// legacyGraphTypeEnum legacyType +// Specifies the type of graph when instantiating a legacy graph type +// (GraphCSRViewFloat, etc.). 
+// NOTE: this parameter will be removed when the transition to exclusive use +// of the new 2D graph classes is complete. +// // raft::handle_t const& handle // Raft handle to be set on the new graph instance in the container // // void* offsets, indices, weights // Pointer to an array of values representing offsets, indices, and weights // respectively. The value types of the array are specified using -// numberTypeEnum values separately (see below) +// numberTypeEnum values separately (see below). offsets should be size +// num_vertices+1, indices should be size num_edges, weights should also be +// size num_edges // // numberTypeEnum offsetType, indexType, weightType // numberTypeEnum enum value describing the data type for the offsets, @@ -174,6 +160,7 @@ struct graph_container_t { // // FIXME: Should local_* values be void* as well? void populate_graph_container(graph_container_t& graph_container, + legacyGraphTypeEnum legacyType, raft::handle_t const& handle, void* offsets, void* indices, diff --git a/cpp/src/utilities/cython.cpp b/cpp/src/utilities/cython.cpp index ce36414e079..c3b494aa467 100644 --- a/cpp/src/utilities/cython.cpp +++ b/cpp/src/utilities/cython.cpp @@ -30,6 +30,7 @@ namespace cython { // // FIXME: Should local_* values be void* as well? void populate_graph_container(graph_container_t& graph_container, + legacyGraphTypeEnum legacyType, raft::handle_t const& handle, void* offsets, void* indices, @@ -52,33 +53,92 @@ void populate_graph_container(graph_container_t& graph_container, // class is supported everywhere else. Remove everything down to the comment // line after the return stmnt. // Keep new code below return stmnt enabled to ensure it builds. - // - // FIXME: This is hardcoded to crete CSR types. Consider passing an additional - // arg (enum?) to this function to allow the caller to specify CSC or COO - // types as well when needed.
if (weightType == numberTypeEnum::floatType) { - auto g = new GraphCSRView(reinterpret_cast(offsets), - reinterpret_cast(indices), - reinterpret_cast(weights), - num_vertices, - num_edges); - graph_container.graph_ptr_union.GraphCSRViewFloatPtr = g; - graph_container.graph_ptr_type = graphTypeEnum::GraphCSRViewFloat; - g->set_local_data(local_vertices, local_edges, local_offsets); - g->set_handle(const_cast(&handle)); + switch (legacyType) { + case legacyGraphTypeEnum::CSR: { + graph_container.graph_ptr_union.GraphCSRViewFloatPtr = + std::make_unique>(reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_vertices, + num_edges); + graph_container.graph_ptr_type = graphTypeEnum::GraphCSRViewFloat; + (graph_container.graph_ptr_union.GraphCSRViewFloatPtr) + ->set_local_data(local_vertices, local_edges, local_offsets); + (graph_container.graph_ptr_union.GraphCSRViewFloatPtr) + ->set_handle(const_cast(&handle)); + } break; + case legacyGraphTypeEnum::CSC: { + graph_container.graph_ptr_union.GraphCSCViewFloatPtr = + std::make_unique>(reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_vertices, + num_edges); + graph_container.graph_ptr_type = graphTypeEnum::GraphCSCViewFloat; + (graph_container.graph_ptr_union.GraphCSCViewFloatPtr) + ->set_local_data(local_vertices, local_edges, local_offsets); + (graph_container.graph_ptr_union.GraphCSCViewFloatPtr) + ->set_handle(const_cast(&handle)); + } break; + case legacyGraphTypeEnum::COO: { + graph_container.graph_ptr_union.GraphCOOViewFloatPtr = + std::make_unique>(reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_vertices, + num_edges); + graph_container.graph_ptr_type = graphTypeEnum::GraphCOOViewFloat; + (graph_container.graph_ptr_union.GraphCOOViewFloatPtr) + ->set_local_data(local_vertices, local_edges, local_offsets); + (graph_container.graph_ptr_union.GraphCOOViewFloatPtr) + 
->set_handle(const_cast(&handle)); + } break; + } } else { - auto g = new GraphCSRView(reinterpret_cast(offsets), - reinterpret_cast(indices), - reinterpret_cast(weights), - num_vertices, - num_edges); - graph_container.graph_ptr_union.GraphCSRViewDoublePtr = g; - graph_container.graph_ptr_type = graphTypeEnum::GraphCSRViewDouble; - g->set_local_data(local_vertices, local_edges, local_offsets); - g->set_handle(const_cast(&handle)); + switch (legacyType) { + case legacyGraphTypeEnum::CSR: { + graph_container.graph_ptr_union.GraphCSRViewDoublePtr = + std::make_unique>(reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_vertices, + num_edges); + graph_container.graph_ptr_type = graphTypeEnum::GraphCSRViewDouble; + (graph_container.graph_ptr_union.GraphCSRViewDoublePtr) + ->set_local_data(local_vertices, local_edges, local_offsets); + (graph_container.graph_ptr_union.GraphCSRViewDoublePtr) + ->set_handle(const_cast(&handle)); + } break; + case legacyGraphTypeEnum::CSC: { + graph_container.graph_ptr_union.GraphCSCViewDoublePtr = + std::make_unique>(reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_vertices, + num_edges); + graph_container.graph_ptr_type = graphTypeEnum::GraphCSCViewDouble; + (graph_container.graph_ptr_union.GraphCSCViewDoublePtr) + ->set_local_data(local_vertices, local_edges, local_offsets); + (graph_container.graph_ptr_union.GraphCSCViewDoublePtr) + ->set_handle(const_cast(&handle)); + } break; + case legacyGraphTypeEnum::COO: { + graph_container.graph_ptr_union.GraphCOOViewDoublePtr = + std::make_unique>(reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_vertices, + num_edges); + graph_container.graph_ptr_type = graphTypeEnum::GraphCOOViewDouble; + (graph_container.graph_ptr_union.GraphCOOViewDoublePtr) + ->set_local_data(local_vertices, local_edges, local_offsets); + (graph_container.graph_ptr_union.GraphCOOViewDoublePtr) + 
->set_handle(const_cast(&handle)); + } break; + } } - return; //////////////////////////////////////////////////////////////////////////////////// @@ -107,7 +167,8 @@ void populate_graph_container(graph_container_t& graph_container, graph_props, sorted_by_global_degree_within_vertex_partition, do_expensive_check); - graph_container.graph_ptr_union.graph_view_t_float_mg_ptr = g; + graph_container.graph_ptr_union.graph_view_t_float_mg_ptr = + std::unique_ptr>(g); graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_float_mg; } else { @@ -124,7 +185,8 @@ void populate_graph_container(graph_container_t& graph_container, graph_props, sorted_by_global_degree_within_vertex_partition, do_expensive_check); - graph_container.graph_ptr_union.graph_view_t_double_mg_ptr = g; + graph_container.graph_ptr_union.graph_view_t_double_mg_ptr = + std::unique_ptr>(g); graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_double_mg; } @@ -146,8 +208,9 @@ void populate_graph_container(graph_container_t& graph_container, graph_props, sorted_by_global_degree_within_vertex_partition, do_expensive_check); - graph_container.graph_ptr_union.graph_view_t_float_ptr = g; - graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_float; + graph_container.graph_ptr_union.graph_view_t_float_ptr = + std::unique_ptr>(g); + graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_float; } else { auto weights_array = reinterpret_cast(weights); @@ -162,8 +225,9 @@ void populate_graph_container(graph_container_t& graph_container, graph_props, sorted_by_global_degree_within_vertex_partition, do_expensive_check); - graph_container.graph_ptr_union.graph_view_t_double_ptr = g; - graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_double; + graph_container.graph_ptr_union.graph_view_t_double_ptr = + std::unique_ptr>(g); + graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_double; } } } diff --git a/python/cugraph/community/subgraph_extraction_wrapper.pyx 
b/python/cugraph/community/subgraph_extraction_wrapper.pyx index 553bcaa4bfc..5dbb6ce1e27 100644 --- a/python/cugraph/community/subgraph_extraction_wrapper.pyx +++ b/python/cugraph/community/subgraph_extraction_wrapper.pyx @@ -75,7 +75,7 @@ def subgraph(input_graph, vertices): vertices_df['v'] = vertices vertices_df = vertices_df.reset_index(drop=True).reset_index() - df = df.merge(vertices_df, left_on='src', right_on='index', how='left').drop(['src', 'index']).rename(columns={'v': 'src'}, copy=False) - df = df.merge(vertices_df, left_on='dst', right_on='index', how='left').drop(['dst', 'index']).rename(columns={'v': 'dst'}, copy=False) + df = df.merge(vertices_df, left_on='src', right_on='index', how='left').drop(columns=['src', 'index']).rename(columns={'v': 'src'}, copy=False) + df = df.merge(vertices_df, left_on='dst', right_on='index', how='left').drop(columns=['dst', 'index']).rename(columns={'v': 'dst'}, copy=False) return df diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index b59682b85c4..6ffb55dd450 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -99,7 +99,9 @@ def louvain(input_df, local_data, rank, handle, max_level, resolution): # FIXME: The excessive casting for the enum arg is needed to make cython # understand how to pass the enum value (this is the same pattern # used by cudf). 
This will not be needed with Cython 3.0 - populate_graph_container(graph_container, handle_[0], + populate_graph_container(graph_container, + ((legacyGraphTypeEnum.CSR)), + handle_[0], c_offsets, c_indices, c_weights, ((numberTypeEnum.intType)), ((numberTypeEnum.intType)), diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index 66e06e26b62..c315fe5333c 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -200,11 +200,17 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": floatType "cugraph::cython::numberTypeEnum::floatType" doubleType "cugraph::cython::numberTypeEnum::doubleType" + ctypedef enum legacyGraphTypeEnum: + CSR "cugraph::cython::legacyGraphTypeEnum::CSR" + CSC "cugraph::cython::legacyGraphTypeEnum::CSC" + COO "cugraph::cython::legacyGraphTypeEnum::COO" + cdef struct graph_container_t: pass cdef void populate_graph_container( graph_container_t &graph_container, + legacyGraphTypeEnum legacyType, const handle_t &handle, void *offsets, void *indices, diff --git a/python/cugraph/structure/hypergraph.py b/python/cugraph/structure/hypergraph.py index 9b1c4b55e61..a11c937d83d 100644 --- a/python/cugraph/structure/hypergraph.py +++ b/python/cugraph/structure/hypergraph.py @@ -311,11 +311,11 @@ def _create_hyper_nodes( ): nodes = events.copy(deep=False) if NODEID in nodes: - nodes.drop([NODEID], inplace=True) + nodes.drop(columns=[NODEID], inplace=True) if NODETYPE in nodes: - nodes.drop([NODETYPE], inplace=True) + nodes.drop(columns=[NODETYPE], inplace=True) if CATEGORY in nodes: - nodes.drop([CATEGORY], inplace=True) + nodes.drop(columns=[CATEGORY], inplace=True) nodes[NODETYPE] = EVENTID if not categorical_metadata \ else _str_scalar_to_category(len(nodes), EVENTID) nodes[CATEGORY] = "event" if not categorical_metadata \ From 38ae58f5e022bc06fba467b5b11e5d0677c0a3ff Mon Sep 17 00:00:00 2001 From: Alex Fender Date: Tue, 22 Sep 
2020 19:46:01 -0500 Subject: [PATCH 54/74] [REVIEW] Simple edge list generator (#1145) * added simple data gen * style * changelog * added 32 or 64 option for id type * style --- CHANGELOG.md | 1 + python/cugraph/tests/utils.py | 61 +++++++++++++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 383867e2a6c..0d71fb3cb53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - PR 1135 SG Updates to Louvain et. al. - PR 1132 Upgrade Thrust to latest commit - PR #1129 Refactored test to use common dataset and added additional doc pages +- PR #1145 Simple edge list generator - PR #1144 updated documentation and APIs - PR #1139 MNMG Louvain Python updates, Cython cleanup - PR #1152 graph container cleanup, added arg for instantiating legacy types and switch statements to factory function diff --git a/python/cugraph/tests/utils.py b/python/cugraph/tests/utils.py index e68f934c619..88f79f65b4d 100644 --- a/python/cugraph/tests/utils.py +++ b/python/cugraph/tests/utils.py @@ -15,6 +15,7 @@ import cugraph import pandas as pd import networkx as nx +import numpy as np import dask_cudf import os from cugraph.dask.common.mg_utils import (get_client) @@ -139,3 +140,63 @@ def build_mg_batch_cu_and_nx_graphs(graph_file, directed=True): directed=directed) Gnx = generate_nx_graph_from_file(graph_file, directed=directed) return G, Gnx + + +def random_edgelist(e=1024, ef=16, + dtypes={"src": np.int32, "dst": np.int32, "val": float}, + drop_duplicates=True, seed=None): + """ Create a random edge list + + Parameters + ---------- + e : int + Number of edges + ef : int + Edge factor (average number of edges per vertex) + dtypes : dict + Mapping of column names to types. 
+ Supported type is {"src": int, "dst": int, "val": float} + drop_duplicates + Drop duplicates + seed : int (optional) + Randomstate seed + + Examples + -------- + >>> from cugraph.tests import utils + >>> # generates 20 df with 100M edges each and write to disk + >>> for x in range(20): + >>> df = utils.random_edgelist(e=100000000, ef=64, + >>> dtypes={'src':np.int32, 'dst':np.int32}, + >>> seed=x) + >>> df.to_csv('df'+str(x), header=False, index=False) + >>> #df.to_parquet('files_parquet/df'+str(x), index=False) + """ + state = np.random.RandomState(seed) + columns = dict((k, make[dt](e // ef, e, state)) + for k, dt in dtypes.items()) + + df = pd.DataFrame(columns) + if drop_duplicates: + df = df.drop_duplicates() + print("Generated "+str(df.shape[0])+" edges") + return cudf.from_pandas(df) + + +def make_int32(v, e, rstate): + return rstate.randint(low=0, high=v, size=e, dtype=np.int32) + + +def make_int64(v, e, rstate): + return rstate.randint(low=0, high=v, size=e, dtype=np.int64) + + +def make_float(v, e, rstate): + return rstate.rand(e) * 2 - 1 + + +make = { + float: make_float, + np.int32: make_int32, + np.int64: make_int64 +} From 84df6353f888899c7873b0f7ac5336d5a306a0c3 Mon Sep 17 00:00:00 2001 From: Alex Fender Date: Wed, 23 Sep 2020 18:58:07 -0500 Subject: [PATCH 55/74] [REVIEW] Parquet read and concat within workers (#1149) * added simple data gen * style * changelog * added 32 or 64 option for id type * style * concat_within_workers * parquet test concat_within_workers * changelog * disable mg test on ci * style ...
--- CHANGELOG.md | 1 + python/cugraph/dask/common/part_utils.py | 43 ++++++++++++- python/cugraph/tests/dask/test_mg_utility.py | 65 +++++++++++++++++++- 3 files changed, 106 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d71fb3cb53..23297165ca3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,7 @@ - PR #1145 Simple edge list generator - PR #1144 updated documentation and APIs - PR #1139 MNMG Louvain Python updates, Cython cleanup +- PR #1149 Parquet read and concat within workers - PR #1152 graph container cleanup, added arg for instantiating legacy types and switch statements to factory function ## Bug Fixes diff --git a/python/cugraph/dask/common/part_utils.py b/python/cugraph/dask/common/part_utils.py index 45dc7ed7ef2..505272fa563 100644 --- a/python/cugraph/dask/common/part_utils.py +++ b/python/cugraph/dask/common/part_utils.py @@ -16,12 +16,14 @@ from dask.distributed import futures_of, default_client, wait from toolz import first import collections -import dask_cudf as dc +import dask_cudf from dask.array.core import Array as daskArray from dask_cudf.core import DataFrame as daskDataFrame from dask_cudf.core import Series as daskSeries from functools import reduce import cugraph.comms.comms as Comms +from dask.delayed import delayed +import cudf def workers_to_parts(futures): @@ -193,10 +195,47 @@ def load_balance_func(ddf_, by, client=None): for idx, wf in enumerate(worker_to_data.items())] wait(futures) - ddf = dc.from_delayed(futures) + ddf = dask_cudf.from_delayed(futures) ddf.divisions = divisions # Repartition the data ddf = repartition(ddf, cumsum_parts) return ddf + + +def concat_dfs(df_list): + """ + Concat a list of cudf dataframes + """ + return cudf.concat(df_list) + + +def get_delayed_dict(ddf): + """ + Returns a dictionary with the dataframe tasks as keys and + the dataframe delayed objects as values + """ + df_delayed = {} + for delayed_obj in ddf.to_delayed(): + df_delayed[str(delayed_obj.key)] =
delayed_obj + return df_delayed + + +def concat_within_workers(client, ddf): + """ + Concats all partitions within workers without transfers + """ + df_delayed = get_delayed_dict(ddf) + + result = [] + for worker, tasks in client.has_what().items(): + worker_task_list = [] + + for task in list(tasks): + if task in df_delayed: + worker_task_list.append(df_delayed[task]) + concat_tasks = delayed(concat_dfs)(worker_task_list) + result.append(client.persist(collections=concat_tasks, workers=worker)) + + return dask_cudf.from_delayed(result) diff --git a/python/cugraph/tests/dask/test_mg_utility.py b/python/cugraph/tests/dask/test_mg_utility.py index 704b1db849c..c8fd61a1b58 100644 --- a/python/cugraph/tests/dask/test_mg_utility.py +++ b/python/cugraph/tests/dask/test_mg_utility.py @@ -12,13 +12,19 @@ # limitations under the License. import cugraph.dask as dcg -from dask.distributed import Client +from dask.distributed import Client, default_client, futures_of, wait import gc import cugraph import dask_cudf import cugraph.comms as Comms from dask_cuda import LocalCUDACluster import pytest +from cugraph.dask.common.part_utils import concat_within_workers +from cugraph.dask.common.read_utils import get_n_workers +import os +import time +import numpy as np +from cugraph.tests import utils @pytest.fixture @@ -61,3 +67,60 @@ def test_compute_local_data(client_connection): assert global_num_edges == dg.number_of_edges() global_num_verts = data.local_data['verts'].sum() assert global_num_verts == dg.number_of_nodes() + + +@pytest.mark.skip(reason="MG not supported on CI") +def test_parquet_concat_within_workers(client_connection): + if not os.path.exists('test_files_parquet'): + print("Generate data... 
") + os.mkdir('test_files_parquet') + for x in range(10): + if not os.path.exists('test_files_parquet/df'+str(x)): + df = utils.random_edgelist(e=100, + ef=16, + dtypes={'src': np.int32, + 'dst': np.int32}, + seed=x) + df.to_parquet('test_files_parquet/df'+str(x), index=False) + + n_gpu = get_n_workers() + + print("Read_parquet... ") + t1 = time.time() + ddf = dask_cudf.read_parquet('test_files_parquet/*', + dtype=['int32', 'int32']) + ddf = ddf.persist() + futures_of(ddf) + wait(ddf) + t1 = time.time()-t1 + print("*** Read Time: ", t1, "s") + print(ddf) + + assert ddf.npartitions > n_gpu + + print("Drop_duplicates... ") + t2 = time.time() + ddf.drop_duplicates(inplace=True) + ddf = ddf.persist() + futures_of(ddf) + wait(ddf) + t2 = time.time()-t2 + print("*** Drop duplicate time: ", t2, "s") + assert t2 < t1 + + print("Repartition... ") + t3 = time.time() + # Notice that ideally we would use : + # ddf = ddf.repartition(npartitions=n_gpu) + # However this is slower than reading and requires more memory + # Using custom concat instead + client = default_client() + ddf = concat_within_workers(client, ddf) + ddf = ddf.persist() + futures_of(ddf) + wait(ddf) + t3 = time.time()-t3 + print("*** repartition Time: ", t3, "s") + print(ddf) + + assert t3 < t1 From 85ec558995c63d71e1dcd5ac33d26e71ee48fb53 Mon Sep 17 00:00:00 2001 From: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Date: Fri, 25 Sep 2020 07:56:00 -0400 Subject: [PATCH 56/74] ENH Adding support for NetworkX Graphs as inputs (#1147) * nx to graph code and test * updated pagerank * updates * changelog * updatees to return dictionary rather than a dataframe * added edge scores to dictionary * BC and Edge BC using Nx * next set of updates * ECG * k-truss and a new conversion from cugraph.Graph to NetworkX.Graph * clustering algos * k_truss function * fixed issues * bug fixing * WCC and SCC, upadted code based on DF having different names * core number * k_core * updateed docs * hits * Jaccard and 
Overlap * flake8 * reset * PR 1147 * update log * updated and fixed * reset * new functions * updated Jaccard * added k_truss * k_truss - fix typo * code review fixes * fix * removed MG testing for more that 1 GPU * updated docs * redid core number test * updated docs * updating docs * flake error fix * skipping Nx test * changed to Graph vs DiGraph * skip MG test Co-authored-by: BradReesWork --- CHANGELOG.md | 1 + python/cugraph/__init__.py | 3 + .../centrality/betweenness_centrality.py | 41 +++-- python/cugraph/centrality/katz_centrality.py | 13 +- python/cugraph/community/__init__.py | 1 + python/cugraph/community/ecg.py | 22 ++- python/cugraph/community/ktruss_subgraph.py | 41 +++++ python/cugraph/community/leiden.py | 20 ++- python/cugraph/community/louvain.py | 25 ++- .../cugraph/community/spectral_clustering.py | 16 +- python/cugraph/community/triangle_count.py | 8 +- python/cugraph/components/connectivity.py | 34 +++- python/cugraph/cores/core_number.py | 19 +- python/cugraph/cores/k_core.py | 9 +- python/cugraph/link_analysis/hits.py | 10 ++ python/cugraph/link_analysis/pagerank.py | 20 ++- python/cugraph/link_prediction/__init__.py | 2 + python/cugraph/link_prediction/jaccard.py | 22 ++- python/cugraph/link_prediction/overlap.py | 26 +++ .../cugraph/tests/dask/test_mg_replication.py | 3 +- python/cugraph/tests/dask/test_mg_utility.py | 1 + python/cugraph/tests/test_balanced_cut.py | 40 ++++- .../tests/test_betweenness_centrality.py | 26 +++ python/cugraph/tests/test_connectivity.py | 58 +++++-- python/cugraph/tests/test_core_number.py | 48 ++++- python/cugraph/tests/test_ecg.py | 18 ++ .../tests/test_edge_betweenness_centrality.py | 25 +++ python/cugraph/tests/test_graph.py | 2 +- python/cugraph/tests/test_hits.py | 17 ++ python/cugraph/tests/test_jaccard.py | 21 +++ python/cugraph/tests/test_k_core.py | 14 ++ python/cugraph/tests/test_k_truss_subgraph.py | 38 ++-- python/cugraph/tests/test_katz_centrality.py | 32 ++++ python/cugraph/tests/test_leiden.py 
| 59 +++++-- python/cugraph/tests/test_nx_convert.py | 66 +++++++ python/cugraph/tests/test_pagerank.py | 121 +++++++++---- python/cugraph/tests/test_triangle_count.py | 18 ++ python/cugraph/utilities/__init__.py | 6 + python/cugraph/utilities/nx_factory.py | 164 ++++++++++++++++++ 39 files changed, 962 insertions(+), 148 deletions(-) create mode 100644 python/cugraph/tests/test_nx_convert.py create mode 100644 python/cugraph/utilities/nx_factory.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 23297165ca3..a05aa7711b9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## New Features - PR #1098 Add new graph classes to support 2D partitioning - PR #1124 Sub-communicator initialization for 2D partitioning support +- PR #1147 Added support for NetworkX graphs as input type ## Improvements - PR 1081 MNMG Renumbering - sort partitions by degree diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index d0d33e1baca..ee055b4a12e 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -14,6 +14,7 @@ from cugraph.community import ( ecg, ktruss_subgraph, + k_truss, louvain, leiden, spectralBalancedCutClustering, @@ -41,6 +42,7 @@ ) from cugraph.cores import core_number, k_core + from cugraph.components import ( weakly_connected_components, strongly_connected_components, @@ -52,6 +54,7 @@ jaccard, jaccard_coefficient, overlap, + overlap_coefficient, jaccard_w, overlap_w, ) diff --git a/python/cugraph/centrality/betweenness_centrality.py b/python/cugraph/centrality/betweenness_centrality.py index bd5ebbcc935..634cc2aa7a2 100644 --- a/python/cugraph/centrality/betweenness_centrality.py +++ b/python/cugraph/centrality/betweenness_centrality.py @@ -16,6 +16,8 @@ import cudf from cugraph.centrality import betweenness_centrality_wrapper from cugraph.centrality import edge_betweenness_centrality_wrapper +from cugraph.utilities import df_edge_score_to_dictionary +from cugraph.utilities import df_score_to_dictionary import cugraph @@ 
-42,9 +44,8 @@ def betweenness_centrality( Parameters ---------- - G : cuGraph.Graph - cuGraph graph descriptor with connectivity information. The graph can - be either directed (DiGraph) or undirected (Graph). + G : cuGraph.Graph or networkx.Graph + The graph can be either directed (DiGraph) or undirected (Graph). Weights in the graph are ignored, the current implementation uses BFS traversals. Use weight parameter if weights need to be considered (currently not supported) @@ -91,11 +92,11 @@ def betweenness_centrality( Returns ------- - df : cudf.DataFrame + df : cudf.DataFrame or Dictionary if using NetworkX GPU data frame containing two cudf.Series of size V: the vertex identifiers and the corresponding betweenness centrality values. Please note that the resulting the 'vertex' column might not be - in ascending order. + in ascending order. The Dictionary conatains the same two columns df['vertex'] : cudf.Series Contains the vertex identifiers @@ -116,8 +117,6 @@ def betweenness_centrality( # NOTE: cuDF doesn't currently support sampling, but there is a python # workaround. 
- vertices = _initialize_vertices(G, k, seed) - if weight is not None: raise NotImplementedError( "weighted implementation of betweenness " @@ -127,14 +126,22 @@ def betweenness_centrality( if result_dtype not in [np.float32, np.float64]: raise TypeError("result type can only be np.float32 or np.float64") + G, isNx = cugraph.utilities.check_nx_graph(G) + + vertices = _initialize_vertices(G, k, seed) + df = betweenness_centrality_wrapper.betweenness_centrality( G, normalized, endpoints, weight, vertices, result_dtype ) if G.renumbered: - return G.unrenumber(df, "vertex") + df = G.unrenumber(df, "vertex") - return df + if isNx is True: + dict = df_score_to_dictionary(df, 'betweenness_centrality') + return dict + else: + return df def edge_betweenness_centrality( @@ -153,9 +160,8 @@ def edge_betweenness_centrality( Parameters ---------- - G : cuGraph.Graph - cuGraph graph descriptor with connectivity information. The graph can - be either directed (DiGraph) or undirected (Graph). + G : cuGraph.Graph or networkx.Graph + The graph can be either directed (DiGraph) or undirected (Graph). Weights in the graph are ignored, the current implementation uses BFS traversals. Use weight parameter if weights need to be considered (currently not supported) @@ -197,7 +203,7 @@ def edge_betweenness_centrality( Returns ------- - df : cudf.DataFrame + df : cudf.DataFrame or Dictionary if using NetworkX GPU data frame containing three cudf.Series of size E: the vertex identifiers of the sources, the vertex identifies of the destinations and the corresponding betweenness centrality values. 
@@ -228,7 +234,6 @@ def edge_betweenness_centrality( >>> ebc = cugraph.edge_betweenness_centrality(G) """ - vertices = _initialize_vertices(G, k, seed) if weight is not None: raise NotImplementedError( "weighted implementation of betweenness " @@ -237,6 +242,9 @@ def edge_betweenness_centrality( if result_dtype not in [np.float32, np.float64]: raise TypeError("result type can only be np.float32 or np.float64") + G, isNx = cugraph.utilities.check_nx_graph(G) + vertices = _initialize_vertices(G, k, seed) + df = edge_betweenness_centrality_wrapper.edge_betweenness_centrality( G, normalized, weight, vertices, result_dtype ) @@ -250,7 +258,10 @@ def edge_betweenness_centrality( df[["src", "dst"]][lower_triangle] = df[["dst", "src"]][lower_triangle] df = df.groupby(by=["src", "dst"]).sum().reset_index() - return df + if isNx is True: + return df_edge_score_to_dictionary(df, 'betweenness_centrality') + else: + return df # In order to compare with pre-set sources, diff --git a/python/cugraph/centrality/katz_centrality.py b/python/cugraph/centrality/katz_centrality.py index d57682c726c..118825de4d7 100644 --- a/python/cugraph/centrality/katz_centrality.py +++ b/python/cugraph/centrality/katz_centrality.py @@ -12,6 +12,7 @@ # limitations under the License. from cugraph.centrality import katz_centrality_wrapper +import cugraph def katz_centrality( @@ -30,7 +31,7 @@ def katz_centrality( Parameters ---------- - G : cuGraph.Graph + G : cuGraph.Graph or networkx.Graph cuGraph graph descriptor with connectivity information. The graph can contain either directed (DiGraph) or undirected edges (Graph). alpha : float @@ -72,7 +73,7 @@ def katz_centrality( Returns ------- - df : cudf.DataFrame + df : cudf.DataFrame or Dictionary if using NetworkX GPU data frame containing two cudf.Series of size V: the vertex identifiers and the corresponding katz centrality values. 
@@ -90,6 +91,8 @@ def katz_centrality( >>> kc = cugraph.katz_centrality(G) """ + G, isNx = cugraph.utilities.check_nx_graph(G) + if nstart is not None: if G.renumbered is True: nstart = G.add_internal_vertex_id(nstart, 'vertex', 'vertex') @@ -101,4 +104,8 @@ def katz_centrality( if G.renumbered: df = G.unrenumber(df, "vertex") - return df + if isNx is True: + dict = cugraph.utilities.df_score_to_dictionary(df, 'katz_centrality') + return dict + else: + return df diff --git a/python/cugraph/community/__init__.py b/python/cugraph/community/__init__.py index 31e6f097a7a..d3bb6472894 100644 --- a/python/cugraph/community/__init__.py +++ b/python/cugraph/community/__init__.py @@ -24,3 +24,4 @@ from cugraph.community.subgraph_extraction import subgraph from cugraph.community.triangle_count import triangles from cugraph.community.ktruss_subgraph import ktruss_subgraph +from cugraph.community.ktruss_subgraph import k_truss diff --git a/python/cugraph/community/ecg.py b/python/cugraph/community/ecg.py index 85d97b50a8e..2e9da6bd2e5 100644 --- a/python/cugraph/community/ecg.py +++ b/python/cugraph/community/ecg.py @@ -12,9 +12,11 @@ # limitations under the License. from cugraph.community import ecg_wrapper +from cugraph.utilities import check_nx_graph +from cugraph.utilities import df_score_to_dictionary -def ecg(input_graph, min_weight=0.05, ensemble_size=16): +def ecg(input_graph, min_weight=0.05, ensemble_size=16, weight=None): """ Compute the Ensemble Clustering for Graphs (ECG) partition of the input graph. ECG runs truncated Louvain on an ensemble of permutations of the @@ -26,8 +28,8 @@ def ecg(input_graph, min_weight=0.05, ensemble_size=16): Parameters ---------- - input_graph : cugraph.Graph - cuGraph graph descriptor, should contain the connectivity information + input_graph : cugraph.Graph or NetworkX Graph + The graph descriptor should contain the connectivity information and weights. The adjacency list will be computed if not already present. 
@@ -41,9 +43,14 @@ def ecg(input_graph, min_weight=0.05, ensemble_size=16): The default value is 16, larger values may produce higher quality partitions for some graphs. + weight : str + This parameter is here for NetworkX compatibility and + represents which NetworkX data column represents Edge weights. + Default is None + Returns ------- - parts : cudf.DataFrame + parts : cudf.DataFrame or python dictionary GPU data frame of size V containing two columns, the vertex id and the partition id it is assigned to. @@ -63,9 +70,14 @@ def ecg(input_graph, min_weight=0.05, ensemble_size=16): """ + input_graph, isNx = check_nx_graph(input_graph, weight) + parts = ecg_wrapper.ecg(input_graph, min_weight, ensemble_size) if input_graph.renumbered: parts = input_graph.unrenumber(parts, "vertex") - return parts + if isNx is True: + return df_score_to_dictionary(parts, 'partition') + else: + return parts diff --git a/python/cugraph/community/ktruss_subgraph.py b/python/cugraph/community/ktruss_subgraph.py index 891dd0a7625..8e4f1471955 100644 --- a/python/cugraph/community/ktruss_subgraph.py +++ b/python/cugraph/community/ktruss_subgraph.py @@ -13,6 +13,47 @@ from cugraph.community import ktruss_subgraph_wrapper from cugraph.structure.graph import Graph +from cugraph.utilities import check_nx_graph +from cugraph.utilities import cugraph_to_nx + + +def k_truss(G, k): + """ + Returns the K-Truss subgraph of a graph for a specific k. + + The k-truss of a graph is a subgraph where each edge is part of at least + (k−2) triangles. K-trusses are used for finding tightly knit groups of + vertices in a graph. A k-truss is a relaxation of a k-clique in the graph + and was defined in [1]. Finding cliques is computationally demanding and + finding the maximal k-clique is known to be NP-Hard. + + Parameters + ---------- + G : cuGraph.Graph or networkx.Graph + cuGraph graph descriptor with connectivity information. 
k-Trusses are + defined for only undirected graphs as they are defined for + undirected triangle in a graph. + + k : int + The desired k to be used for extracting the k-truss subgraph. + + Returns + ------- + G_truss : cuGraph.Graph or networkx.Graph + A cugraph graph descriptor with the k-truss subgraph for the given k. + The networkx graph will NOT have all attributes copied over + """ + + G, isNx = check_nx_graph(G) + + if isNx is True: + k_sub = ktruss_subgraph(G, k) + S = cugraph_to_nx(k_sub) + return S + else: + return ktruss_subgraph(G, k) + +# FIXME: merge this function with k_truss def ktruss_subgraph(G, k, use_weights=True): diff --git a/python/cugraph/community/leiden.py b/python/cugraph/community/leiden.py index 355b2939617..8c1b79b8b63 100644 --- a/python/cugraph/community/leiden.py +++ b/python/cugraph/community/leiden.py @@ -13,9 +13,11 @@ from cugraph.community import leiden_wrapper from cugraph.structure.graph import Graph +from cugraph.utilities import check_nx_graph +from cugraph.utilities import df_score_to_dictionary -def leiden(input_graph, max_iter=100, resolution=1.): +def leiden(G, max_iter=100, resolution=1.): """ Compute the modularity optimizing partition of the input graph using the Leiden algorithm @@ -28,7 +30,7 @@ def leiden(input_graph, max_iter=100, resolution=1.): Parameters ---------- - input_graph : cugraph.Graph + G : cugraph.Graph cuGraph graph descriptor of type Graph The adjacency list will be computed if not already present. 
@@ -70,15 +72,19 @@ def leiden(input_graph, max_iter=100, resolution=1.): >>> G.from_cudf_edgelist(M, source='0', destination='1') >>> parts, modularity_score = cugraph.leiden(G) """ + G, isNx = check_nx_graph(G) - if type(input_graph) is not Graph: - raise Exception("input graph must be undirected") + if type(G) is not Graph: + raise Exception(f"input graph must be undirected was {type(G)}") parts, modularity_score = leiden_wrapper.leiden( - input_graph, max_iter, resolution + G, max_iter, resolution ) - if input_graph.renumbered: - parts = input_graph.unrenumber(parts, "vertex") + if G.renumbered: + parts = G.unrenumber(parts, "vertex") + + if isNx is True: + parts = df_score_to_dictionary(parts, "partition") return parts, modularity_score diff --git a/python/cugraph/community/louvain.py b/python/cugraph/community/louvain.py index 0d1fd9ec084..d4d56a1100c 100644 --- a/python/cugraph/community/louvain.py +++ b/python/cugraph/community/louvain.py @@ -13,9 +13,11 @@ from cugraph.community import louvain_wrapper from cugraph.structure.graph import Graph +from cugraph.utilities import check_nx_graph +from cugraph.utilities import df_score_to_dictionary -def louvain(input_graph, max_iter=100, resolution=1.): +def louvain(G, max_iter=100, resolution=1.): """ Compute the modularity optimizing partition of the input graph using the Louvain method @@ -28,10 +30,10 @@ def louvain(input_graph, max_iter=100, resolution=1.): Parameters ---------- - input_graph : cugraph.Graph - cuGraph graph descriptor of type Graph - - The adjacency list will be computed if not already present. + G : cugraph.Graph or NetworkX Graph + The graph descriptor should contain the connectivity information + and weights. The adjacency list will be computed if not already + present. 
max_iter : integer This controls the maximum number of levels/iterations of the Louvain @@ -71,14 +73,19 @@ def louvain(input_graph, max_iter=100, resolution=1.): >>> parts, modularity_score = cugraph.louvain(G) """ - if type(input_graph) is not Graph: + G, isNx = check_nx_graph(G) + + if type(G) is not Graph: raise Exception("input graph must be undirected") parts, modularity_score = louvain_wrapper.louvain( - input_graph, max_iter, resolution + G, max_iter, resolution ) - if input_graph.renumbered: - parts = input_graph.unrenumber(parts, "vertex") + if G.renumbered: + parts = G.unrenumber(parts, "vertex") + + if isNx is True: + parts = df_score_to_dictionary(parts, "partition") return parts, modularity_score diff --git a/python/cugraph/community/spectral_clustering.py b/python/cugraph/community/spectral_clustering.py index 92f8920199b..b5f175e8237 100644 --- a/python/cugraph/community/spectral_clustering.py +++ b/python/cugraph/community/spectral_clustering.py @@ -12,6 +12,8 @@ # limitations under the License. 
from cugraph.community import spectral_clustering_wrapper +from cugraph.utilities import check_nx_graph +from cugraph.utilities import df_score_to_dictionary def spectralBalancedCutClustering( @@ -29,7 +31,7 @@ def spectralBalancedCutClustering( Parameters ---------- - G : cugraph.Graph + G : cugraph.Graph or networkx.Graph cuGraph graph descriptor num_clusters : integer Specifies the number of clusters to find @@ -71,6 +73,8 @@ def spectralBalancedCutClustering( >>> df = cugraph.spectralBalancedCutClustering(G, 5) """ + G, isNx = check_nx_graph(G) + df = spectral_clustering_wrapper.spectralBalancedCutClustering( G, num_clusters, @@ -84,6 +88,9 @@ def spectralBalancedCutClustering( if G.renumbered: df = G.unrenumber(df, "vertex") + if isNx is True: + df = df_score_to_dictionary(df, "cluster") + return df @@ -141,6 +148,8 @@ def spectralModularityMaximizationClustering( >>> df = cugraph.spectralModularityMaximizationClustering(G, 5) """ + G, isNx = check_nx_graph(G) + df = spectral_clustering_wrapper.spectralModularityMaximizationClustering( G, num_clusters, @@ -154,6 +163,9 @@ def spectralModularityMaximizationClustering( if G.renumbered: df = G.unrenumber(df, "vertex") + if isNx is True: + df = df_score_to_dictionary(df, "cluster") + return df @@ -250,6 +262,8 @@ def analyzeClustering_edge_cut(G, n_clusters, clustering, >>> 'vertex', 'cluster') """ + G, isNx = check_nx_graph(G) + if G.renumbered: clustering = G.add_internal_vertex_id(clustering, vertex_col_name, diff --git a/python/cugraph/community/triangle_count.py b/python/cugraph/community/triangle_count.py index 586d16bb20b..ff4dc9a5c5f 100644 --- a/python/cugraph/community/triangle_count.py +++ b/python/cugraph/community/triangle_count.py @@ -13,6 +13,7 @@ from cugraph.community import triangle_count_wrapper from cugraph.structure.graph import Graph +from cugraph.utilities import check_nx_graph def triangles(G): @@ -20,9 +21,12 @@ def triangles(G): Compute the number of triangles (cycles of length three) in 
the input graph. + Unlike NetworkX, this algorithm simply returns the total number of + triangles and not the number per vertex. + Parameters ---------- - G : cugraph.graph + G : cugraph.graph or networkx.Graph cuGraph graph descriptor, should contain the connectivity information, (edge weights are not used in this algorithm) @@ -43,6 +47,8 @@ def triangles(G): >>> count = cugraph.triangles(G) """ + G, _ = check_nx_graph(G) + if type(G) is not Graph: raise Exception("input graph must be undirected") diff --git a/python/cugraph/components/connectivity.py b/python/cugraph/components/connectivity.py index 522eff78c20..f0b40601ab9 100644 --- a/python/cugraph/components/connectivity.py +++ b/python/cugraph/components/connectivity.py @@ -12,6 +12,8 @@ # limitations under the License. from cugraph.components import connectivity_wrapper +from cugraph.utilities import check_nx_graph +from cugraph.utilities import df_score_to_dictionary def weakly_connected_components(G): @@ -21,7 +23,7 @@ def weakly_connected_components(G): Parameters ---------- - G : cugraph.Graph + G : cugraph.Graph or networkx.Graph cuGraph graph descriptor, should contain the connectivity information as an edge list (edge weights are not used for this algorithm). Currently, the graph should be undirected where an undirected edge is @@ -32,8 +34,13 @@ def weakly_connected_components(G): Returns ------- df : cudf.DataFrame - df['labels'][i] gives the label id of the i'th vertex - df['vertices'][i] gives the vertex id of the i'th vertex + GPU data frame containing two cudf.Series of size V: the vertex + identifiers and the corresponding component identifier. 
+ + df['vertices'] + Contains the vertex identifier + df['labels'] + The component identifier Examples -------- @@ -46,11 +53,16 @@ def weakly_connected_components(G): >>> df = cugraph.weakly_connected_components(G) """ + G, isNx = check_nx_graph(G) + df = connectivity_wrapper.weakly_connected_components(G) if G.renumbered: df = G.unrenumber(df, "vertices") + if isNx is True: + df = df_score_to_dictionary(df, "labels", "vertices") + return df @@ -61,7 +73,7 @@ def strongly_connected_components(G): Parameters ---------- - G : cugraph.Graph + G : cugraph.Graph or networkx.Graph cuGraph graph descriptor, should contain the connectivity information as an edge list (edge weights are not used for this algorithm). The graph can be either directed or undirected where an undirected edge is @@ -72,8 +84,13 @@ def strongly_connected_components(G): Returns ------- df : cudf.DataFrame - df['labels'][i] gives the label id of the i'th vertex - df['vertices'][i] gives the vertex id of the i'th vertex + GPU data frame containing two cudf.Series of size V: the vertex + identifiers and the corresponding component identifier. + + df['vertices'] + Contains the vertex identifier + df['labels'] + The component identifier Examples -------- @@ -86,9 +103,14 @@ def strongly_connected_components(G): >>> df = cugraph.strongly_connected_components(G) """ + G, isNx = check_nx_graph(G) + df = connectivity_wrapper.strongly_connected_components(G) if G.renumbered: df = G.unrenumber(df, "vertices") + if isNx is True: + df = df_score_to_dictionary(df, "labels", "vertices") + return df diff --git a/python/cugraph/cores/core_number.py b/python/cugraph/cores/core_number.py index 6476a863d2d..02f1b67ee35 100644 --- a/python/cugraph/cores/core_number.py +++ b/python/cugraph/cores/core_number.py @@ -12,6 +12,8 @@ # limitations under the License. 
from cugraph.cores import core_number_wrapper +from cugraph.utilities import check_nx_graph +from cugraph.utilities import df_score_to_dictionary def core_number(G): @@ -24,15 +26,15 @@ def core_number(G): Parameters ---------- - graph : cuGraph.Graph - cuGraph graph descriptor with connectivity information. The graph - should contain undirected edges where undirected edges are represented - as directed edges in both directions. While this graph can contain edge - weights, they don't participate in the calculation of the core numbers. + graph : cuGraph.Graph or networkx.Graph + The graph should contain undirected edges where undirected edges are + represented as directed edges in both directions. While this graph + can contain edge weights, they don't participate in the calculation + of the core numbers. Returns ------- - df : cudf.DataFrame + df : cudf.DataFrame or python dictionary (in NetworkX input) GPU data frame containing two cudf.Series of size V: the vertex identifiers and the corresponding core number values. @@ -50,9 +52,14 @@ def core_number(G): >>> cn = cugraph.core_number(G) """ + G, isNx = check_nx_graph(G) + df = core_number_wrapper.core_number(G) if G.renumbered: df = G.unrenumber(df, "vertex") + if isNx is True: + df = df_score_to_dictionary(df, 'core_number') + return df diff --git a/python/cugraph/cores/k_core.py b/python/cugraph/cores/k_core.py index 8c6c05c3178..ebf12f60cda 100644 --- a/python/cugraph/cores/k_core.py +++ b/python/cugraph/cores/k_core.py @@ -12,6 +12,8 @@ # limitations under the License. from cugraph.cores import k_core_wrapper, core_number_wrapper +from cugraph.utilities import cugraph_to_nx +from cugraph.utilities import check_nx_graph def k_core(G, k=None, core_number=None): @@ -23,7 +25,7 @@ def k_core(G, k=None, core_number=None): Parameters ---------- - G : cuGraph.Graph + G : cuGraph.Graph or networkx.Graph cuGraph graph descriptor with connectivity information. 
The graph should contain undirected edges where undirected edges are represented as directed edges in both directions. While this graph can contain edge @@ -56,6 +58,8 @@ def k_core(G, k=None, core_number=None): >>> KCoreGraph = cugraph.k_core(G) """ + G, isNx = check_nx_graph(G) + mytype = type(G) KCoreGraph = mytype() @@ -88,4 +92,7 @@ def k_core(G, k=None, core_number=None): k_core_df, source="src", destination="dst" ) + if isNx is True: + KCoreGraph = cugraph_to_nx(KCoreGraph) + return KCoreGraph diff --git a/python/cugraph/link_analysis/hits.py b/python/cugraph/link_analysis/hits.py index c3b8a93c8ac..29827e1dd31 100644 --- a/python/cugraph/link_analysis/hits.py +++ b/python/cugraph/link_analysis/hits.py @@ -12,6 +12,8 @@ # limitations under the License. from cugraph.link_analysis import hits_wrapper +from cugraph.utilities import check_nx_graph +from cugraph.utilities import df_score_to_dictionary def hits(G, max_iter=100, tol=1.0e-5, nstart=None, normalized=True): @@ -72,9 +74,17 @@ def hits(G, max_iter=100, tol=1.0e-5, nstart=None, normalized=True): >>> hits = cugraph.hits(G, max_iter = 50) """ + G, isNx = check_nx_graph(G) + df = hits_wrapper.hits(G, max_iter, tol) if G.renumbered: df = G.unrenumber(df, "vertex") + if isNx is True: + d1 = df_score_to_dictionary(df[["vertex", "hubs"]], "hubs") + d2 = df_score_to_dictionary(df[["vertex", "authorities"]], + "authorities") + df = (d1, d2) + return df diff --git a/python/cugraph/link_analysis/pagerank.py b/python/cugraph/link_analysis/pagerank.py index 69106f3bf2b..69133d62af7 100644 --- a/python/cugraph/link_analysis/pagerank.py +++ b/python/cugraph/link_analysis/pagerank.py @@ -13,10 +13,12 @@ from cugraph.link_analysis import pagerank_wrapper from cugraph.structure.graph import null_check +import cugraph def pagerank( - G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-5, nstart=None + G, alpha=0.85, personalization=None, max_iter=100, tol=1.0e-5, nstart=None, + weight=None, dangling=None ): """ 
Find the PageRank score for every vertex in a graph. cuGraph computes an @@ -28,7 +30,7 @@ def pagerank( Parameters ---------- - graph : cugraph.Graph + graph : cugraph.Graph or networkx.Graph cuGraph graph descriptor, should contain the connectivity information as an edge list (edge weights are not used for this algorithm). The transposed adjacency list will be computed if not already present. @@ -67,6 +69,13 @@ def pagerank( nstart['values'] : cudf.Series Pagerank values for vertices + weight : str + Edge data column to use. Default is None + This version of PageRank currently does not use edge weight. + This parameter is here for NetworkX compatibility + dangling : dict + This parameter is here for NetworkX compatibility and ignored + Returns ------- PageRank : cudf.DataFrame @@ -88,6 +97,8 @@ def pagerank( >>> pr = cugraph.pagerank(G, alpha = 0.85, max_iter = 500, tol = 1.0e-05) """ + G, isNx = cugraph.utilities.check_nx_graph(G, weight) + if personalization is not None: null_check(personalization["vertex"]) null_check(personalization["values"]) @@ -109,4 +120,7 @@ def pagerank( if G.renumbered: df = G.unrenumber(df, "vertex") - return df + if isNx is True: + return cugraph.utilities.df_score_to_dictionary(df, 'pagerank') + else: + return df diff --git a/python/cugraph/link_prediction/__init__.py b/python/cugraph/link_prediction/__init__.py index 70e55591639..f787ae10dd9 100644 --- a/python/cugraph/link_prediction/__init__.py +++ b/python/cugraph/link_prediction/__init__.py @@ -16,3 +16,5 @@ from cugraph.link_prediction.overlap import overlap from cugraph.link_prediction.wjaccard import jaccard_w from cugraph.link_prediction.woverlap import overlap_w +from cugraph.link_prediction.jaccard import jaccard_coefficient +from cugraph.link_prediction.overlap import overlap_coefficient diff --git a/python/cugraph/link_prediction/jaccard.py b/python/cugraph/link_prediction/jaccard.py index 27d3b1458a5..71cf0925342 100644 --- a/python/cugraph/link_prediction/jaccard.py +++ 
b/python/cugraph/link_prediction/jaccard.py @@ -11,10 +11,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pandas as pd +import cudf from cugraph.structure.graph import Graph from cugraph.link_prediction import jaccard_wrapper from cugraph.structure.graph import null_check -import cudf +from cugraph.utilities import check_nx_graph +from cugraph.utilities import df_edge_score_to_dictionary def jaccard(input_graph, vertex_pair=None): @@ -173,4 +176,19 @@ def jaccard_coefficient(G, ebunch=None): >>> G.from_cudf_edgelist(gdf, source='0', destination='1') >>> df = cugraph.jaccard_coefficient(G) """ - return jaccard(G, ebunch) + vertex_pair = None + + G, isNx = check_nx_graph(G) + + if isNx is True and ebunch is not None: + vertex_pair = cudf.from_pandas(pd.DataFrame(ebunch)) + + df = jaccard(G, vertex_pair) + + if isNx is True: + df = df_edge_score_to_dictionary(df, + k="jaccard_coeff", + src="source", + dst="destination") + + return df diff --git a/python/cugraph/link_prediction/overlap.py b/python/cugraph/link_prediction/overlap.py index c9aa216095e..a5ca1e22979 100644 --- a/python/cugraph/link_prediction/overlap.py +++ b/python/cugraph/link_prediction/overlap.py @@ -11,9 +11,35 @@ # See the License for the specific language governing permissions and # limitations under the License. +import pandas as pd from cugraph.link_prediction import overlap_wrapper from cugraph.structure.graph import null_check import cudf +from cugraph.utilities import check_nx_graph +from cugraph.utilities import df_edge_score_to_dictionary + + +def overlap_coefficient(G, ebunch=None): + """ + NetworkX similar API. 
See 'jaccard' for a description + + """ + vertex_pair = None + + G, isNx = check_nx_graph(G) + + if isNx is True and ebunch is not None: + vertex_pair = cudf.from_pandas(pd.DataFrame(ebunch)) + + df = overlap(G, vertex_pair) + + if isNx is True: + df = df_edge_score_to_dictionary(df, + k="overlap_coeff", + src="source", + dst="destination") + + return df def overlap(input_graph, vertex_pair=None): diff --git a/python/cugraph/tests/dask/test_mg_replication.py b/python/cugraph/tests/dask/test_mg_replication.py index 061bcf83f20..4932e0fd970 100644 --- a/python/cugraph/tests/dask/test_mg_replication.py +++ b/python/cugraph/tests/dask/test_mg_replication.py @@ -22,7 +22,8 @@ DATASETS_OPTIONS = utils.DATASETS_SMALL DIRECTED_GRAPH_OPTIONS = [False, True] -MG_DEVICE_COUNT_OPTIONS = [1, 2, 3, 4] +# MG_DEVICE_COUNT_OPTIONS = [1, 2, 3, 4] +MG_DEVICE_COUNT_OPTIONS = [1] @pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS) diff --git a/python/cugraph/tests/dask/test_mg_utility.py b/python/cugraph/tests/dask/test_mg_utility.py index c8fd61a1b58..a26101b9f7a 100644 --- a/python/cugraph/tests/dask/test_mg_utility.py +++ b/python/cugraph/tests/dask/test_mg_utility.py @@ -40,6 +40,7 @@ def client_connection(): cluster.close() +@pytest.mark.skip(reason="skipping MG testing on a SG system") def test_compute_local_data(client_connection): gc.collect() diff --git a/python/cugraph/tests/test_balanced_cut.py b/python/cugraph/tests/test_balanced_cut.py index 9bee231d99e..f0fc7152e56 100644 --- a/python/cugraph/tests/test_balanced_cut.py +++ b/python/cugraph/tests/test_balanced_cut.py @@ -15,7 +15,8 @@ import random import pytest - +import networkx as nx +import pandas as pd import cudf import cugraph from cugraph.tests import utils @@ -120,3 +121,40 @@ def test_digraph_rejected(): with pytest.raises(Exception): cugraph_call(G, 2) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.parametrize("partitions", PARTITIONS) +def 
test_edge_cut_clustering_with_edgevals_nx(graph_file, partitions): + gc.collect() + + # Read in the graph and create a NetworkX Graph + # FIXME: replace with utils.generate_nx_graph_from_file() + NM = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + G = nx.from_pandas_edgelist( + NM, create_using=nx.Graph(), source="0", target="1", + edge_attr="weight" + ) + + # Get the edge_cut score for partitioning versus random assignment + df = cugraph.spectralBalancedCutClustering( + G, partitions, num_eigen_vects=partitions + ) + + pdf = pd.DataFrame.from_dict(df, orient='index').reset_index() + pdf.columns = ["vertex", "cluster"] + gdf = cudf.from_pandas(pdf) + + cu_score = cugraph.analyzeClustering_edge_cut( + G, partitions, gdf, 'vertex', 'cluster' + ) + + df = set(gdf["vertex"].to_array()) + + Gcu = cugraph.utilities.convert_from_nx(G) + rand_vid, rand_score = random_call(Gcu, partitions) + + # Assert that the partitioning has better edge_cut than the random + # assignment + print(cu_score, rand_score) + assert cu_score < rand_score diff --git a/python/cugraph/tests/test_betweenness_centrality.py b/python/cugraph/tests/test_betweenness_centrality.py index 1ef1601edd5..73a706f877d 100644 --- a/python/cugraph/tests/test_betweenness_centrality.py +++ b/python/cugraph/tests/test_betweenness_centrality.py @@ -64,6 +64,7 @@ def calc_betweenness_centrality( result_dtype=np.float64, use_k_full=False, multi_gpu_batch=False, + ): """ Generate both cugraph and networkx betweenness centrality @@ -474,3 +475,28 @@ def test_betweenness_invalid_dtype( result_dtype=result_dtype, ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +def test_betweenness_centrality_nx(graph_file): + prepare_test() + + Gnx = utils.generate_nx_graph_from_file(graph_file) + + nx_bc = nx.betweenness_centrality(Gnx) + cu_bc = cugraph.betweenness_centrality(Gnx) + + # Calculating mismatch + networkx_bc = 
sorted(nx_bc.items(), key=lambda x: x[0]) + cugraph_bc = sorted(cu_bc.items(), key=lambda x: x[0]) + err = 0 + assert len(cugraph_bc) == len(networkx_bc) + for i in range(len(cugraph_bc)): + if ( + abs(cugraph_bc[i][1] - networkx_bc[i][1]) > 0.01 + and cugraph_bc[i][0] == networkx_bc[i][0] + ): + err = err + 1 + print(f"{cugraph_bc[i][1]} and {cugraph_bc[i][1]}") + print("Mismatches:", err) + assert err < (0.01 * len(cugraph_bc)) diff --git a/python/cugraph/tests/test_connectivity.py b/python/cugraph/tests/test_connectivity.py index 508be9bb58d..fdc1ca6d8fd 100644 --- a/python/cugraph/tests/test_connectivity.py +++ b/python/cugraph/tests/test_connectivity.py @@ -15,6 +15,7 @@ import time from collections import defaultdict import pytest +import pandas as pd import cugraph from cugraph.tests import utils @@ -35,22 +36,12 @@ def networkx_weak_call(M): - """M = M.tocsr() - if M is None: - raise TypeError('Could not read the input graph') - if M.shape[0] != M.shape[1]: - raise TypeError('Shape is not square') - - Gnx = nx.DiGraph(M)""" Gnx = nx.from_pandas_edgelist( M, source="0", target="1", create_using=nx.DiGraph() ) # Weakly Connected components call: - print("Solving... ") t1 = time.time() - - # same parameters as in NVGRAPH result = nx.weakly_connected_components(Gnx) t2 = time.time() - t1 print("Time : " + str(t2)) @@ -60,7 +51,6 @@ def networkx_weak_call(M): def cugraph_weak_call(cu_M): - # cugraph Pagerank Call G = cugraph.DiGraph() G.from_cudf_edgelist(cu_M, source="0", destination="1") t1 = time.time() @@ -79,14 +69,9 @@ def networkx_strong_call(M): M, source="0", target="1", create_using=nx.DiGraph() ) - # Weakly Connected components call: - print("Solving... 
") t1 = time.time() - - # same parameters as in NVGRAPH result = nx.strongly_connected_components(Gnx) t2 = time.time() - t1 - print("Time : " + str(t2)) labels = sorted(result) @@ -205,3 +190,44 @@ def test_strong_cc(graph_file): cg_vertices = sorted(lst_cg_components[idx]) assert nx_vertices == cg_vertices + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_weak_cc_nx(graph_file): + gc.collect() + + M = utils.read_csv_for_nx(graph_file) + Gnx = nx.from_pandas_edgelist( + M, source="0", target="1", create_using=nx.DiGraph() + ) + + nx_wcc = nx.weakly_connected_components(Gnx) + nx_result = sorted(nx_wcc) + + cu_wcc = cugraph.weakly_connected_components(Gnx) + pdf = pd.DataFrame.from_dict(cu_wcc, orient='index').reset_index() + pdf.columns = ["vertex", "labels"] + cu_result = pdf["labels"].nunique() + + assert len(nx_result) == cu_result + + +@pytest.mark.parametrize("graph_file", utils.STRONGDATASETS) +def test_strong_cc_nx(graph_file): + gc.collect() + + M = utils.read_csv_for_nx(graph_file) + Gnx = nx.from_pandas_edgelist( + M, source="0", target="1", create_using=nx.DiGraph() + ) + + nx_scc = nx.strongly_connected_components(Gnx) + nx_result = sorted(nx_scc) + + cu_scc = cugraph.strongly_connected_components(Gnx) + + pdf = pd.DataFrame.from_dict(cu_scc, orient='index').reset_index() + pdf.columns = ["vertex", "labels"] + cu_result = pdf["labels"].nunique() + + assert len(nx_result) == cu_result diff --git a/python/cugraph/tests/test_core_number.py b/python/cugraph/tests/test_core_number.py index c1b8702836f..edbc7b0597b 100644 --- a/python/cugraph/tests/test_core_number.py +++ b/python/cugraph/tests/test_core_number.py @@ -15,6 +15,7 @@ import pytest import cugraph from cugraph.tests import utils +from cugraph.utilities import df_score_to_dictionary # Temporarily suppress warnings till networkX fixes deprecation warnings # (Using or importing the ABCs from 'collections' instead of from @@ -31,19 +32,38 @@ print("Networkx version : {} 
".format(nx.__version__)) -def calc_core_number(graph_file): +def calc_nx_core_number(graph_file): + NM = utils.read_csv_for_nx(graph_file) + Gnx = nx.from_pandas_edgelist( + NM, source="0", target="1", create_using=nx.Graph() + ) + nc = nx.core_number(Gnx) + return nc + + +def calc_cg_core_number(graph_file): M = utils.read_csv_file(graph_file) - G = cugraph.DiGraph() + G = cugraph.Graph() G.from_cudf_edgelist(M, source="0", destination="1") cn = cugraph.core_number(G) - cn = cn.sort_values("vertex").reset_index(drop=True) + return cn + +def calc_core_number(graph_file): NM = utils.read_csv_for_nx(graph_file) Gnx = nx.from_pandas_edgelist( NM, source="0", target="1", create_using=nx.Graph() ) nc = nx.core_number(Gnx) + + M = utils.read_csv_file(graph_file) + G = cugraph.Graph() + G.from_cudf_edgelist(M, source="0", destination="1") + + cn = cugraph.core_number(G) + cn = cn.sort_values("vertex").reset_index(drop=True) + pdf = [nc[k] for k in sorted(nc.keys())] cn["nx_core_number"] = pdf cn = cn.rename(columns={"core_number": "cu_core_number"}, copy=False) @@ -62,6 +82,24 @@ def calc_core_number(graph_file): def test_core_number(graph_file): gc.collect() - cn = calc_core_number(graph_file) + nx_num = calc_nx_core_number(graph_file) + cg_num = calc_cg_core_number(graph_file) + + # convert cugraph dataframe to a dictionary + cg_num_dic = df_score_to_dictionary(cg_num, k="core_number") + + assert cg_num_dic == nx_num + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_core_number_nx(graph_file): + gc.collect() + + NM = utils.read_csv_for_nx(graph_file) + Gnx = nx.from_pandas_edgelist( + NM, source="0", target="1", create_using=nx.Graph() + ) + nc = nx.core_number(Gnx) + cc = cugraph.core_number(Gnx) - assert cn["cu_core_number"].equals(cn["nx_core_number"]) + assert nc == cc diff --git a/python/cugraph/tests/test_ecg.py b/python/cugraph/tests/test_ecg.py index b5c590a689a..4dc01c389cc 100644 --- a/python/cugraph/tests/test_ecg.py +++ 
b/python/cugraph/tests/test_ecg.py @@ -15,6 +15,7 @@ import pytest +import networkx as nx import cugraph from cugraph.tests import utils @@ -66,3 +67,20 @@ def test_ecg_clustering(graph_file, min_weight, ensemble_size): # Assert that the partitioning has better modularity than the random # assignment assert cu_score > (0.95 * golden_score) + + +@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("min_weight", MIN_WEIGHTS) +@pytest.mark.parametrize("ensemble_size", ENSEMBLE_SIZES) +def test_ecg_clustering_nx(graph_file, min_weight, ensemble_size): + gc.collect() + + # Read in the graph and get a NetworkX graph + M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + G = nx.from_pandas_edgelist( + M, source="0", target="1", edge_attr="weight", + create_using=nx.Graph() + ) + + # Get the modularity score for partitioning versus random assignment + _ = cugraph.ecg(G, min_weight, ensemble_size, "weight") diff --git a/python/cugraph/tests/test_edge_betweenness_centrality.py b/python/cugraph/tests/test_edge_betweenness_centrality.py index e23fdc210ff..6165705a9b0 100644 --- a/python/cugraph/tests/test_edge_betweenness_centrality.py +++ b/python/cugraph/tests/test_edge_betweenness_centrality.py @@ -458,3 +458,28 @@ def test_edge_betweenness_invalid_dtype( result_dtype=result_dtype, ) compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc") + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_SMALL) +def test_edge_betweenness_centrality_nx(graph_file): + prepare_test() + + Gnx = utils.generate_nx_graph_from_file(graph_file) + + nx_bc = nx.edge_betweenness_centrality(Gnx) + cu_bc = cugraph.edge_betweenness_centrality(Gnx) + + # Calculating mismatch + networkx_bc = sorted(nx_bc.items(), key=lambda x: x[0]) + cugraph_bc = sorted(cu_bc.items(), key=lambda x: x[0]) + err = 0 + assert len(cugraph_bc) == len(networkx_bc) + for i in range(len(cugraph_bc)): + if ( + abs(cugraph_bc[i][1] - networkx_bc[i][1]) > 0.01 + and 
cugraph_bc[i][0] == networkx_bc[i][0] + ): + err = err + 1 + print(f"{cugraph_bc[i][1]} and {cugraph_bc[i][1]}") + print("Mismatches:", err) + assert err < (0.01 * len(cugraph_bc)) diff --git a/python/cugraph/tests/test_graph.py b/python/cugraph/tests/test_graph.py index 44c856cf3dc..23c8bb56939 100644 --- a/python/cugraph/tests/test_graph.py +++ b/python/cugraph/tests/test_graph.py @@ -384,12 +384,12 @@ def test_view_edge_list_for_Graph(graph_file): # Test +@pytest.mark.skip(reason="skipping while new Nx framework is being worked") @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_networkx_compatibility(graph_file): gc.collect() # test from_cudf_edgelist() - M = utils.read_csv_for_nx(graph_file) df = pd.DataFrame() diff --git a/python/cugraph/tests/test_hits.py b/python/cugraph/tests/test_hits.py index c8a9274e078..30b6f20f478 100644 --- a/python/cugraph/tests/test_hits.py +++ b/python/cugraph/tests/test_hits.py @@ -137,3 +137,20 @@ def test_hits(graph_file, max_iter, tol): assert cugraph_hits["authorities"].is_monotonic_decreasing assert cugraph_hits["nx_authorities"].is_monotonic_decreasing + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +@pytest.mark.parametrize("max_iter", MAX_ITERATIONS) +@pytest.mark.parametrize("tol", TOLERANCE) +def test_hits_nx(graph_file, max_iter, tol): + gc.collect() + + M = utils.read_csv_for_nx(graph_file) + Gnx = nx.from_pandas_edgelist( + M, source="0", target="1", create_using=nx.DiGraph() + ) + nx_hubs, nx_authorities = nx.hits(Gnx, max_iter, tol, normalized=True) + cg_hubs, cg_authorities = cugraph.hits(Gnx, max_iter, tol, normalized=True) + + # assert nx_hubs == cg_hubs + # assert nx_authorities == cg_authorities diff --git a/python/cugraph/tests/test_jaccard.py b/python/cugraph/tests/test_jaccard.py index 7cb7b274434..d5de073189d 100644 --- a/python/cugraph/tests/test_jaccard.py +++ b/python/cugraph/tests/test_jaccard.py @@ -197,3 +197,24 @@ def test_jaccard_two_hop_edge_vals(graph_file): 
for i in range(len(df)): diff = abs(nx_coeff[i] - df["jaccard_coeff"].iloc[i]) assert diff < 1.0e-6 + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_jaccard_nx(graph_file): + gc.collect() + + M = utils.read_csv_for_nx(graph_file) + Gnx = nx.from_pandas_edgelist( + M, source="0", target="1", create_using=nx.Graph() + ) + + nx_j = nx.jaccard_coefficient(Gnx) + nv_js = sorted(nx_j, key=len, reverse=True) + + cg_j = cugraph.jaccard_coefficient(Gnx) + + assert len(nv_js) > len(cg_j) + + # FIXME: Nx does a full all-pair Jaccard. + # cuGraph does a limited 1-hop Jaccard + # assert nx_j == cg_j diff --git a/python/cugraph/tests/test_k_core.py b/python/cugraph/tests/test_k_core.py index 59f0b3fb301..c05cb1dd86e 100644 --- a/python/cugraph/tests/test_k_core.py +++ b/python/cugraph/tests/test_k_core.py @@ -89,3 +89,17 @@ def test_core_number_Graph(graph_file): cu_kcore, nx_kcore = calc_k_cores(graph_file, False) assert compare_edges(cu_kcore, nx_kcore) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_core_number_Graph_nx(graph_file): + gc.collect() + + NM = utils.read_csv_for_nx(graph_file) + Gnx = nx.from_pandas_edgelist( + NM, source="0", target="1", create_using=nx.Graph() + ) + nc = nx.k_core(Gnx) + cc = cugraph.k_core(Gnx) + + assert nx.is_isomorphic(nc, cc) diff --git a/python/cugraph/tests/test_k_truss_subgraph.py b/python/cugraph/tests/test_k_truss_subgraph.py index 314a4f62618..e9ccac81cf6 100644 --- a/python/cugraph/tests/test_k_truss_subgraph.py +++ b/python/cugraph/tests/test_k_truss_subgraph.py @@ -46,16 +46,7 @@ def ktruss_ground_truth(graph_file): return df -def cugraph_k_truss_subgraph(graph_file, k): - cu_M = utils.read_csv_file(graph_file) - G = cugraph.Graph() - G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") - k_subgraph = cugraph.ktruss_subgraph(G, k) - return k_subgraph - - -def compare_k_truss(graph_file, k, ground_truth_file): - k_truss_cugraph = 
cugraph_k_truss_subgraph(graph_file, k) +def compare_k_truss(k_truss_cugraph, k, ground_truth_file): k_truss_nx = ktruss_ground_truth(ground_truth_file) edgelist_df = k_truss_cugraph.view_edge_list() @@ -82,4 +73,29 @@ def compare_k_truss(graph_file, k, ground_truth_file): def test_ktruss_subgraph_Graph(graph_file, nx_ground_truth): gc.collect() - compare_k_truss(graph_file, 5, nx_ground_truth) + k = 5 + cu_M = utils.read_csv_file(graph_file) + G = cugraph.Graph() + G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") + k_subgraph = cugraph.ktruss_subgraph(G, k) + + compare_k_truss(k_subgraph, k, nx_ground_truth) + + +@pytest.mark.parametrize("graph_file, nx_ground_truth", utils.DATASETS_KTRUSS) +def test_ktruss_subgraph_Graph_nx(graph_file, nx_ground_truth): + gc.collect() + + k = 5 + M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + G = nx.from_pandas_edgelist( + M, source="0", target="1", edge_attr="weight", + create_using=nx.Graph() + ) + k_subgraph = cugraph.k_truss(G, k) + df = nx.to_pandas_edgelist(k_subgraph) + + k_truss_nx = nx.k_truss(G, k) + nx_df = nx.to_pandas_edgelist(k_truss_nx) + + assert len(df) == len(nx_df) diff --git a/python/cugraph/tests/test_katz_centrality.py b/python/cugraph/tests/test_katz_centrality.py index 62f30e22a57..a2a03c1518b 100644 --- a/python/cugraph/tests/test_katz_centrality.py +++ b/python/cugraph/tests/test_katz_centrality.py @@ -80,3 +80,35 @@ def test_katz_centrality(graph_file): topKCU = topKVertices(katz_scores, "cu_katz", 10) assert topKNX.equals(topKCU) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_katz_centrality_nx(graph_file): + gc.collect() + + NM = utils.read_csv_for_nx(graph_file) + Gnx = nx.from_pandas_edgelist( + NM, create_using=nx.DiGraph(), source="0", target="1" + ) + + G = cugraph.utilities.convert_from_nx(Gnx) + largest_out_degree = G.degrees().nlargest(n=1, columns="out_degree") + largest_out_degree = 
largest_out_degree["out_degree"].iloc[0] + katz_alpha = 1 / (largest_out_degree + 1) + + nk = nx.katz_centrality(Gnx, alpha=katz_alpha) + ck = cugraph.katz_centrality(Gnx, alpha=None, max_iter=1000) + + # Calculating mismatch + nk = sorted(nk.items(), key=lambda x: x[0]) + ck = sorted(ck.items(), key=lambda x: x[0]) + err = 0 + assert len(ck) == len(nk) + for i in range(len(ck)): + if ( + abs(ck[i][1] - nk[i][1]) > 0.1 + and ck[i][0] == nk[i][0] + ): + err = err + 1 + print("Mismatches:", err) + assert err < (0.1 * len(ck)) diff --git a/python/cugraph/tests/test_leiden.py b/python/cugraph/tests/test_leiden.py index 7f7b4b577fe..d6a7f86b5c5 100644 --- a/python/cugraph/tests/test_leiden.py +++ b/python/cugraph/tests/test_leiden.py @@ -16,6 +16,7 @@ import pytest +import networkx as nx import cugraph from cugraph.tests import utils @@ -30,34 +31,24 @@ warnings.filterwarnings("ignore", category=DeprecationWarning) -def cugraph_leiden(cu_M, edgevals=False): +def cugraph_leiden(G, edgevals=False): - G = cugraph.Graph() - if edgevals: - G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") - else: - G.from_cudf_edgelist(cu_M, source="0", destination="1") # cugraph Louvain Call t1 = time.time() parts, mod = cugraph.leiden(G) t2 = time.time() - t1 - print("Cugraph Time : " + str(t2)) + print("Cugraph Leiden Time : " + str(t2)) return parts, mod -def cugraph_louvain(cu_M, edgevals=False): +def cugraph_louvain(G, edgevals=False): - G = cugraph.Graph() - if edgevals: - G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") - else: - G.from_cudf_edgelist(cu_M, source="0", destination="1") # cugraph Louvain Call t1 = time.time() parts, mod = cugraph.louvain(G) t2 = time.time() - t1 - print("Cugraph Time : " + str(t2)) + print("Cugraph Louvain Time : " + str(t2)) return parts, mod @@ -65,10 +56,46 @@ def cugraph_louvain(cu_M, edgevals=False): @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_leiden(graph_file): gc.collect() + 
edgevals = True cu_M = utils.read_csv_file(graph_file) - leiden_parts, leiden_mod = cugraph_leiden(cu_M, edgevals=True) - louvain_parts, louvain_mod = cugraph_louvain(cu_M, edgevals=True) + + G = cugraph.Graph() + if edgevals: + G.from_cudf_edgelist(cu_M, source="0", destination="1", edge_attr="2") + else: + G.from_cudf_edgelist(cu_M, source="0", destination="1") + + leiden_parts, leiden_mod = cugraph_leiden(G, edgevals=True) + louvain_parts, louvain_mod = cugraph_louvain(G, edgevals=True) + + # Calculating modularity scores for comparison + assert leiden_mod >= (0.99 * louvain_mod) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_leiden_nx(graph_file): + gc.collect() + edgevals = True + + NM = utils.read_csv_for_nx(graph_file) + + if edgevals: + G = nx.from_pandas_edgelist(NM, + create_using=nx.Graph(), + source="0", + target="1" + ) + else: + G = nx.from_pandas_edgelist(NM, + create_using=nx.Graph(), + source="0", + target="1", + edge_attr="2" + ) + + leiden_parts, leiden_mod = cugraph_leiden(G, edgevals=True) + louvain_parts, louvain_mod = cugraph_louvain(G, edgevals=True) # Calculating modularity scores for comparison assert leiden_mod >= (0.99 * louvain_mod) diff --git a/python/cugraph/tests/test_nx_convert.py b/python/cugraph/tests/test_nx_convert.py new file mode 100644 index 00000000000..5c47c9eb5e2 --- /dev/null +++ b/python/cugraph/tests/test_nx_convert.py @@ -0,0 +1,66 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import gc +import pytest + +import cugraph +from cugraph.tests import utils + +# Temporarily suppress warnings till networkX fixes deprecation warnings +# (Using or importing the ABCs from 'collections' instead of from +# 'collections.abc' is deprecated, and in 3.8 it will stop working) for +# python 3.7. Also, this import networkx needs to be relocated in the +# third-party group once this gets fixed. +import warnings + +with warnings.catch_warnings(): + warnings.filterwarnings("ignore", category=DeprecationWarning) + import networkx as nx + + +# Test +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_nx_convert(graph_file): + gc.collect() + + # read data and create a Nx Graph + nx_df = utils.read_csv_for_nx(graph_file) + nxG = nx.from_pandas_edgelist(nx_df, "0", "1") + + cuG = cugraph.utilities.convert_from_nx(nxG) + + assert nxG.number_of_nodes() == cuG.number_of_nodes() + assert nxG.number_of_edges() == cuG.number_of_edges() + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_nx_convert_multicol(graph_file): + gc.collect() + + # read data and create a Nx Graph + nx_df = utils.read_csv_for_nx(graph_file) + + G = nx.DiGraph() + + for row in nx_df.iterrows(): + G.add_edge( + row[1]["0"], row[1]["1"], count=[row[1]["0"], row[1]["1"]] + ) + + nxG = nx.from_pandas_edgelist(nx_df, "0", "1") + + cuG = cugraph.utilities.convert_from_nx(nxG) + + assert nxG.number_of_nodes() == cuG.number_of_nodes() + assert nxG.number_of_edges() == cuG.number_of_edges() diff --git a/python/cugraph/tests/test_pagerank.py b/python/cugraph/tests/test_pagerank.py index b58ec2d9bc9..5a5de379f9d 100644 --- a/python/cugraph/tests/test_pagerank.py +++ b/python/cugraph/tests/test_pagerank.py @@ -46,10 +46,8 @@ def cudify(d): return cuD -def cugraph_call(cu_M, max_iter, tol, alpha, personalization, nstart): +def cugraph_call(G, max_iter, tol, alpha, 
personalization, nstart): # cugraph Pagerank Call - G = cugraph.DiGraph() - G.from_cudf_edgelist(cu_M, source="0", destination="1") t1 = time.time() df = cugraph.pagerank( G, @@ -74,26 +72,32 @@ def cugraph_call(cu_M, max_iter, tol, alpha, personalization, nstart): return sorted_pr +# need a different function since the Nx version returns a dictionary +def cugraph_nx_call(G, max_iter, tol, alpha, personalization, nstart): + # cugraph Pagerank Call + t1 = time.time() + pr = cugraph.pagerank( + G, + alpha=alpha, + max_iter=max_iter, + tol=tol, + personalization=personalization, + nstart=nstart, + ) + t2 = time.time() - t1 + print("Cugraph Time : " + str(t2)) + + return pr + + # The function selects personalization_perc% of accessible vertices in graph M # and randomly assigns them personalization values -def networkx_call(M, max_iter, tol, alpha, personalization_perc): - """nnz_per_row = {r: 0 for r in range(M.get_shape()[0])} - for nnz in range(M.getnnz()): - nnz_per_row[M.row[nnz]] = 1 + nnz_per_row[M.row[nnz]] - for nnz in range(M.getnnz()): - M.data[nnz] = 1.0/float(nnz_per_row[M.row[nnz]]) - - M = M.tocsr() - if M is None: - raise TypeError('Could not read the input graph') - if M.shape[0] != M.shape[1]: - raise TypeError('Shape is not square') - """ +def networkx_call(Gnx, max_iter, tol, alpha, personalization_perc, nnz_vtx): + personalization = None if personalization_perc != 0: personalization = {} - nnz_vtx = np.unique(M) - print(nnz_vtx) + # print(nnz_vtx) personalization_count = int( (nnz_vtx.size * personalization_perc) / 100.0 ) @@ -101,34 +105,18 @@ def networkx_call(M, max_iter, tol, alpha, personalization_perc): nnz_vtx = np.random.choice( nnz_vtx, min(nnz_vtx.size, personalization_count), replace=False ) - print(nnz_vtx) + # print(nnz_vtx) nnz_val = np.random.random(nnz_vtx.size) nnz_val = nnz_val / sum(nnz_val) - print(nnz_val) + # print(nnz_val) for vtx, val in zip(nnz_vtx, nnz_val): personalization[vtx] = val - # should be autosorted, but check 
just to make sure - """if not M.has_sorted_indices: - print('sort_indices ... ') - M.sort_indices() - """ - # in NVGRAPH tests we read as CSR and feed as CSC, - # so here we do this explicitly - print("Format conversion ... ") - - # Directed NetworkX graph - Gnx = nx.from_pandas_edgelist( - M, source="0", target="1", create_using=nx.DiGraph() - ) - z = {k: 1.0 / Gnx.number_of_nodes() for k in range(Gnx.number_of_nodes())} # Networkx Pagerank Call - print("Solving... ") t1 = time.time() - # same parameters as in NVGRAPH pr = nx.pagerank( Gnx, alpha=alpha, @@ -170,9 +158,15 @@ def test_pagerank( ): gc.collect() + # NetworkX PageRank M = utils.read_csv_for_nx(graph_file) + nnz_vtx = np.unique(M) + Gnx = nx.from_pandas_edgelist( + M, source="0", target="1", create_using=nx.DiGraph() + ) + networkx_pr, networkx_prsn = networkx_call( - M, max_iter, tol, alpha, personalization_perc + Gnx, max_iter, tol, alpha, personalization_perc, nnz_vtx ) cu_nstart = None @@ -180,12 +174,62 @@ def test_pagerank( cu_nstart = cudify(networkx_pr) max_iter = 5 cu_prsn = cudify(networkx_prsn) + + # cuGraph PageRank cu_M = utils.read_csv_file(graph_file) - cugraph_pr = cugraph_call(cu_M, max_iter, tol, alpha, cu_prsn, cu_nstart) + G = cugraph.DiGraph() + G.from_cudf_edgelist(cu_M, source="0", destination="1") + + cugraph_pr = cugraph_call(G, max_iter, tol, alpha, cu_prsn, cu_nstart) # Calculating mismatch + networkx_pr = sorted(networkx_pr.items(), key=lambda x: x[0]) + err = 0 + assert len(cugraph_pr) == len(networkx_pr) + for i in range(len(cugraph_pr)): + if ( + abs(cugraph_pr[i][1] - networkx_pr[i][1]) > tol * 1.1 + and cugraph_pr[i][0] == networkx_pr[i][0] + ): + err = err + 1 + print("Mismatches:", err) + assert err < (0.01 * len(cugraph_pr)) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +@pytest.mark.parametrize("max_iter", MAX_ITERATIONS) +@pytest.mark.parametrize("tol", TOLERANCE) +@pytest.mark.parametrize("alpha", ALPHA) 
+@pytest.mark.parametrize("personalization_perc", PERSONALIZATION_PERC) +@pytest.mark.parametrize("has_guess", HAS_GUESS) +def test_pagerank_nx( + graph_file, max_iter, tol, alpha, personalization_perc, has_guess +): + gc.collect() + + # NetworkX PageRank + M = utils.read_csv_for_nx(graph_file) + nnz_vtx = np.unique(M) + Gnx = nx.from_pandas_edgelist( + M, source="0", target="1", create_using=nx.DiGraph() + ) + + networkx_pr, networkx_prsn = networkx_call( + Gnx, max_iter, tol, alpha, personalization_perc, nnz_vtx + ) + cu_nstart = None + if has_guess == 1: + cu_nstart = cudify(networkx_pr) + max_iter = 5 + cu_prsn = cudify(networkx_prsn) + + # cuGraph PageRank with Nx Graph + cugraph_pr = cugraph_nx_call(Gnx, max_iter, tol, alpha, cu_prsn, cu_nstart) + + # Calculating mismatch networkx_pr = sorted(networkx_pr.items(), key=lambda x: x[0]) + cugraph_pr = sorted(cugraph_pr.items(), key=lambda x: x[0]) err = 0 assert len(cugraph_pr) == len(networkx_pr) for i in range(len(cugraph_pr)): @@ -194,5 +238,6 @@ def test_pagerank( and cugraph_pr[i][0] == networkx_pr[i][0] ): err = err + 1 + print(f"{cugraph_pr[i][1]} and {cugraph_pr[i][1]}") print("Mismatches:", err) assert err < (0.01 * len(cugraph_pr)) diff --git a/python/cugraph/tests/test_triangle_count.py b/python/cugraph/tests/test_triangle_count.py index 975ddd82470..ff28f55838d 100644 --- a/python/cugraph/tests/test_triangle_count.py +++ b/python/cugraph/tests/test_triangle_count.py @@ -84,3 +84,21 @@ def test_triangles_edge_vals(graph_file): cu_count = cugraph_call(M, edgevals=True) nx_count = networkx_call(M) assert cu_count == nx_count + + +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_triangles_nx(graph_file): + gc.collect() + + M = utils.read_csv_for_nx(graph_file) + G = nx.from_pandas_edgelist( + M, source="0", target="1", create_using=nx.Graph() + ) + + cu_count = cugraph.triangles(G) + dic = nx.triangles(G) + nx_count = 0 + for i in dic.keys(): + nx_count += dic[i] + + assert 
cu_count == nx_count diff --git a/python/cugraph/utilities/__init__.py b/python/cugraph/utilities/__init__.py index 19b7c347420..c42e28dd2cd 100644 --- a/python/cugraph/utilities/__init__.py +++ b/python/cugraph/utilities/__init__.py @@ -13,3 +13,9 @@ # from cugraph.utilities.grmat import grmat_gen # from cugraph.utilities.pointer_utils import device_of_gpu_pointer +from cugraph.utilities.nx_factory import convert_from_nx +from cugraph.utilities.nx_factory import check_nx_graph +from cugraph.utilities.nx_factory import df_score_to_dictionary +from cugraph.utilities.nx_factory import df_edge_score_to_dictionary +from cugraph.utilities.nx_factory import cugraph_to_nx +from cugraph.utilities.nx_factory import is_networkx_graph diff --git a/python/cugraph/utilities/nx_factory.py b/python/cugraph/utilities/nx_factory.py new file mode 100644 index 00000000000..e880df5f32e --- /dev/null +++ b/python/cugraph/utilities/nx_factory.py @@ -0,0 +1,164 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import networkx as nx +import cugraph +from cudf import from_pandas + + +def convert_from_nx(nxG, weight=None): + if type(nxG) == nx.classes.graph.Graph: + G = cugraph.Graph() + elif type(nxG) == nx.classes.digraph.DiGraph: + G = cugraph.DiGraph() + else: + raise ValueError("nxG does not appear to be a NetworkX graph type") + + pdf = nx.to_pandas_edgelist(nxG) + num_col = len(pdf.columns) + + if num_col < 2: + raise ValueError("NetworkX graph did not contain edges") + + if weight is None: + num_col == 2 + pdf = pdf[["source", "target"]] + + if num_col >= 3 and weight is not None: + pdf = pdf[["source", "target", weight]] + num_col = 3 + + gdf = from_pandas(pdf) + + if num_col == 2: + G.from_cudf_edgelist(gdf, "source", "target") + else: + G.from_cudf_edgelist(gdf, "source", "target", weight) + + del gdf + del pdf + + return G + + +def is_networkx_graph(G): + return isinstance(G, nx.classes.graph.Graph) + + +def check_nx_graph(G, weight=None): + """ + This is a convenience function that will ensure the proper graph type + + Parameters + ---------- + G : cudf.Graph or networkx.Graph + weight : str or None + which column to use for weight. Default is None + + Returns + ------- + G : cudf.Graph + returns a cugraph.Graph that is either the orginal input or + a conversion from NetworkX + + is_nx : Boolean + indicates rather or not the Graph was converted + """ + + if isinstance(G, nx.classes.graph.Graph): + return convert_from_nx(G, weight), True + else: + return G, False + + +def df_score_to_dictionary(df, k, v="vertex"): + """ + Convert a dataframe to a dictionary + + Parameters + ---------- + df : cudf.DataFrame + GPU data frame containing two cudf.Series of size V: the vertex + identifiers and the corresponding score values. + Please note that the resulting the 'vertex' column might not be + in ascending order. + + df['vertex'] : cudf.Series + Contains the vertex identifiers + df[..] 
: cudf.Series + Contains the scores of the vertices + + k : str + score column name + v : str + the vertex column name. Default is "vertex" + + + Returns + ------- + dict : Dictionary of vertices and score + + """ + df = df.sort_values(by=v) + return df.to_pandas().set_index(v).to_dict()[k] + + +def df_edge_score_to_dictionary(df, k, src="src", dst="dst"): + """ + Convert a dataframe to a dictionary + + Parameters + ---------- + df : cudf.DataFrame + GPU data frame containing two cudf.Series of size V: the vertex + identifiers and the corresponding score values. + Please note that the resulting the 'vertex' column might not be + in ascending order. + + df['vertex'] : cudf.Series + Contains the vertex identifiers + df[X] : cudf.Series + Contains the scores of the vertices + + k : str + score column name + + src : str + source column name + dst : str + destination column name + + + Returns + ------- + dict : Dictionary of vertices and score + + """ + pdf = df.sort_values(by=[src, dst]).to_pandas() + d = {} + for i in range(len(pdf)): + d[(pdf[src][i], pdf[dst][i])] = pdf[k][i] + + return d + + +def cugraph_to_nx(G): + pdf = G.view_edge_list().to_pandas() + num_col = len(pdf.columns) + + if num_col == 2: + Gnx = nx.from_pandas_edgelist(pdf, source="src", target="dst") + else: + Gnx = nx.from_pandas_edgelist(pdf, source="src", target="dst", + edge_attr="weights") + + return Gnx From 0e9312451f1cd934c02022699639a524925ebfe8 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Fri, 25 Sep 2020 12:47:12 -0400 Subject: [PATCH 57/74] Louvain API update to use graph_container_t (#1157) * update louvain API * try a more helpful error message to diagnose CI issue --- CHANGELOG.md | 1 + cpp/include/algorithms.hpp | 23 ++--- cpp/include/utilities/cython.hpp | 12 ++- cpp/src/community/louvain.cu | 73 +++++++++++++- cpp/src/experimental/louvain.cuh | 54 +++++++++++ cpp/src/utilities/cython.cpp | 94 +++++++++++++------ 
python/cugraph/community/louvain.pxd | 8 +- python/cugraph/community/louvain_wrapper.pyx | 59 +++++++----- python/cugraph/dask/community/louvain.pxd | 3 +- .../dask/community/louvain_wrapper.pyx | 5 +- python/cugraph/structure/graph_primtypes.pxd | 4 +- python/cugraph/tests/test_graph.py | 11 ++- python/cugraph/tests/test_louvain.py | 1 + 13 files changed, 259 insertions(+), 89 deletions(-) create mode 100644 cpp/src/experimental/louvain.cuh diff --git a/CHANGELOG.md b/CHANGELOG.md index a05aa7711b9..c640407fc6e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ - PR #1098 Add new graph classes to support 2D partitioning - PR #1124 Sub-communicator initialization for 2D partitioning support - PR #1147 Added support for NetworkX graphs as input type +- PR #1157 Louvain API update to use graph_container_t ## Improvements - PR 1081 MNMG Renumbering - sort partitions by degree diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index 457aebb27a1..1331f12ff07 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -616,33 +616,30 @@ void bfs(raft::handle_t const &handle, * * @throws cugraph::logic_error when an error occurs. * - * @tparam vertex_t Type of vertex identifiers. - * Supported value : int (signed, 32-bit) - * @tparam edge_t Type of edge identifiers. - * Supported value : int (signed, 32-bit) - * @tparam weight_t Type of edge weights. Supported values : float or double. + * @tparam graph_t Type of graph * * @param[in] handle Library handle (RAFT). If a communicator is set in the handle, * @param[in] graph input graph object (CSR) * @param[out] clustering Pointer to device array where the clustering should be stored - * @param[in] max_iter (optional) maximum number of iterations to run (default 100) + * @param[in] max_level (optional) maximum number of levels to run (default 100) * @param[in] resolution (optional) The value of the resolution parameter to use. 
* Called gamma in the modularity formula, this changes the size * of the communities. Higher resolutions lead to more smaller * communities, lower resolutions lead to fewer larger - * communities. (default 1) + * communities. (default 1) * * @return a pair containing: * 1) number of levels of the returned clustering * 2) modularity of the returned clustering * */ -template -std::pair louvain(raft::handle_t const &handle, - GraphCSRView const &graph, - vertex_t *clustering, - size_t max_iter = 100, - weight_t resolution = weight_t{1}); +template +std::pair louvain( + raft::handle_t const &handle, + graph_t const &graph, + typename graph_t::vertex_type *clustering, + size_t max_level = 100, + typename graph_t::weight_type resolution = typename graph_t::weight_type{1}); /** * @brief Leiden implementation diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index cda8c52c1d8..b53ef8451d7 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -96,6 +96,8 @@ struct graph_container_t { graph_container_t(const graph_container_t&) = delete; graph_container_t& operator=(const graph_container_t&) = delete; + void get_vertex_identifiers(void* c_identifier); + graphPtrUnion graph_ptr_union; graphTypeEnum graph_ptr_type; }; @@ -178,11 +180,11 @@ void populate_graph_container(graph_container_t& graph_container, // Wrapper for calling Louvain using a graph container template -weight_t call_louvain(raft::handle_t const& handle, - graph_container_t const& graph_container, - void* parts, - size_t max_level, - weight_t resolution); +std::pair call_louvain(raft::handle_t const& handle, + graph_container_t const& graph_container, + void* parts, + size_t max_level, + weight_t resolution); } // namespace cython } // namespace cugraph diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index 1f193e9a2f9..2360544dc29 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -15,25 +15,56 
@@ */ #include +#include namespace cugraph { +namespace detail { + template std::pair louvain(raft::handle_t const &handle, - GraphCSRView const &graph, + GraphCSRView const &graph_view, vertex_t *clustering, size_t max_level, weight_t resolution) { - CUGRAPH_EXPECTS(graph.edge_data != nullptr, + CUGRAPH_EXPECTS(graph_view.edge_data != nullptr, "Invalid input argument: louvain expects a weighted graph"); CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); - Louvain> runner(handle, graph); + Louvain> runner(handle, graph_view); + return runner(clustering, max_level, resolution); +} + +template +std::pair louvain( + raft::handle_t const &handle, + experimental::graph_view_t const &graph_view, + vertex_t *clustering, + size_t max_level, + weight_t resolution) +{ + CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); + experimental::Louvain> + runner(handle, graph_view); return runner(clustering, max_level, resolution); } +} // namespace detail + +template +std::pair louvain(raft::handle_t const &handle, + graph_t const &graph, + typename graph_t::vertex_type *clustering, + size_t max_level, + typename graph_t::weight_type resolution) +{ + CUGRAPH_EXPECTS(clustering != nullptr, "Invalid input argument: clustering is null"); + + return detail::louvain(handle, graph, clustering, max_level, resolution); +} + // Explicit template instantations template std::pair louvain( raft::handle_t const &, GraphCSRView const &, int32_t *, size_t, float); @@ -42,5 +73,41 @@ template std::pair louvain(raft::handle_t const &, int32_t *, size_t, double); +template std::pair louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + int32_t *, + size_t, + float); +template std::pair louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + int32_t *, + size_t, + double); +template std::pair louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + int32_t *, + size_t, + 
float); +template std::pair louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + int32_t *, + size_t, + double); +template std::pair louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + int64_t *, + size_t, + float); +template std::pair louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + int64_t *, + size_t, + double); } // namespace cugraph diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh new file mode 100644 index 00000000000..cadc685b119 --- /dev/null +++ b/cpp/src/experimental/louvain.cuh @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include + +namespace cugraph { +namespace experimental { + +template +class Louvain { + public: + using graph_view_t = graph_view_type; + using vertex_t = typename graph_view_t::vertex_type; + using edge_t = typename graph_view_t::edge_type; + using weight_t = typename graph_view_t::weight_type; + using graph_t = experimental::graph_t; + + Louvain(raft::handle_t const &handle, graph_view_t const &graph_view) + : handle_(handle), current_graph_view_(graph_view) + { + } + + virtual std::pair operator()(vertex_t *d_cluster_vec, + size_t max_level, + weight_t resolution) + { + CUGRAPH_FAIL("unimplemented"); + } + + protected: + raft::handle_t const &handle_; + graph_view_t current_graph_view_; +}; + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/utilities/cython.cpp b/cpp/src/utilities/cython.cpp index c3b494aa467..166ce7792a7 100644 --- a/cpp/src/utilities/cython.cpp +++ b/cpp/src/utilities/cython.cpp @@ -232,53 +232,85 @@ void populate_graph_container(graph_container_t& graph_container, } } +void graph_container_t::get_vertex_identifiers(void* c_identifier) +{ + CUGRAPH_EXPECTS(graph_ptr_type != graphTypeEnum::null, + "get_vertex_identifiers() cannot be called on an uninitialized container"); + + switch (graph_ptr_type) { + case graphTypeEnum::GraphCSRViewFloat: { + graph_ptr_union.GraphCSRViewFloatPtr->get_vertex_identifiers( + static_cast(c_identifier)); + } break; + case graphTypeEnum::GraphCSCViewFloat: { + graph_ptr_union.GraphCSCViewFloatPtr->get_vertex_identifiers( + static_cast(c_identifier)); + } break; + case graphTypeEnum::GraphCOOViewFloat: { + graph_ptr_union.GraphCOOViewFloatPtr->get_vertex_identifiers( + static_cast(c_identifier)); + } break; + case graphTypeEnum::GraphCSRViewDouble: { + graph_ptr_union.GraphCSRViewDoublePtr->get_vertex_identifiers( + static_cast(c_identifier)); + } break; + case graphTypeEnum::GraphCSCViewDouble: { + graph_ptr_union.GraphCSCViewDoublePtr->get_vertex_identifiers( + 
static_cast(c_identifier)); + } break; + case graphTypeEnum::GraphCOOViewDouble: { + graph_ptr_union.GraphCOOViewDoublePtr->get_vertex_identifiers( + static_cast(c_identifier)); + } break; + default: { + CUGRAPH_FAIL("unexpected weight type"); + } + } +} + // Wrapper for calling Louvain using a graph container template -weight_t call_louvain(raft::handle_t const& handle, - graph_container_t const& graph_container, - void* parts, - size_t max_level, - weight_t resolution) +std::pair call_louvain(raft::handle_t const& handle, + graph_container_t const& graph_container, + void* parts, + size_t max_level, + weight_t resolution) { - weight_t final_modularity; + std::pair results; // FIXME: the only graph types currently in the container have ints for // vertex_t and edge_t types. In the future, additional types for vertices and // edges will be available, and when that happens, additional castings will be // needed for the 'parts' arg in particular. For now, it is hardcoded to int. if (graph_container.graph_ptr_type == graphTypeEnum::GraphCSRViewFloat) { - std::pair results = - louvain(handle, - *(graph_container.graph_ptr_union.GraphCSRViewFloatPtr), - reinterpret_cast(parts), - max_level, - static_cast(resolution)); - final_modularity = results.second; + results = louvain(handle, + *(graph_container.graph_ptr_union.GraphCSRViewFloatPtr), + reinterpret_cast(parts), + max_level, + static_cast(resolution)); } else { - std::pair results = - louvain(handle, - *(graph_container.graph_ptr_union.GraphCSRViewDoublePtr), - reinterpret_cast(parts), - max_level, - static_cast(resolution)); - final_modularity = results.second; + results = louvain(handle, + *(graph_container.graph_ptr_union.GraphCSRViewDoublePtr), + reinterpret_cast(parts), + max_level, + static_cast(resolution)); } - return final_modularity; + return results; } // Explicit instantiations -template float call_louvain(raft::handle_t const& handle, - graph_container_t const& graph_container, - void* parts, - size_t 
max_level, - float resolution); +template std::pair call_louvain(raft::handle_t const& handle, + graph_container_t const& graph_container, + void* parts, + size_t max_level, + float resolution); -template double call_louvain(raft::handle_t const& handle, - graph_container_t const& graph_container, - void* parts, - size_t max_level, - double resolution); +template std::pair call_louvain(raft::handle_t const& handle, + graph_container_t const& graph_container, + void* parts, + size_t max_level, + double resolution); } // namespace cython } // namespace cugraph diff --git a/python/cugraph/community/louvain.pxd b/python/cugraph/community/louvain.pxd index 416ff7d331f..7b15b87f62b 100644 --- a/python/cugraph/community/louvain.pxd +++ b/python/cugraph/community/louvain.pxd @@ -21,11 +21,11 @@ from libcpp.utility cimport pair from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - cdef pair[size_t, weight_t] louvain[vertex_t,edge_t,weight_t]( + cdef pair[size_t, weight_t] call_louvain[weight_t]( const handle_t &handle, - const GraphCSRView[vertex_t,edge_t,weight_t] &graph, - vertex_t *louvain_parts, + const graph_container_t &g, + void *louvain_parts, size_t max_level, weight_t resolution) except + diff --git a/python/cugraph/community/louvain_wrapper.pyx b/python/cugraph/community/louvain_wrapper.pyx index 98d11f2e241..6a8c06b948d 100644 --- a/python/cugraph/community/louvain_wrapper.pyx +++ b/python/cugraph/community/louvain_wrapper.pyx @@ -16,7 +16,7 @@ # cython: embedsignature = True # cython: language_level = 3 -from cugraph.community.louvain cimport louvain as c_louvain +from cugraph.community cimport louvain as c_louvain from cugraph.structure.graph_primtypes cimport * from cugraph.structure import graph_primtypes_wrapper from libc.stdint cimport uintptr_t @@ -26,7 +26,7 @@ import rmm import numpy as np -def louvain(input_graph, max_iter, 
resolution): +def louvain(input_graph, max_level, resolution): """ Call louvain """ @@ -35,6 +35,7 @@ def louvain(input_graph, max_iter, resolution): cdef unique_ptr[handle_t] handle_ptr handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get(); weights = None final_modularity = None @@ -59,36 +60,50 @@ def louvain(input_graph, max_iter, resolution): cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0] cdef uintptr_t c_partition = df['partition'].__cuda_array_interface__['data'][0] cdef uintptr_t c_weights = weights.__cuda_array_interface__['data'][0] + cdef uintptr_t c_local_verts = NULL; + cdef uintptr_t c_local_edges = NULL; + cdef uintptr_t c_local_offsets = NULL; - cdef GraphCSRView[int,int,float] graph_float - cdef GraphCSRView[int,int,double] graph_double cdef float final_modularity_float = 1.0 cdef double final_modularity_double = 1.0 cdef int num_level = 0 - if weights.dtype == np.float32: - graph_float = GraphCSRView[int,int,float](c_offsets, c_indices, - c_weights, num_verts, num_edges) + # FIXME: Offsets and indices are currently hardcoded to int, but this may + # not be acceptable in the future. + weightTypeMap = {np.dtype("float32") : numberTypeEnum.floatType, + np.dtype("double") : numberTypeEnum.doubleType} + + cdef graph_container_t graph_container + + # FIXME: The excessive casting for the enum arg is needed to make cython + # understand how to pass the enum value (this is the same pattern + # used by cudf). 
This will not be needed with Cython 3.0 + populate_graph_container(graph_container, + ((legacyGraphTypeEnum.CSR)), + handle_[0], + c_offsets, c_indices, c_weights, + ((numberTypeEnum.intType)), + ((numberTypeEnum.intType)), + ((weightTypeMap[weights.dtype])), + num_verts, num_edges, + c_local_verts, c_local_edges, c_local_offsets, + False, True) # store_transposed, multi_gpu + + graph_container.get_vertex_identifiers(c_identifier) - graph_float.get_vertex_identifiers(c_identifier) - num_level, final_modularity_float = c_louvain(handle_ptr.get()[0], - graph_float, - c_partition, - max_iter, - resolution) + if weights.dtype == np.float32: + num_level, final_modularity_float = c_louvain.call_louvain[float](handle_[0], graph_container, + c_partition, + max_level, + resolution) final_modularity = final_modularity_float else: - graph_double = GraphCSRView[int,int,double](c_offsets, c_indices, - c_weights, num_verts, num_edges) - - graph_double.get_vertex_identifiers(c_identifier) - num_level, final_modularity_double = c_louvain(handle_ptr.get()[0], - graph_double, - c_partition, - max_iter, - resolution) + num_level, final_modularity_double = c_louvain.call_louvain[double](handle_[0], graph_container, + c_partition, + max_level, + resolution) final_modularity = final_modularity_double return df, final_modularity diff --git a/python/cugraph/dask/community/louvain.pxd b/python/cugraph/dask/community/louvain.pxd index 13f6dcc8434..1090ec18660 100644 --- a/python/cugraph/dask/community/louvain.pxd +++ b/python/cugraph/dask/community/louvain.pxd @@ -16,11 +16,12 @@ # cython: embedsignature = True # cython: language_level = 3 +from libcpp.utility cimport pair from cugraph.structure.graph_primtypes cimport * cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - cdef weight_t call_louvain[weight_t]( + cdef pair[size_t, weight_t] call_louvain[weight_t]( const handle_t &handle, const graph_container_t &g, void *parts, diff --git 
a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index 6ffb55dd450..ec17653e62b 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -17,7 +17,6 @@ # cython: language_level = 3 from libc.stdint cimport uintptr_t -from libcpp.pair cimport pair from cugraph.dask.community cimport louvain as c_louvain from cugraph.structure.graph_primtypes cimport * @@ -111,12 +110,12 @@ def louvain(input_df, local_data, rank, handle, max_level, resolution): False, True) # store_transposed, multi_gpu if weights.dtype == np.float32: - final_modularity_float = c_louvain.call_louvain[float]( + num_level, final_modularity_float = c_louvain.call_louvain[float]( handle_[0], graph_container, c_partition, max_level, resolution) final_modularity = final_modularity_float else: - final_modularity_double = c_louvain.call_louvain[double]( + num_level, final_modularity_double = c_louvain.call_louvain[double]( handle_[0], graph_container, c_partition, max_level, resolution) final_modularity = final_modularity_double diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index c315fe5333c..e051e3e0a4e 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -205,8 +205,8 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": CSC "cugraph::cython::legacyGraphTypeEnum::CSC" COO "cugraph::cython::legacyGraphTypeEnum::COO" - cdef struct graph_container_t: - pass + cdef cppclass graph_container_t: + void get_vertex_identifiers(void *) cdef void populate_graph_container( graph_container_t &graph_container, diff --git a/python/cugraph/tests/test_graph.py b/python/cugraph/tests/test_graph.py index 23c8bb56939..d8aadb56609 100644 --- a/python/cugraph/tests/test_graph.py +++ b/python/cugraph/tests/test_graph.py @@ -98,12 +98,13 @@ def compare_graphs(nx_graph, 
cu_graph): if len(edgelist_df.columns) > 2: df0 = cudf.from_pandas(nx.to_pandas_edgelist(nx_graph)) - df0 = df0.sort_values(by=["source", "target"]).reset_index(drop=True) - df1 = df.sort_values(by=["source", "target"]).reset_index(drop=True) - if not df0["weight"].equals(df1["weight"]): + merge = df.merge(df0, on=["source", "target"], + suffixes=("_cugraph", "_nx")) + print("merge = \n", merge) + print(merge[merge.weight_cugraph != merge.weight_nx]) + if not merge["weight_cugraph"].equals(merge["weight_nx"]): print('weights different') - print('df0 = \n', df0) - print('df1 = \n', df1) + print(merge[merge.weight_cugraph != merge.weight_nx]) return False return True diff --git a/python/cugraph/tests/test_louvain.py b/python/cugraph/tests/test_louvain.py index 49ef31603cd..d6b0030eb73 100644 --- a/python/cugraph/tests/test_louvain.py +++ b/python/cugraph/tests/test_louvain.py @@ -74,6 +74,7 @@ def test_louvain_with_edgevals(graph_file): M = utils.read_csv_for_nx(graph_file) cu_M = utils.read_csv_file(graph_file) cu_parts, cu_mod = cugraph_call(cu_M, edgevals=True) + nx_parts = networkx_call(M) # Calculating modularity scores for comparison Gnx = nx.from_pandas_edgelist( From 8957f800d84643abdcac78fa83db4a397be64fc9 Mon Sep 17 00:00:00 2001 From: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Date: Fri, 25 Sep 2020 13:08:01 -0400 Subject: [PATCH 58/74] [REVIEW] ENH updating Nx compatibility testing (#1162) * move Nx test to test_nx_convert.py * better testing * update * changelog Co-authored-by: BradReesWork --- CHANGELOG.md | 1 + python/cugraph/tests/test_graph.py | 59 ------------------------- python/cugraph/tests/test_nx_convert.py | 55 +++++++++++++++++++++-- 3 files changed, 52 insertions(+), 63 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c640407fc6e..a0b3487c161 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -18,6 +18,7 @@ - PR #1139 MNMG Louvain Python updates, Cython cleanup - PR #1149 Parquet read and concat within workers - PR 
#1152 graph container cleanup, added arg for instantiating legacy types and switch statements to factory function +- PR #1162 enhanced networkx testing ## Bug Fixes - PR #1131 Show style checker errors with set +e diff --git a/python/cugraph/tests/test_graph.py b/python/cugraph/tests/test_graph.py index d8aadb56609..59d0d5c4e09 100644 --- a/python/cugraph/tests/test_graph.py +++ b/python/cugraph/tests/test_graph.py @@ -384,65 +384,6 @@ def test_view_edge_list_for_Graph(graph_file): ).all() -# Test -@pytest.mark.skip(reason="skipping while new Nx framework is being worked") -@pytest.mark.parametrize("graph_file", utils.DATASETS) -def test_networkx_compatibility(graph_file): - gc.collect() - - # test from_cudf_edgelist() - M = utils.read_csv_for_nx(graph_file) - - df = pd.DataFrame() - df["source"] = pd.Series(M["0"]) - df["target"] = pd.Series(M["1"]) - df["weight"] = pd.Series(M.weight) - gdf = cudf.from_pandas(df) - - Gnx = nx.from_pandas_edgelist( - df, - source="source", - target="target", - edge_attr="weight", - create_using=nx.DiGraph, - ) - G = cugraph.from_cudf_edgelist( - gdf, - source="source", - destination="target", - edge_attr="weight", - create_using=cugraph.DiGraph, - ) - - print('g from gdf = \n', gdf) - print('nx from df = \n', df) - - t1 = time.time() - assert compare_graphs(Gnx, G) - t2 = time.time() - t1 - print('compare_graphs time: ', t2) - - Gnx.clear() - G.clear() - Gnx = nx.from_pandas_edgelist( - df, source="source", target="target", create_using=nx.DiGraph - ) - G = cugraph.from_cudf_edgelist( - gdf, - source="source", - destination="target", - create_using=cugraph.DiGraph, - ) - - t1 = time.time() - assert compare_graphs(Gnx, G) - t2 = time.time() - t1 - print('compare_graphs time: ', t2) - - Gnx.clear() - G.clear() - - # Test @pytest.mark.parametrize('graph_file', utils.DATASETS) def test_consolidation(graph_file): diff --git a/python/cugraph/tests/test_nx_convert.py b/python/cugraph/tests/test_nx_convert.py index 
5c47c9eb5e2..08a96a801e2 100644 --- a/python/cugraph/tests/test_nx_convert.py +++ b/python/cugraph/tests/test_nx_convert.py @@ -13,7 +13,7 @@ import gc import pytest - +import cudf import cugraph from cugraph.tests import utils @@ -29,6 +29,54 @@ import networkx as nx +def _compare_graphs(nxG, cuG, has_wt=True): + assert nxG.number_of_nodes() == cuG.number_of_nodes() + assert nxG.number_of_edges() == cuG.number_of_edges() + + cu_df = cuG.view_edge_list().to_pandas() + if has_wt is True: + cu_df = cu_df.drop(columns=["weights"]) + cu_df = cu_df.sort_values(by=["src", "dst"]).reset_index(drop=True) + + nx_df = nx.to_pandas_edgelist(nxG) + if has_wt is True: + nx_df = nx_df.drop(columns=["weight"]) + nx_df = nx_df.rename(columns={"source": "src", "target": "dst"}) + nx_df = nx_df.astype('int32') + nx_df = nx_df.sort_values(by=["src", "dst"]).reset_index(drop=True) + + assert cu_df.to_dict() == nx_df.to_dict() + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_networkx_compatibility(graph_file): + # test to make sure cuGraph and Nx build similar Graphs + + gc.collect() + + # Read in the graph + M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=True) + + # create a NetworkX DiGraph + nxG = nx.from_pandas_edgelist( + M, source="0", target="1", edge_attr="weight", + create_using=nx.DiGraph() + ) + + # create a cuGraph DiGraph + gdf = cudf.from_pandas(M) + gdf = gdf.rename(columns={"weight": "weights"}) + cuG = cugraph.from_cudf_edgelist( + gdf, + source="0", + destination="1", + edge_attr="weights", + create_using=cugraph.DiGraph, + ) + + _compare_graphs(nxG, cuG) + + # Test @pytest.mark.parametrize("graph_file", utils.DATASETS) def test_nx_convert(graph_file): @@ -36,12 +84,11 @@ def test_nx_convert(graph_file): # read data and create a Nx Graph nx_df = utils.read_csv_for_nx(graph_file) - nxG = nx.from_pandas_edgelist(nx_df, "0", "1") + nxG = nx.from_pandas_edgelist(nx_df, "0", "1", create_using=nx.DiGraph) cuG = 
cugraph.utilities.convert_from_nx(nxG) - assert nxG.number_of_nodes() == cuG.number_of_nodes() - assert nxG.number_of_edges() == cuG.number_of_edges() + _compare_graphs(nxG, cuG, has_wt=False) @pytest.mark.parametrize("graph_file", utils.DATASETS) From b353e1e87bd26039d44ab14e66ba025e618cde00 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Fri, 25 Sep 2020 13:24:03 -0400 Subject: [PATCH 59/74] Pattern accelerator based implementation of PageRank, Katz Centrality, BFS, & SSSP (#838) * draft pattern accelerator API for the pagerank pattern * implement pagerank using the pattern accelerator API * implement katz centrality using the pattern accelerator API * add handle to the pattern accelerator API * fix minor issues in pagerank & katz_centrality * add a pattern to support BFS * draft implementation of BFS using pattern accelerator APIs * move non-public APIs to the detail namespace * minor tweak to bfs * initial draft of sssp using pattern accelerator * merge e_op and e_pred_op and add reduce_op for bfs & sssp patterns * tweaking patterns for BFS & SSSP for better accelerator implementation * raise abstraction level for vertex queue * direction optimized to direction optimizing in BFS * update comments, class & function names, and several additional API changes to enable optimization * add FIXME comments to remove opg as a template parameter from graph analytics functions * rename frontier to better reflect that it is a froniter on adjacency matrix rows * updated pattern accelerator API for better expressiblity and to enable more performance optimizations in accelerator API implementations * remove template parameter bool opg from graph analytics * remove unnecessary code * .cuh to .cu and explicit instantiation for templated graph analytics functions * "split patterns.hpp to three files" * "add aliases vertex_type, edge_type, weight_type to Graph classes (to support e.g. 
GraphType::vertex_type)" * "add invalid_vertex_id and invalid_edge_id" * "add traits.hpp (initially supporting is_csr and is_csc)" * "fix typos" * add bfs.cu to CMakeLists.txt * misc. fixes * fix several compile errors * add graph_device_view class * add is_opg to graph classes * add a frontier queue class for pattern accelerators * fix bfs compile errors with pattern accelrator API * initail commit of two level pattern accelrators * fix frontier queue compile errors * few tweaks * initial commit of reduce_op.cuh * improve AdjMatrixRowFrontier implementation * first full implementation of two level patterns for BFS & SSSP * first full ipmlementation of BFS using a pattern accelerator * update copyright year and add min to reduce_op * add sssp to CMakeLists.txt * spilt two_levels_patterns.cuh to one file per pattern * thrust::raw_pointer_cast to data().get() following cuDF's convention * move pattern accelerator related files to the patterns directory * add edge_utils.cuh * add transform_reduce_e pattern accelerator implmentation * add utility functions * update bfs with pattern accelerator implementation * update sssp with pattern accelerator implementation * update graph_device_view * update queue implementation * update expand_and_transform_if_e pattern implementation * placeholder * fix merge error * move implemented patterns out from one_level_patterns.cuh * fix a conceptual bug (row in graph adjacency matrix is always source, CSC is a column major representation) * temporary commit to switch a branch * minor fixes on include statements * add experimental BFS to the test suites * use the real raft handle than the temporary placeholder * add experimental BFS test * several bug fixes * run clang-format * GraphType graph => GraphType const& graph_device_view in pattern accelerator * now BFS passes C++ tests * add depth_limit to the reference BFS implementation * run clang-format * remove dead code * fix to work with new RAFT based error handling mechanism * minor 
code restructuring * apply cutoff * add SSSP test * cosmetic updates to BFS test * SSSP bug fixes * SSSP code restructuring * update template bfs & sssp functions to take pointers for distances and predecessors instead of general iterators * now SSSP passes C++ tests * add fixme comments * temporary commit to change branch * bug fix in graph_device_view.cuh * compile error fix in bfs_test.cpp * add declarations for PageRank and Katz in algorithms.hpp * bug fix in is_column_major * fix namings * implement patter accelerator APIs for PageRank and Katz Centrality * remove unused file * bug fix * modify reference BFS & SSSP to take pointers instead of iterators * compute adj_matrix_out_weights_sums if not provided (nullptr) in PageRank * rename ..._v..._e to ..._v..._nbr * add utilities for atomic_add (better move this to RAFT) * update experimental SSSP test * bug fix in copy_v_transform_reduce_in|out_nbr * reorder pattern accelerator API input parameters * tweak pattern accelerator API * add PageRank tests with the pattern accelerator API * tweak katz centrality with the pattern accelerator * minor tweak for PageRank test code * add katz centrality test * remove experimental:: from graph classes * style fix (use T{} instead of static_cast for constant values * minor style fix * count_if_adj_matrix_row to any_of_adj_matrix_row (adj_matrix_row values are replicated in p_row processes assuming 2D partitioning and p = p_row * p_col, so count_if can be confusing) * AdjMatrixRowFrontier -> VertexFrontier * break update_frontier_v_push_if_out_nbr to two functions (the second part is replaced with copy_to_adj_matrix_row) * add pure_function flag to reduce_op (if this is defined and set to true, reduction_op can be executed in any GPU in OPG) * add documentation for experimental bfs, sssp, pagerank, and katz_centrality using the pattern accelerator API * add documentation * rename opg to multi-GPU * change get_number_of_edges return type from vertex_type to edge_type * fix 
compile errors * move the pattern accelerator API out from the detail namespace (this will make migration to RAFT easier) * thrust::cuda::par.on(handle.get_stream()) to rmm::exec_policy(handle.get_stream()).on(handle.get_stream()) * fix typo in comments * escape code from exp_graph.hpp to graph_device_view.cuh * partially update tests to work with the new graph class (more updates are necessary) * temp commit to change branch * update tests to use the new graph class * update algorithm public interface to work with the new graph class * update any_of_adj_matrix_row to support MG * temporary commit for branch change * update to work with the new graph class * fix compile errors * clang-format * replace graph_device_view.cuh with vertex_partition_device.cuh & matrix_partition_device.cuh * undo changes in include/graph.hpp, this file is no longer relevant to this PR * additionally undo changes in include/graph.hpp, this file is no longer relevant to this PR * remove unnecessary comments * remove unnecessary template parameters * add copy_to_adj_matrix_col * replace for loops with thrust algorithms and few minor cosmetic fixes * break unnecessary loop carried dependendy * bug fix * bug fix (previously used plus_thrust_tuple where plus_edge_op_result should be used) * fix erreneous comments * clang-format * fixed a bug (copy_v_transform_reduce_nbr worked with only raw pointer VertexValueOutputIterator type) * update change log * clang-format * remove cuda.cuh (this is replaced by raft) * clang-format * clang-format * update raft tag * remove unecessary code * update sG interface of graph_view.hpp to mirror MG interface * replace comm_p_row_key & comm_p_col_key with key_naming_t().row_name() and col_name() * fixed confusing variable names --- CHANGELOG.md | 1 + cpp/CMakeLists.txt | 4 + cpp/include/algorithms.hpp | 175 ++++++ .../experimental/detail/graph_utils.cuh | 53 +- cpp/include/experimental/graph.hpp | 3 +- cpp/include/experimental/graph_view.hpp | 268 +++++++-- 
cpp/include/matrix_partition_device.cuh | 246 +++++++++ .../patterns/any_of_adj_matrix_row.cuh | 72 +++ .../patterns/copy_to_adj_matrix_col.cuh | 127 +++++ .../patterns/copy_to_adj_matrix_row.cuh | 127 +++++ .../patterns/copy_v_transform_reduce_nbr.cuh | 351 ++++++++++++ cpp/include/patterns/count_if_e.cuh | 231 ++++++++ cpp/include/patterns/count_if_v.cuh | 106 ++++ cpp/include/patterns/edge_op_utils.cuh | 127 +++++ cpp/include/patterns/reduce_op.cuh | 47 ++ cpp/include/patterns/reduce_v.cuh | 99 ++++ cpp/include/patterns/transform_reduce_e.cuh | 259 +++++++++ cpp/include/patterns/transform_reduce_v.cuh | 116 ++++ ...transform_reduce_v_with_adj_matrix_row.cuh | 92 ++++ .../update_frontier_v_push_if_out_nbr.cuh | 510 ++++++++++++++++++ cpp/include/patterns/vertex_frontier.cuh | 381 +++++++++++++ cpp/include/utilities/thrust_tuple_utils.cuh | 225 ++++++++ cpp/include/vertex_partition_device.cuh | 112 ++++ cpp/src/experimental/bfs.cu | 218 ++++++++ cpp/src/experimental/graph.cu | 39 +- cpp/src/experimental/graph_view.cu | 33 +- cpp/src/experimental/katz_centrality.cu | 218 ++++++++ cpp/src/experimental/pagerank.cu | 349 ++++++++++++ cpp/src/experimental/sssp.cu | 285 ++++++++++ cpp/tests/CMakeLists.txt | 62 ++- cpp/tests/experimental/bfs_test.cpp | 204 +++++++ .../experimental/katz_centrality_test.cpp | 224 ++++++++ cpp/tests/experimental/pagerank_test.cpp | 244 +++++++++ cpp/tests/experimental/sssp_test.cpp | 223 ++++++++ cpp/tests/utilities/test_utilities.hpp | 46 +- 35 files changed, 5765 insertions(+), 112 deletions(-) create mode 100644 cpp/include/matrix_partition_device.cuh create mode 100644 cpp/include/patterns/any_of_adj_matrix_row.cuh create mode 100644 cpp/include/patterns/copy_to_adj_matrix_col.cuh create mode 100644 cpp/include/patterns/copy_to_adj_matrix_row.cuh create mode 100644 cpp/include/patterns/copy_v_transform_reduce_nbr.cuh create mode 100644 cpp/include/patterns/count_if_e.cuh create mode 100644 cpp/include/patterns/count_if_v.cuh create mode 
100644 cpp/include/patterns/edge_op_utils.cuh create mode 100644 cpp/include/patterns/reduce_op.cuh create mode 100644 cpp/include/patterns/reduce_v.cuh create mode 100644 cpp/include/patterns/transform_reduce_e.cuh create mode 100644 cpp/include/patterns/transform_reduce_v.cuh create mode 100644 cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh create mode 100644 cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh create mode 100644 cpp/include/patterns/vertex_frontier.cuh create mode 100644 cpp/include/utilities/thrust_tuple_utils.cuh create mode 100644 cpp/include/vertex_partition_device.cuh create mode 100644 cpp/src/experimental/bfs.cu create mode 100644 cpp/src/experimental/katz_centrality.cu create mode 100644 cpp/src/experimental/pagerank.cu create mode 100644 cpp/src/experimental/sssp.cu create mode 100644 cpp/tests/experimental/bfs_test.cpp create mode 100644 cpp/tests/experimental/katz_centrality_test.cpp create mode 100644 cpp/tests/experimental/pagerank_test.cpp create mode 100644 cpp/tests/experimental/sssp_test.cpp diff --git a/CHANGELOG.md b/CHANGELOG.md index a0b3487c161..4e3c135f5fc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ ## New Features - PR #1098 Add new graph classes to support 2D partitioning - PR #1124 Sub-communicator initialization for 2D partitioning support +- PR #838 Add pattern accelerator API functions and pattern accelerator API based implementations of PageRank, Katz Centrality, BFS, and SSSP - PR #1147 Added support for NetworkX graphs as input type - PR #1157 Louvain API update to use graph_container_t diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 52faf34be01..3315f2c86e8 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -303,6 +303,10 @@ add_library(cugraph SHARED src/centrality/betweenness_centrality.cu src/experimental/graph.cu src/experimental/graph_view.cu + src/experimental/bfs.cu + src/experimental/sssp.cu + src/experimental/pagerank.cu + 
src/experimental/katz_centrality.cu ) # diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index 1331f12ff07..9118ed3a7c4 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include #include @@ -933,4 +934,178 @@ void hits(GraphCSRView const &graph, } // namespace gunrock +namespace experimental { + +/** + * @brief Run breadth-first search to find the distances (and predecessors) from the source + * vertex. + * + * This function computes the distances (minimum number of hops to reach the vertex) from the source + * vertex. If @p predecessors is not `nullptr`, this function calculates the predecessor of each + * vertex (parent vertex in the breadth-first search tree) as well. + * + * @throws cugraph::logic_error on erroneous input arguments. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object. + * @param distances Pointer to the output distance array. + * @param predecessors Pointer to the output predecessor array or `nullptr`. + * @param source_vertex Source vertex to start breadth-first search (root vertex of the breadth-first + * search tree). + * @param direction_optimizing If set to true, this algorithm switches between the push based + * breadth-first search and pull based breadth-first search depending on the size of the + * breadth-first search frontier (currently unsupported). This option is valid only for symmetric + * input graphs. + * @param depth_limit Sets the maximum number of breadth-first search iterations. 
Any vertices + * farther than @p depth_limit hops from @p source_vertex will be marked as unreachable. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + */ +template +void bfs(raft::handle_t &handle, + graph_view_t const &graph_view, + vertex_t *distances, + vertex_t *predecessors, + vertex_t source_vertex, + bool direction_optimizing = false, + vertex_t depth_limit = std::numeric_limits::max(), + bool do_expensive_check = false); + +/** + * @brief Run single-source shortest-path to compute the minimum distances (and predecessors) from + * the source vertex. + * + * This function computes the distances (minimum edge weight sums) from the source vertex. If @p + * predecessors is not `nullptr`, this function calculates the predecessor of each vertex in the + * shortest-path as well. Graph edge weights should be non-negative. + * + * @throws cugraph::logic_error on erroneous input arguments. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object. + * @param distances Pointer to the output distance array. + * @param predecessors Pointer to the output predecessor array or `nullptr`. + * @param source_vertex Source vertex to start single-source shortest-path. + * @param cutoff Single-source shortest-path terminates if no more vertices are reachable within the + * distance of @p cutoff. Any vertex farther than @p cutoff will be marked as unreachable. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). 
+ */ +template +void sssp(raft::handle_t &handle, + graph_view_t const &graph_view, + weight_t *distances, + vertex_t *predecessors, + vertex_t source_vertex, + weight_t cutoff = std::numeric_limits::max(), + bool do_expensive_check = false); + +/** + * @brief Compute PageRank scores. + * + * This function computes general (if @p personalization_vertices is `nullptr`) or personalized (if + * @p personalization_vertices is not `nullptr`.) PageRank scores. + * + * @throws cugraph::logic_error on erroneous input arguments or if fails to converge before @p + * max_iterations. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam result_t Type of PageRank scores. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object. + * @param adj_matrix_row_out_weight_sums Pointer to an array storing sums of out-going edge weights + * for the vertices in the rows of the graph adjacency matrix (for re-use) or `nullptr`. If + * `nullptr`, these values are freshly computed. Computing these values outsid this function reduces + * the number of memoray allocations/deallocations and computing if a user repeatedly computes + * PageRank scores using the same graph with different personalization vectors. + * @param personalization_vertices Pointer to an array storing personalization vertex identifiers + * (compute personalized PageRank) or `nullptr` (compute general PageRank). + * @param personalization_values Pointer to an array storing personalization values for the vertices + * in the personalization set. Relevant only if @p personalization_vertices is not `nullptr`. + * @param personalization_vector_size Size of the personalization set. 
If @p personalization_vertices + * is not `nullptr`, the sizes of the arrays pointed by @p personalization_vertices and @p + * personalization_values should be @p personalization_vector_size. + * @param pageranks Pointer to the output PageRank score array. + * @param alpha PageRank damping factor. + * @param epsilon Error tolerance to check convergence. Convergence is assumed if the sum of the + * differences in PageRank values between two consecutive iterations is less than the number of + * vertices in the graph multiplied by @p epsilon. + * @param max_iterations Maximum number of PageRank iterations. + * @param has_initial_guess If set to `true`, values in the PageRank output array (pointed by @p + * pageranks) is used as initial PageRank values. If false, initial PageRank values are set to 1.0 + * divided by the number of vertices in the graph. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + */ +template +void pagerank(raft::handle_t &handle, + graph_view_t const &graph_view, + weight_t *adj_matrix_row_out_weight_sums, + vertex_t *personalization_vertices, + result_t *personalization_values, + vertex_t personalization_vector_size, + result_t *pageranks, + result_t alpha, + result_t epsilon, + size_t max_iterations = 500, + bool has_initial_guess = false, + bool do_expensive_check = false); + +/** + * @brief Compute Katz Centrality scores. + * + * This function computes Katz Centrality scores. + * + * @throws cugraph::logic_error on erroneous input arguments or if fails to converge before @p + * max_iterations. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam result_t Type of Katz Centrality scores. + * @param handle RAFT handle object to encapsulate resources (e.g. 
CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object. + * @param betas Pointer to an array holding the values to be added to each vertex's new Katz + * Centrality score in every iteration or `nullptr`. If set to `nullptr`, constant @p beta is used + * instead. + * @param katz_centralities Pointer to the output Katz Centrality score array. + * @param alpha Katz Centrality attenuation factor. This should be smaller than the inverse of the + * maximum eigenvalue of the adjacency matrix of @p graph. + * @param beta Constant value to be added to each vertex's new Katz Centrality score in every + * iteration. Relevant only when @p betas is `nullptr`. + * @param epsilon Error tolerance to check convergence. Convergence is assumed if the sum of the + * differences in Katz Centrality values between two consecutive iterations is less than the number + * of vertices in the graph multiplied by @p epsilon. + * @param max_iterations Maximum number of Katz Centrality iterations. + * @param has_initial_guess If set to `true`, values in the Katz Centrality output array (pointed by + * @p katz_centralities) is used as initial Katz Centrality values. If false, zeros are used as + * initial Katz Centrality values. + * @param normalize If set to `true`, final Katz Centrality scores are normalized (the L2-norm of + * the returned Katz Centrality score array is 1.0) before returning. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). 
+ */ +template +void katz_centrality(raft::handle_t &handle, + graph_view_t const &graph_view, + result_t *betas, + result_t *katz_centralities, + result_t alpha, + result_t beta, + result_t epsilon, + size_t max_iterations = 500, + bool has_initial_guess = false, + bool normalize = false, + bool do_expensive_check = false); + +} // namespace experimental + } // namespace cugraph diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh index fe092342f80..c94348329f7 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/experimental/detail/graph_utils.cuh @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -39,57 +40,57 @@ rmm::device_uvector compute_major_degree( std::vector const &adj_matrix_partition_offsets, partition_t const &partition) { - auto &comm_p_row = handle.get_subcomm(comm_p_row_key); - auto const comm_p_row_rank = comm_p_row.get_rank(); - auto const comm_p_row_size = comm_p_row.get_size(); - auto &comm_p_col = handle.get_subcomm(comm_p_col_key); - auto const comm_p_col_rank = comm_p_col.get_rank(); - auto const comm_p_col_size = comm_p_col.get_size(); + auto &row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto &col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); rmm::device_uvector local_degrees(0, handle.get_stream()); rmm::device_uvector degrees(0, handle.get_stream()); vertex_t max_num_local_degrees{0}; - for (int i = 0; i < comm_p_col_size; ++i) { + for (int i = 0; i < col_comm_size; ++i) { auto vertex_partition_idx = partition.is_hypergraph_partitioned() - ? 
static_cast(comm_p_row_size) * static_cast(i) + - static_cast(comm_p_row_rank) - : static_cast(comm_p_col_size) * static_cast(comm_p_row_rank) + + ? static_cast(row_comm_size) * static_cast(i) + + static_cast(row_comm_rank) + : static_cast(col_comm_size) * static_cast(row_comm_rank) + static_cast(i); vertex_t major_first{}; vertex_t major_last{}; std::tie(major_first, major_last) = partition.get_vertex_partition_range(vertex_partition_idx); max_num_local_degrees = std::max(max_num_local_degrees, major_last - major_first); - if (i == comm_p_col_rank) { degrees.resize(major_last - major_first, handle.get_stream()); } + if (i == col_comm_rank) { degrees.resize(major_last - major_first, handle.get_stream()); } } local_degrees.resize(max_num_local_degrees, handle.get_stream()); - for (int i = 0; i < comm_p_col_size; ++i) { + for (int i = 0; i < col_comm_size; ++i) { auto vertex_partition_idx = partition.is_hypergraph_partitioned() - ? static_cast(comm_p_row_size) * static_cast(i) + - static_cast(comm_p_row_rank) - : static_cast(comm_p_col_size) * static_cast(comm_p_row_rank) + + ? static_cast(row_comm_size) * static_cast(i) + + static_cast(row_comm_rank) + : static_cast(col_comm_size) * static_cast(row_comm_rank) + static_cast(i); vertex_t major_first{}; vertex_t major_last{}; std::tie(major_first, major_last) = partition.get_vertex_partition_range(vertex_partition_idx); - auto p_offsets = partition.is_hypergraph_partitioned() - ? adj_matrix_partition_offsets[i] - : adj_matrix_partition_offsets[0] + - (major_first - partition.get_vertex_partition_range_first( - comm_p_col_size * comm_p_row_rank)); + auto p_offsets = + partition.is_hypergraph_partitioned() + ? 
adj_matrix_partition_offsets[i] + : adj_matrix_partition_offsets[0] + + (major_first - partition.get_vertex_partition_first(col_comm_size * row_comm_rank)); thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), thrust::make_counting_iterator(vertex_t{0}), thrust::make_counting_iterator(major_last - major_first), local_degrees.data(), [p_offsets] __device__(auto i) { return p_offsets[i + 1] - p_offsets[i]; }); - comm_p_row.reduce(local_degrees.data(), - i == comm_p_col_rank ? degrees.data() : static_cast(nullptr), - degrees.size(), - raft::comms::op_t::SUM, - comm_p_col_rank, - handle.get_stream()); + row_comm.reduce(local_degrees.data(), + i == col_comm_rank ? degrees.data() : static_cast(nullptr), + degrees.size(), + raft::comms::op_t::SUM, + col_comm_rank, + handle.get_stream()); } auto status = handle.get_comms().sync_stream( diff --git a/cpp/include/experimental/graph.hpp b/cpp/include/experimental/graph.hpp index ea4a7882363..a6e22ce7009 100644 --- a/cpp/include/experimental/graph.hpp +++ b/cpp/include/experimental/graph.hpp @@ -72,8 +72,7 @@ class graph_t view() diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp index 1d37858dfa4..0e0bf40a01d 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/experimental/graph_view.hpp @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -30,10 +31,6 @@ namespace cugraph { namespace experimental { -// FIXME: these should better be defined somewhere else. 
-std::string const comm_p_row_key = "comm_p_row"; -std::string const comm_p_col_key = "comm_p_key"; - /** * @brief store vertex partitioning map * @@ -77,20 +74,20 @@ class partition_t { public: partition_t(std::vector const& vertex_partition_offsets, bool hypergraph_partitioned, - int comm_p_row_size, - int comm_p_col_size, - int comm_p_row_rank, - int comm_p_col_rank) + int row_comm_size, + int col_comm_size, + int row_comm_rank, + int col_comm_rank) : vertex_partition_offsets_(vertex_partition_offsets), hypergraph_partitioned_(hypergraph_partitioned), - comm_p_rank_(comm_p_col_size * comm_p_row_rank + comm_p_col_rank), - comm_p_row_size_(comm_p_row_size), - comm_p_col_size_(comm_p_col_size), - comm_p_row_rank_(comm_p_row_rank), - comm_p_col_rank_(comm_p_col_rank) + comm_rank_(col_comm_size * row_comm_rank + col_comm_rank), + row_comm_size_(row_comm_size), + col_comm_size_(col_comm_size), + row_comm_rank_(row_comm_rank), + col_comm_rank_(col_comm_rank) { CUGRAPH_EXPECTS( - vertex_partition_offsets.size() == static_cast(comm_p_row_size * comm_p_col_size), + vertex_partition_offsets.size() == static_cast(row_comm_size * col_comm_size), "Invalid API parameter: erroneous vertex_partition_offsets.size()."); CUGRAPH_EXPECTS( @@ -98,23 +95,24 @@ class partition_t { "Invalid API parameter: partition.vertex_partition_offsets values should be non-descending."); CUGRAPH_EXPECTS(vertex_partition_offsets_[0] == vertex_t{0}, "Invalid API parameter: partition.vertex_partition_offsets[0] should be 0."); + + vertex_t start_offset{0}; + matrix_partition_major_value_start_offsets_.assign(get_number_of_matrix_partitions(), 0); + for (size_t i = 0; i < matrix_partition_major_value_start_offsets_.size(); ++i) { + matrix_partition_major_value_start_offsets_[i] = start_offset; + start_offset += get_matrix_partition_major_last(i) - get_matrix_partition_major_first(i); + } } std::tuple get_vertex_partition_range() const { - return std::make_tuple(vertex_partition_offsets_[comm_p_rank_], - 
vertex_partition_offsets_[comm_p_rank_ + 1]); + return std::make_tuple(vertex_partition_offsets_[comm_rank_], + vertex_partition_offsets_[comm_rank_ + 1]); } - vertex_t get_vertex_partition_range_first() const - { - return vertex_partition_offsets_[comm_p_rank_]; - } + vertex_t get_vertex_partition_first() const { return vertex_partition_offsets_[comm_rank_]; } - vertex_t get_vertex_partition_range_last() const - { - return vertex_partition_offsets_[comm_p_rank_ + 1]; - } + vertex_t get_vertex_partition_last() const { return vertex_partition_offsets_[comm_rank_ + 1]; } std::tuple get_vertex_partition_range(size_t vertex_partition_idx) const { @@ -122,49 +120,86 @@ class partition_t { vertex_partition_offsets_[vertex_partition_idx + 1]); } - vertex_t get_vertex_partition_range_first(size_t vertex_partition_idx) const + vertex_t get_vertex_partition_first(size_t vertex_partition_idx) const { return vertex_partition_offsets_[vertex_partition_idx]; } - vertex_t get_vertex_partition_range_last(size_t vertex_partition_idx) const + vertex_t get_vertex_partition_last(size_t vertex_partition_idx) const { return vertex_partition_offsets_[vertex_partition_idx + 1]; } + size_t get_number_of_matrix_partitions() const + { + return hypergraph_partitioned_ ? col_comm_size_ : 1; + } + std::tuple get_matrix_partition_major_range(size_t partition_idx) const { auto major_first = hypergraph_partitioned_ - ? vertex_partition_offsets_[comm_p_row_size_ * partition_idx + comm_p_row_rank_] - : vertex_partition_offsets_[comm_p_row_rank_ * comm_p_col_size_]; + ? vertex_partition_offsets_[row_comm_size_ * partition_idx + row_comm_rank_] + : vertex_partition_offsets_[row_comm_rank_ * col_comm_size_]; auto major_last = hypergraph_partitioned_ - ? vertex_partition_offsets_[comm_p_row_size_ * partition_idx + comm_p_row_rank_ + 1] - : vertex_partition_offsets_[(comm_p_row_rank_ + 1) * comm_p_col_size_]; + ? 
vertex_partition_offsets_[row_comm_size_ * partition_idx + row_comm_rank_ + 1] + : vertex_partition_offsets_[(row_comm_rank_ + 1) * col_comm_size_]; return std::make_tuple(major_first, major_last); } + vertex_t get_matrix_partition_major_first(size_t partition_idx) const + { + return hypergraph_partitioned_ + ? vertex_partition_offsets_[row_comm_size_ * partition_idx + row_comm_rank_] + : vertex_partition_offsets_[row_comm_rank_ * col_comm_size_]; + } + + vertex_t get_matrix_partition_major_last(size_t partition_idx) const + { + return hypergraph_partitioned_ + ? vertex_partition_offsets_[row_comm_size_ * partition_idx + row_comm_rank_ + 1] + : vertex_partition_offsets_[(row_comm_rank_ + 1) * col_comm_size_]; + } + + vertex_t get_matrix_partition_major_value_start_offset(size_t partition_idx) const + { + return matrix_partition_major_value_start_offsets_[partition_idx]; + } + std::tuple get_matrix_partition_minor_range() const { - auto minor_first = vertex_partition_offsets_[comm_p_col_rank_ * comm_p_row_size_]; - auto minor_last = vertex_partition_offsets_[(comm_p_col_rank_ + 1) * comm_p_row_size_]; + auto minor_first = vertex_partition_offsets_[col_comm_rank_ * row_comm_size_]; + auto minor_last = vertex_partition_offsets_[(col_comm_rank_ + 1) * row_comm_size_]; return std::make_tuple(minor_first, minor_last); } + vertex_t get_matrix_partition_minor_first() const + { + return vertex_partition_offsets_[col_comm_rank_ * row_comm_size_]; + } + + vertex_t get_matrix_partition_minor_last() const + { + return vertex_partition_offsets_[(col_comm_rank_ + 1) * row_comm_size_]; + } + bool is_hypergraph_partitioned() const { return hypergraph_partitioned_; } private: std::vector vertex_partition_offsets_{}; // size = P + 1 bool hypergraph_partitioned_{false}; - int comm_p_rank_{0}; - int comm_p_row_size_{0}; - int comm_p_col_size_{0}; - int comm_p_row_rank_{0}; - int comm_p_col_rank_{0}; + int comm_rank_{0}; + int row_comm_size_{0}; + int col_comm_size_{0}; + int 
row_comm_rank_{0}; + int col_comm_rank_{0}; + + std::vector + matrix_partition_major_value_start_offsets_{}; // size = get_number_of_matrix_partitions() }; struct graph_properties_t { @@ -195,6 +230,18 @@ class graph_base_t { vertex_t get_number_of_vertices() const { return number_of_vertices_; } edge_t get_number_of_edges() const { return number_of_edges_; } + template + std::enable_if_t::value, bool> is_valid_vertex(vertex_type v) const + { + return ((v >= 0) && (v < number_of_vertices_)); + } + + template + std::enable_if_t::value, bool> is_valid_vertex(vertex_type v) const + { + return (v < number_of_vertices_); + } + bool is_symmetric() const { return properties_.is_symmetric; } bool is_multigraph() const { return properties_.is_multigraph; } @@ -254,13 +301,96 @@ class graph_view_t 0; } + vertex_t get_number_of_local_vertices() const { - return partition_.get_vertex_partition_range_last() - - partition_.get_vertex_partition_range_first(); + return partition_.get_vertex_partition_last() - partition_.get_vertex_partition_first(); } - size_t get_number_of_adj_matrix_partitions() { return adj_matrix_partition_offsets_.size(); } + vertex_t get_local_vertex_first() const { return partition_.get_vertex_partition_first(); } + + vertex_t get_local_vertex_last() const { return partition_.get_vertex_partition_last(); } + + bool is_local_vertex_nocheck(vertex_t v) const + { + return (v >= get_local_vertex_first()) && (v < get_local_vertex_last()); + } + + size_t get_number_of_local_adj_matrix_partitions() const + { + return adj_matrix_partition_offsets_.size(); + } + + vertex_t get_number_of_local_adj_matrix_partition_rows() const + { + if (!store_transposed) { + vertex_t ret{0}; + for (size_t i = 0; i < partition_.get_number_of_matrix_partitions(); ++i) { + ret += partition_.get_matrix_partition_major_last(i) - + partition_.get_matrix_partition_major_first(i); + } + return ret; + } else { + return partition_.get_matrix_partition_minor_last() - + 
partition_.get_matrix_partition_minor_first(); + } + } + + vertex_t get_number_of_local_adj_matrix_partition_cols() const + { + if (store_transposed) { + vertex_t ret{0}; + for (size_t i = 0; i < partition_.get_number_of_matrix_partitions(); ++i) { + ret += partition_.get_matrix_partition_major_last(i) - + partition_.get_matrix_partition_major_first(i); + } + return ret; + } else { + return partition_.get_matrix_partition_minor_last() - + partition_.get_matrix_partition_minor_first(); + } + } + + vertex_t get_local_adj_matrix_partition_row_first(size_t adj_matrix_partition_idx) const + { + return store_transposed ? partition_.get_matrix_partition_minor_first() + : partition_.get_matrix_partition_major_first(adj_matrix_partition_idx); + } + + vertex_t get_local_adj_matrix_partition_row_last(size_t adj_matrix_partition_idx) const + { + return store_transposed ? partition_.get_matrix_partition_minor_last() + : partition_.get_matrix_partition_major_last(adj_matrix_partition_idx); + } + + vertex_t get_local_adj_matrix_partition_row_value_start_offset( + size_t adj_matrix_partition_idx) const + { + return store_transposed + ? 0 + : partition_.get_matrix_partition_major_value_start_offset(adj_matrix_partition_idx); + } + + vertex_t get_local_adj_matrix_partition_col_first(size_t adj_matrix_partition_idx) const + { + return store_transposed ? partition_.get_matrix_partition_major_first(adj_matrix_partition_idx) + : partition_.get_matrix_partition_minor_first(); + } + + vertex_t get_local_adj_matrix_partition_col_last(size_t adj_matrix_partition_idx) const + { + return store_transposed ? partition_.get_matrix_partition_major_last(adj_matrix_partition_idx) + : partition_.get_matrix_partition_minor_last(); + } + + vertex_t get_local_adj_matrix_partition_col_value_start_offset( + size_t adj_matrix_partition_idx) const + { + return store_transposed + ? 
partition_.get_matrix_partition_major_value_start_offset(adj_matrix_partition_idx) + : 0; + } // FIXME: this function is not part of the public stable API.This function is mainly for pattern // accelerator implementation. This function is currently public to support the legacy @@ -335,8 +465,66 @@ class graph_view_tget_number_of_vertices(); } + constexpr vertex_t get_local_vertex_first() const { return vertex_t{0}; } + + vertex_t get_local_vertex_last() const { return this->get_number_of_vertices(); } + + constexpr bool is_local_vertex_nocheck(vertex_t v) const { return true; } + + constexpr size_t get_number_of_local_adj_matrix_partitions() const { return size_t(1); } + + vertex_t get_number_of_local_adj_matrix_partition_rows() const + { + return this->get_number_of_vertices(); + } + + vertex_t get_number_of_local_adj_matrix_partition_cols() const + { + return this->get_number_of_vertices(); + } + + vertex_t get_local_adj_matrix_partition_row_first(size_t adj_matrix_partition_idx) const + { + assert(adj_matrix_partition_idx == 0); + return vertex_t{0}; + } + + vertex_t get_local_adj_matrix_partition_row_last(size_t adj_matrix_partition_idx) const + { + assert(adj_matrix_partition_idx == 0); + return this->get_number_of_vertices(); + } + + vertex_t get_local_adj_matrix_partition_row_value_start_offset( + size_t adj_matrix_partition_idx) const + { + assert(adj_matrix_partition_idx == 0); + return vertex_t{0}; + } + + vertex_t get_local_adj_matrix_partition_col_first(size_t adj_matrix_partition_idx) const + { + assert(adj_matrix_partition_idx == 0); + return vertex_t{0}; + } + + vertex_t get_local_adj_matrix_partition_col_last(size_t adj_matrix_partition_idx) const + { + assert(adj_matrix_partition_idx == 0); + return this->get_number_of_vertices(); + } + + vertex_t get_local_adj_matrix_partition_col_value_start_offset( + size_t adj_matrix_partition_idx) const + { + assert(adj_matrix_partition_idx == 0); + return vertex_t{0}; + } + // FIXME: this function is not part 
of the public stable API.This function is mainly for pattern // accelerator implementation. This function is currently public to support the legacy // implementations directly accessing CSR/CSC data, but this function will eventually become diff --git a/cpp/include/matrix_partition_device.cuh b/cpp/include/matrix_partition_device.cuh new file mode 100644 index 00000000000..53796530f60 --- /dev/null +++ b/cpp/include/matrix_partition_device.cuh @@ -0,0 +1,246 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +#include + +namespace cugraph { +namespace experimental { + +template +class matrix_partition_device_base_t { + public: + matrix_partition_device_base_t(edge_t const* offsets, + vertex_t const* indices, + weight_t const* weights) + : offsets_(offsets), indices_(indices), weights_(weights) + { + } + + __device__ thrust::tuple get_local_edges( + vertex_t major_offset) const noexcept + { + auto edge_offset = *(offsets_ + major_offset); + auto local_degree = *(offsets_ + (major_offset + 1)) - edge_offset; + auto indices = indices_ + edge_offset; + auto weights = weights_ != nullptr ? 
weights_ + edge_offset : nullptr; + return thrust::make_tuple(indices, weights, local_degree); + } + + __device__ edge_t get_local_degree(vertex_t major_offset) const noexcept + { + return *(offsets_ + (major_offset + 1)) - *(offsets_ + major_offset); + } + + private: + // should be trivially copyable to device + edge_t const* offsets_{nullptr}; + vertex_t const* indices_{nullptr}; + weight_t const* weights_{nullptr}; +}; + +template +class matrix_partition_device_t; + +// multi-GPU version +template +class matrix_partition_device_t> + : public matrix_partition_device_base_t { + public: + matrix_partition_device_t(GraphViewType const& graph_view, size_t partition_idx) + : matrix_partition_device_base_t( + graph_view.offsets(partition_idx), + graph_view.indices(partition_idx), + graph_view.weights(partition_idx)), + major_first_(GraphViewType::is_adj_matrix_transposed + ? graph_view.get_local_adj_matrix_partition_col_first(partition_idx) + : graph_view.get_local_adj_matrix_partition_row_first(partition_idx)), + major_last_(GraphViewType::is_adj_matrix_transposed + ? graph_view.get_local_adj_matrix_partition_col_last(partition_idx) + : graph_view.get_local_adj_matrix_partition_row_last(partition_idx)), + minor_first_(GraphViewType::is_adj_matrix_transposed + ? graph_view.get_local_adj_matrix_partition_row_first(partition_idx) + : graph_view.get_local_adj_matrix_partition_col_first(partition_idx)), + minor_last_(GraphViewType::is_adj_matrix_transposed + ? graph_view.get_local_adj_matrix_partition_row_last(partition_idx) + : graph_view.get_local_adj_matrix_partition_col_last(partition_idx)), + major_value_start_offset_( + GraphViewType::is_adj_matrix_transposed + ? 
graph_view.get_local_adj_matrix_partition_col_value_start_offset(partition_idx) + : graph_view.get_local_adj_matrix_partition_row_value_start_offset(partition_idx)) + { + } + + typename GraphViewType::vertex_type get_major_value_start_offset() const + { + return major_value_start_offset_; + } + + __host__ __device__ typename GraphViewType::vertex_type get_major_first() const noexcept + { + return major_first_; + } + + __host__ __device__ typename GraphViewType::vertex_type get_major_last() const noexcept + { + return major_last_; + } + + __host__ __device__ typename GraphViewType::vertex_type get_major_size() const noexcept + { + return major_last_ - major_first_; + } + + __host__ __device__ typename GraphViewType::vertex_type get_minor_first() const noexcept + { + return minor_first_; + } + + __host__ __device__ typename GraphViewType::vertex_type get_minor_last() const noexcept + { + return minor_last_; + } + + __host__ __device__ typename GraphViewType::vertex_type get_minor_size() const noexcept + { + return minor_last_ - minor_first_; + } + + __host__ __device__ typename GraphViewType::vertex_type get_major_offset_from_major_nocheck( + typename GraphViewType::vertex_type major) const noexcept + { + return major - major_first_; + } + + __host__ __device__ typename GraphViewType::vertex_type get_minor_offset_from_minor_nocheck( + typename GraphViewType::vertex_type minor) const noexcept + { + return minor - minor_first_; + } + + __host__ __device__ typename GraphViewType::vertex_type get_major_from_major_offset_nocheck( + typename GraphViewType::vertex_type major_offset) const noexcept + { + return major_first_ + major_offset; + } + + __host__ __device__ typename GraphViewType::vertex_type get_minor_from_minor_offset_nocheck( + typename GraphViewType::vertex_type minor_offset) const noexcept + { + return minor_first_ + minor_offset; + } + + private: + // should be trivially copyable to device + typename GraphViewType::vertex_type major_first_{0}; + typename 
GraphViewType::vertex_type major_last_{0}; + typename GraphViewType::vertex_type minor_first_{0}; + typename GraphViewType::vertex_type minor_last_{0}; + + typename GraphViewType::vertex_type major_value_start_offset_{0}; +}; + +// single-GPU version +template +class matrix_partition_device_t> + : public matrix_partition_device_base_t { + public: + matrix_partition_device_t(GraphViewType const& graph_view, size_t partition_idx) + : matrix_partition_device_base_t( + graph_view.offsets(), graph_view.indices(), graph_view.weights()), + number_of_vertices_(graph_view.get_number_of_vertices()) + { + assert(partition_idx == 0); + } + + typename GraphViewType::vertex_type get_major_value_start_offset() const + { + return typename GraphViewType::vertex_type{0}; + } + + __host__ __device__ constexpr typename GraphViewType::vertex_type get_major_first() const noexcept + { + return typename GraphViewType::vertex_type{0}; + } + + __host__ __device__ typename GraphViewType::vertex_type get_major_last() const noexcept + { + return number_of_vertices_; + } + + __host__ __device__ typename GraphViewType::vertex_type get_major_size() const noexcept + { + return number_of_vertices_; + } + + __host__ __device__ constexpr typename GraphViewType::vertex_type get_minor_first() const noexcept + { + return typename GraphViewType::vertex_type{0}; + } + + __host__ __device__ typename GraphViewType::vertex_type get_minor_last() const noexcept + { + return number_of_vertices_; + } + + __host__ __device__ typename GraphViewType::vertex_type get_minor_size() const noexcept + { + return number_of_vertices_; + } + + __host__ __device__ typename GraphViewType::vertex_type get_major_offset_from_major_nocheck( + typename GraphViewType::vertex_type major) const noexcept + { + return major; + } + + __host__ __device__ typename GraphViewType::vertex_type get_minor_offset_from_minor_nocheck( + typename GraphViewType::vertex_type minor) const noexcept + { + return minor; + } + + __host__ __device__ 
typename GraphViewType::vertex_type get_major_from_major_offset_nocheck( + typename GraphViewType::vertex_type major_offset) const noexcept + { + return major_offset; + } + + __host__ __device__ typename GraphViewType::vertex_type get_minor_from_minor_offset_nocheck( + typename GraphViewType::vertex_type minor_offset) const noexcept + { + return minor_offset; + } + + private: + typename GraphViewType::vertex_type number_of_vertices_; +}; + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/any_of_adj_matrix_row.cuh b/cpp/include/patterns/any_of_adj_matrix_row.cuh new file mode 100644 index 00000000000..32602991cc3 --- /dev/null +++ b/cpp/include/patterns/any_of_adj_matrix_row.cuh @@ -0,0 +1,72 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +/** + * @brief Check any of graph adjacency matrix row properties satisfy the given predicate. + * + * Returns true if @p row_op returns true for at least once (in any process in multi-GPU), returns + * false otherwise. This function is inspired by thrust::any_of(). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row + * input properties. 
+ * @tparam RowOp Type of the unary predicate operator. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row properties + * for the first (inclusive) row (assigned to this process in multi-GPU). + * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + + * @p graph_view.get_number_of_adj_matrix_local_rows(). + * @param row_op Unary predicate operator that takes *(@p adj_matrix_row_value_input_first + i) + * (where i = [0, @p graph_view.get_number_of_adj_matrix_local_rows()) and returns either + * true or false. + * @return true If the predicate returns true at least once (in any process in multi-GPU). + * @return false If the predicate never returns true (in any process in multi-GPU). + */ +template +bool any_of_adj_matrix_row(raft::handle_t const& handle, + GraphViewType const& graph_view, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + RowOp row_op) +{ + // better use thrust::any_of once https://github.com/thrust/thrust/issues/1016 is resolved + auto count = thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + adj_matrix_row_value_input_first, + adj_matrix_row_value_input_first + graph_view.get_number_of_local_adj_matrix_partition_rows(), + row_op); + if (GraphViewType::is_multi_gpu) { + handle.get_comms().allreduce(&count, &count, 1, raft::comms::op_t::SUM, handle.get_stream()); + } + return (count > 0); +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/copy_to_adj_matrix_col.cuh b/cpp/include/patterns/copy_to_adj_matrix_col.cuh new file mode 100644 index 00000000000..c2c96dca586 --- /dev/null +++ b/cpp/include/patterns/copy_to_adj_matrix_col.cuh @@ -0,0 +1,127 @@ +/* + * Copyright 
(c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include + +#include +#include +#include + +namespace cugraph { +namespace experimental { + +/** + * @brief Copy vertex property values to the corresponding graph adjacency matrix column property + * variables. + * + * This version fills the entire set of graph adjacency matrix column property values. This function + * is inspired by thrust::copy(). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam VertexValueInputIterator Type of the iterator for vertex properties. + * @tparam AdjMatrixColValueOutputIterator Type of the iterator for graph adjacency matrix column + * output property variables. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param vertex_value_input_first Iterator pointing to the vertex properties for the first + * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive) + * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices(). + * @param adj_matrix_col_value_output_first Iterator pointing to the adjacency matrix column output + * property variables for the first (inclusive) column (assigned to this process in multi-GPU). 
+ * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first + * + @p graph_view.get_number_of_adj_matrix_local_cols(). + */ +template +void copy_to_adj_matrix_col(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexValueInputIterator vertex_value_input_first, + AdjMatrixColValueOutputIterator adj_matrix_col_value_output_first) +{ + if (GraphViewType::is_multi_gpu) { + CUGRAPH_FAIL("unimplemented."); + } else { + assert(graph_view.get_number_of_local_vertices() == + graph_view.get_number_of_adj_matrix_local_cols()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_value_input_first, + vertex_value_input_first + graph_view.get_number_of_local_vertices(), + adj_matrix_col_value_output_first); + } +} + +/** + * @brief Copy vertex property values to the corresponding graph adjacency matrix column property + * variables. + * + * This version fills only a subset of graph adjacency matrix column property values. [@p + * vertex_first, @p vertex_last) specifies the vertices with new values to be copied to graph + * adjacency matrix column property variables. This function is inspired by thrust::copy(). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam VertexIterator Type of the iterator for vertex identifiers. + * @tparam VertexValueInputIterator Type of the iterator for vertex properties. + * @tparam AdjMatrixColValueOutputIterator Type of the iterator for graph adjacency matrix column + * output property variables. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param vertex_first Iterator pointing to the first (inclusive) vertex with new values to be + * copied. 
v in [vertex_first, vertex_last) should be distinct (and should belong to this process in + * multi-GPU), otherwise undefined behavior + * @param vertex_last Iterator pointing to the last (exclusive) vertex with new values to be copied. + * @param vertex_value_input_first Iterator pointing to the vertex properties for the first + * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive) + * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices(). + * @param adj_matrix_col_value_output_first Iterator pointing to the adjacency matrix column output + * property variables for the first (inclusive) column (assigned to this process in multi-GPU). + * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first + * + @p graph_view.get_number_of_adj_matrix_local_cols(). + */ +template +void copy_to_adj_matrix_col(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexIterator vertex_first, + VertexIterator vertex_last, + VertexValueInputIterator vertex_value_input_first, + AdjMatrixColValueOutputIterator adj_matrix_col_value_output_first) +{ + if (GraphViewType::is_multi_gpu) { + CUGRAPH_FAIL("unimplemented."); + } else { + assert(graph_view.get_number_of_local_vertices() == + graph_view.get_number_of_adj_matrix_local_cols()); + auto val_first = thrust::make_permutation_iterator(vertex_value_input_first, vertex_first); + thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + val_first, + val_first + thrust::distance(vertex_first, vertex_last), + vertex_first, + adj_matrix_col_value_output_first); + } +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/copy_to_adj_matrix_row.cuh b/cpp/include/patterns/copy_to_adj_matrix_row.cuh new file mode 100644 index 00000000000..626562d6fae --- /dev/null +++ b/cpp/include/patterns/copy_to_adj_matrix_row.cuh @@ -0,0 +1,127 @@ +/* + * Copyright 
(c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include + +#include +#include +#include + +namespace cugraph { +namespace experimental { + +/** + * @brief Copy vertex property values to the corresponding graph adjacency matrix row property + * variables. + * + * This version fills the entire set of graph adjacency matrix row property values. This function is + * inspired by thrust::copy(). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam VertexValueInputIterator Type of the iterator for vertex properties. + * @tparam AdjMatrixRowValueOutputIterator Type of the iterator for graph adjacency matrix row + * output property variables. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param vertex_value_input_first Iterator pointing to the vertex properties for the first + * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive) + * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices(). + * @param adj_matrix_row_value_output_first Iterator pointing to the adjacency matrix row output + * property variables for the first (inclusive) row (assigned to this process in multi-GPU). 
+ * `adj_matrix_row_value_output_last` (exclusive) is deduced as @p adj_matrix_row_value_output_first + * + @p graph_view.get_number_of_adj_matrix_local_rows(). + */ +template +void copy_to_adj_matrix_row(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexValueInputIterator vertex_value_input_first, + AdjMatrixRowValueOutputIterator adj_matrix_row_value_output_first) +{ + if (GraphViewType::is_multi_gpu) { + CUGRAPH_FAIL("unimplemented."); + } else { + assert(graph_view.get_number_of_local_vertices() == + graph_view.get_number_of_adj_matrix_local_rows()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_value_input_first, + vertex_value_input_first + graph_view.get_number_of_local_vertices(), + adj_matrix_row_value_output_first); + } +} + +/** + * @brief Copy vertex property values to the corresponding graph adjacency matrix row property + * variables. + * + * This version fills only a subset of graph adjacency matrix row property values. [@p vertex_first, + * @p vertex_last) specifies the vertices with new values to be copied to graph adjacency matrix row + * property variables. This function is inspired by thrust::copy(). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam VertexIterator Type of the iterator for vertex identifiers. + * @tparam VertexValueInputIterator Type of the iterator for vertex properties. + * @tparam AdjMatrixRowValueOutputIterator Type of the iterator for graph adjacency matrix row + * output property variables. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param vertex_first Iterator pointing to the first (inclusive) vertex with new values to be + * copied. 
v in [vertex_first, vertex_last) should be distinct (and should belong to this process in + * multi-GPU), otherwise undefined behavior + * @param vertex_last Iterator pointing to the last (exclusive) vertex with new values to be copied. + * @param vertex_value_input_first Iterator pointing to the vertex properties for the first + * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive) + * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices(). + * @param adj_matrix_row_value_output_first Iterator pointing to the adjacency matrix row output + * property variables for the first (inclusive) row (assigned to this process in multi-GPU). + * `adj_matrix_row_value_output_last` (exclusive) is deduced as @p adj_matrix_row_value_output_first + * + @p graph_view.get_number_of_adj_matrix_local_rows(). + */ +template +void copy_to_adj_matrix_row(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexIterator vertex_first, + VertexIterator vertex_last, + VertexValueInputIterator vertex_value_input_first, + AdjMatrixRowValueOutputIterator adj_matrix_row_value_output_first) +{ + if (GraphViewType::is_multi_gpu) { + CUGRAPH_FAIL("unimplemented."); + } else { + assert(graph_view.get_number_of_local_vertices() == + graph_view.get_number_of_adj_matrix_local_rows()); + auto val_first = thrust::make_permutation_iterator(vertex_value_input_first, vertex_first); + thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + val_first, + val_first + thrust::distance(vertex_first, vertex_last), + vertex_first, + adj_matrix_row_value_output_first); + } +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/copy_v_transform_reduce_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_nbr.cuh new file mode 100644 index 00000000000..5e975dbc10a --- /dev/null +++ b/cpp/include/patterns/copy_v_transform_reduce_nbr.cuh @@ -0,0 +1,351 @@ +/* + * 
Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +// FIXME: block size requires tuning +int32_t constexpr copy_v_transform_reduce_nbr_for_all_low_out_degree_block_size = 128; + +#if 0 +// FIXME: delete this once we verify that the thrust replace in for_all_major_for_all_nbr_low_out_degree is no slower than the original for loop based imoplementation +template +__device__ std::enable_if_t accumulate_edge_op_result(T& lhs, T const& rhs) +{ + lhs = plus_edge_op_result(lhs, rhs); +} + +template +__device__ std::enable_if_t accumulate_edge_op_result(T& lhs, T const& rhs) +{ + atomic_add(&lhs, rhs); +} +#endif + +template +__global__ void for_all_major_for_all_nbr_low_out_degree( + matrix_partition_device_t matrix_partition, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + ResultValueOutputIterator result_value_output_first, + EdgeOp e_op, + T init /* relevent only if update_major == true */) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using e_op_result_t = T; + + auto 
const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto idx = static_cast(tid); + + while (idx < static_cast(matrix_partition.get_major_size())) { + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_degree{}; + thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(idx); +#if 1 + auto transform_op = [&matrix_partition, + &adj_matrix_row_value_input_first, + &adj_matrix_col_value_input_first, + &e_op, + idx, + indices, + weights] __device__(auto i) { + auto minor = indices[i]; + auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + auto row = GraphViewType::is_adj_matrix_transposed + ? minor + : matrix_partition.get_major_from_major_offset_nocheck(idx); + auto col = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_from_major_offset_nocheck(idx) + : minor; + auto row_offset = + GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); + auto col_offset = + GraphViewType::is_adj_matrix_transposed ? 
static_cast(idx) : minor_offset; + return evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + }; + + if (update_major) { + *(result_value_output_first + idx) = thrust::transform_reduce( + thrust::seq, + thrust::make_counting_iterator(edge_t{0}), + thrust::make_counting_iterator(local_degree), + transform_op, + init, + [] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); }); + } else { + thrust::for_each( + thrust::seq, + thrust::make_counting_iterator(edge_t{0}), + thrust::make_counting_iterator(local_degree), + [&matrix_partition, indices, &result_value_output_first, &transform_op] __device__(auto i) { + auto e_op_result = transform_op(i); + auto minor = indices[i]; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + atomic_accumulate_edge_op_result(result_value_output_first + minor_offset, e_op_result); + }); + } +#else + // FIXME: delete this once we verify that the code above is not slower than this. + e_op_result_t e_op_result_sum{init}; // relevent only if update_major == true + for (edge_t i = 0; i < local_degree; ++i) { + auto minor = indices[i]; + auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + auto row = GraphViewType::is_adj_matrix_transposed + ? minor + : matrix_partition.get_major_from_major_offset_nocheck(idx); + auto col = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_from_major_offset_nocheck(idx) + : minor; + auto row_offset = + GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); + auto col_offset = + GraphViewType::is_adj_matrix_transposed ? 
static_cast(idx) : minor_offset; + auto e_op_result = evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + if (update_major) { + accumulate_edge_op_result(e_op_result_sum, e_op_result); + } else { + accumulate_edge_op_result(*(result_value_output_first + minor_offset), + e_op_result); + } + } + if (update_major) { *(result_value_output_first + idx) = e_op_result_sum; } +#endif + idx += gridDim.x * blockDim.x; + } +} + +} // namespace detail + +/** + * @brief Iterate over the incoming edges to update vertex properties. + * + * This function is inspired by thrust::transfrom_reduce() (iteration over the incoming edges part) + * and thrust::copy() (update vertex properties part, take transform_reduce output as copy input). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row + * input properties. + * @tparam AdjMatrixColValueInputIterator Type of the iterator for graph adjacency matrix column + * input properties. + * @tparam EdgeOp Type of the quaternary (or quinary) edge operator. + * @tparam T Type of the initial value for reduction over the incoming edges. + * @tparam VertexValueOutputIterator Type of the iterator for vertex output property variables. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input + * properties for the first (inclusive) row (assigned to this process in multi-GPU). + * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + + * @p graph_view.get_number_of_adj_matrix_local_rows(). 
+ * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input
+ * properties for the first (inclusive) column (assigned to this process in multi-GPU).
+ * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first
+ * + @p graph_view.get_number_of_adj_matrix_local_cols().
+ * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge
+ * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first +
+ * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0,
+ * get_number_of_local_adj_matrix_partition_cols())) and returns a value to be reduced.
+ * @param init Initial value to be added to the reduced @p e_op return values for each vertex.
+ * @param vertex_value_output_first Iterator pointing to the vertex property variables for the first
+ * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_output_last`
+ * (exclusive) is deduced as @p vertex_value_output_first + @p
+ * graph_view.get_number_of_local_vertices().
+ */ +template +void copy_v_transform_reduce_in_nbr(raft::handle_t const& handle, + GraphViewType const& graph_view, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + EdgeOp e_op, + T init, + VertexValueOutputIterator vertex_value_output_first) +{ + static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); + + if (GraphViewType::is_multi_gpu) { + CUGRAPH_FAIL("unimplemented."); + } else { + matrix_partition_device_t matrix_partition(graph_view, 0); + + raft::grid_1d_thread_t update_grid( + matrix_partition.get_major_size(), + detail::copy_v_transform_reduce_nbr_for_all_low_out_degree_block_size, + handle.get_device_properties().maxGridSize[0]); + + if (!GraphViewType::is_adj_matrix_transposed) { + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_value_output_first, + vertex_value_output_first + graph_view.get_number_of_local_vertices(), + init); + } + + assert(graph_view.get_number_of_local_vertices() == + graph_view.get_number_of_adj_matrix_local_rows()); + assert(graph_view.get_number_of_local_vertices() == + graph_view.get_number_of_adj_matrix_local_cols()); + detail::for_all_major_for_all_nbr_low_out_degree + <<>>( + matrix_partition, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + vertex_value_output_first, + e_op, + init); + } +} + +/** + * @brief Iterate over the outgoing edges to update vertex properties. + * + * This function is inspired by thrust::transfrom_reduce() (iteration over the outgoing edges part) + * and thrust::copy() (update vertex properties part, take transform_reduce output as copy input). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row + * input properties. 
+ * @tparam AdjMatrixColValueInputIterator Type of the iterator for graph adjacency matrix column + * input properties. + * @tparam EdgeOp Type of the quaternary (or quinary) edge operator. + * @tparam T Type of the initial value for reduction over the outgoing edges. + * @tparam VertexValueOutputIterator Type of the iterator for vertex output property variables. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input + * properties for the first (inclusive) row (assigned to this process in multi-GPU). + * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + + * @p graph_view.get_number_of_adj_matrix_local_rows(). + * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input + * properties for the first (inclusive) column (assigned to this process in multi-GPU). + * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first + * + @p graph_view.get_number_of_adj_matrix_local_cols(). + * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge + * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + + * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, + * get_number_of_local_adj_matrix_partition_cols())) and returns a value to be reduced. + * @param init Initial value to be added to the reduced @e_op return values for each vertex. + * @param vertex_value_output_first Iterator pointing to the vertex property variables for the first + * (inclusive) vertex (assigned to tihs process in multi-GPU). 
`vertex_value_output_last` + * (exclusive) is deduced as @p vertex_value_output_first + @p + * graph_view.get_number_of_local_vertices(). + */ +template +void copy_v_transform_reduce_out_nbr( + raft::handle_t const& handle, + GraphViewType const& graph_view, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + EdgeOp e_op, + T init, + VertexValueOutputIterator vertex_value_output_first) +{ + static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); + + if (GraphViewType::is_multi_gpu) { + CUGRAPH_FAIL("unimplemented."); + } else { + matrix_partition_device_t matrix_partition(graph_view, 0); + + raft::grid_1d_thread_t update_grid( + matrix_partition.get_major_size(), + detail::copy_v_transform_reduce_nbr_for_all_low_out_degree_block_size, + handle.get_device_properties().maxGridSize[0]); + + if (GraphViewType::is_adj_matrix_transposed) { + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_value_output_first, + vertex_value_output_first + graph_view.get_number_of_local_vertices(), + init); + } + + assert(graph_view.get_number_of_local_vertices() == + graph_view.get_number_of_local_adj_matrix_partition_rows()); + assert(graph_view.get_number_of_local_vertices() == + graph_view.get_number_of_local_adj_matrix_partition_cols()); + detail::for_all_major_for_all_nbr_low_out_degree + <<>>( + matrix_partition, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + vertex_value_output_first, + e_op, + init); + } +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/count_if_e.cuh b/cpp/include/patterns/count_if_e.cuh new file mode 100644 index 00000000000..adf75a1f6c8 --- /dev/null +++ b/cpp/include/patterns/count_if_e.cuh @@ -0,0 +1,231 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include + +#include +#include + +#include +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +// FIXME: block size requires tuning +int32_t constexpr count_if_e_for_all_low_out_degree_block_size = 128; + +// FIXME: function names conflict if included with transform_reduce_e.cuh +template +__global__ void for_all_major_for_all_nbr_low_out_degree( + matrix_partition_device_t matrix_partition, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + typename GraphViewType::edge_type* block_counts, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto idx = static_cast(tid); + + edge_t count{0}; + while (idx < static_cast(matrix_partition.get_major_size())) { + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_degree{}; + thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(idx); +#if 1 + count += thrust::count_if( + thrust::seq, + thrust::make_counting_iterator(edge_t{0}), + thrust::make_counting_iterator(local_degree), + [&matrix_partition, + &adj_matrix_row_value_input_first, + 
&adj_matrix_col_value_input_first, + &e_op, + idx, + indices, + weights] __device__(auto i) { + auto minor = indices[i]; + auto weight = weights != nullptr ? weights[i] : 1.0; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + auto row = GraphViewType::is_adj_matrix_transposed + ? minor + : matrix_partition.get_major_from_major_offset_nocheck(idx); + auto col = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_from_major_offset_nocheck(idx) + : minor; + auto row_offset = + GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); + auto col_offset = + GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; + auto e_op_result = evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + + return e_op_result; + }); +#else + // FIXME: delete this once we verify that the code above is not slower than this. + for (vertex_t i = 0; i < local_degree; ++i) { + auto minor = indices[i]; + auto weight = weights != nullptr ? weights[i] : 1.0; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + auto row = GraphViewType::is_adj_matrix_transposed + ? minor + : matrix_partition.get_major_from_major_offset_nocheck(idx); + auto col = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_from_major_offset_nocheck(idx) + : minor; + auto row_offset = + GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); + auto col_offset = + GraphViewType::is_adj_matrix_transposed ? 
static_cast(idx) : minor_offset; + auto e_op_result = evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + if (e_op_result) { count++; } + } +#endif + idx += gridDim.x * blockDim.x; + } + + using BlockReduce = cub::BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + count = BlockReduce(temp_storage).Sum(count); + if (threadIdx.x == 0) { *(block_counts + blockIdx.x) = count; } +} + +} // namespace detail + +/** + * @brief Count the number of edges that satisfies the given predicate. + * + * This function is inspired by thrust::count_if(). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row + * input properties. + * @tparam AdjMatrixColValueInputIterator Type of the iterator for graph adjacency matrix column + * input properties. + * @tparam EdgeOp Type of the quaternary (or quinary) edge operator. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input + * properties for the first (inclusive) row (assigned to this process in multi-GPU). + * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + + * @p graph_view.get_number_of_adj_matrix_local_rows(). + * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input + * properties for the first (inclusive) column (assigned to this process in multi-GPU). + * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first + * + @p graph_view.get_number_of_adj_matrix_local_cols(). 
+ * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge + * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + + * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, + * get_number_of_local_adj_matrix_partition_cols())) and returns true if this edge should be + * included in the returned count. + * @return GraphViewType::edge_type Number of times @p e_op returned true. + */ +template +typename GraphViewType::edge_type count_if_e( + raft::handle_t const& handle, + GraphViewType const& graph_view, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + + edge_t count{0}; + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + matrix_partition_device_t matrix_partition(graph_view, i); + auto row_value_input_offset = + GraphViewType::is_adj_matrix_transposed ? 0 : matrix_partition.get_major_value_start_offset(); + auto col_value_input_offset = + GraphViewType::is_adj_matrix_transposed ? matrix_partition.get_major_value_start_offset() : 0; + + raft::grid_1d_thread_t update_grid(matrix_partition.get_major_size(), + detail::count_if_e_for_all_low_out_degree_block_size, + handle.get_device_properties().maxGridSize[0]); + + rmm::device_vector block_counts(update_grid.num_blocks); + + detail::for_all_major_for_all_nbr_low_out_degree<<>>( + matrix_partition, + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + block_counts.data().get(), + e_op); + + // FIXME: we have several options to implement this. 
With cooperative group support + // (https://devblogs.nvidia.com/cooperative-groups/), we can run this synchronization within + // the previous kernel. Using atomics at the end of the previous kernel is another option + // (sequentialization due to atomics may not be bad as different blocks may reach the + // synchronization point in varying timings and the number of SMs is not very big) + count += thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + block_counts.begin(), + block_counts.end(), + edge_t{0}, + thrust::plus()); + } + + if (GraphViewType::is_multi_gpu) { + // need to reduce count + CUGRAPH_FAIL("unimplemented."); + } + + return count; +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/count_if_v.cuh b/cpp/include/patterns/count_if_v.cuh new file mode 100644 index 00000000000..6e4ddeee16f --- /dev/null +++ b/cpp/include/patterns/count_if_v.cuh @@ -0,0 +1,106 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +/** + * @brief Count the number of vertices that satisfies the given predicate. + * + * This version iterates over the entire set of graph vertices. This function is inspired by + * thrust::count_if(). + * + * @tparam GraphViewType Type of the passed non-owning graph object. 
+ * @tparam VertexValueInputIterator Type of the iterator for vertex properties. + * @tparam VertexOp Type of the unary predicate operator. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param vertex_value_input_first Iterator pointing to the vertex properties for the first + * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive) + * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices(). + * @param v_op Unary operator takes *(@p vertex_value_input_first + i) (where i is [0, @p + * graph_view.get_number_of_local_vertices())) and returns true if this vertex should be + * included in the returned count. + * @return GraphViewType::vertex_type Number of times @p v_op returned true. + */ +template +typename GraphViewType::vertex_type count_if_v(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexValueInputIterator vertex_value_input_first, + VertexOp v_op) +{ + auto count = + thrust::count_if(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_value_input_first, + vertex_value_input_first + graph_view.get_number_of_local_vertices(), + v_op); + if (GraphViewType::is_multi_gpu) { + // need to reduce count + CUGRAPH_FAIL("unimplemented."); + } + return count; +} + +/** + * @brief Count the number of vertices that satisfies the given predicate. + * + * This version (conceptually) iterates over only a subset of the graph vertices. This function + * actually works as thrust::count_if() on [@p input_first, @p input_last) (followed by + * inter-process reduction in multi-GPU). @p input_last - @p input_first (or the sum of @p + * input_last - @p input_first values in multi-GPU) should not overflow GraphViewType::vertex_type. 
+ * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam InputIterator Type of the iterator for input values. + * @tparam VertexOp VertexOp Type of the unary predicate operator. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param input_first Iterator pointing to the beginning (inclusive) of the values to be passed to + * @p v_op. + * @param input_last Iterator pointing to the end (exclusive) of the values to be passed to @p v_op. + * @param v_op Unary operator takes *(@p input_first + i) (where i is [0, @p input_last - @p + * input_first)) and returns true if this vertex should be included in the returned count. + * @return GraphViewType::vertex_type Number of times @p v_op returned true. + */ +template +typename GraphViewType::vertex_type count_if_v(raft::handle_t const& handle, + GraphViewType const& graph_view, + InputIterator input_first, + InputIterator input_last, + VertexOp v_op) +{ + auto count = thrust::count_if( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), input_first, input_last, v_op); + if (GraphViewType::is_multi_gpu) { + // need to reduce count + CUGRAPH_FAIL("unimplemented."); + } + return count; +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/edge_op_utils.cuh b/cpp/include/patterns/edge_op_utils.cuh new file mode 100644 index 00000000000..184d1f1e794 --- /dev/null +++ b/cpp/include/patterns/edge_op_utils.cuh @@ -0,0 +1,127 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include + +#include +#include +#include +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +template +struct is_valid_edge_op { + static constexpr bool value = false; +}; + +template +struct is_valid_edge_op< + ResultOfEdgeOp, + typename std::conditional::type> { + static constexpr bool valid = true; +}; + +template +struct evaluate_edge_op { + using vertex_type = typename GraphViewType::vertex_type; + using weight_type = typename GraphViewType::weight_type; + using row_value_type = typename std::iterator_traits::value_type; + using col_value_type = typename std::iterator_traits::value_type; + + template + __device__ std::enable_if_t>::valid, + typename std::result_of::type> + compute(V r, V c, W w, R rv, C cv, E e) + { + return e(r, c, w, rv, cv); + } + + template + __device__ std::enable_if_t>::valid, + typename std::result_of::type> + compute(V r, V c, W w, R rv, C cv, E e) + { + return e(r, c, rv, cv); + } +}; + +template +__host__ __device__ std::enable_if_t::value, T> plus_edge_op_result( + T const& lhs, T const& rhs) +{ + return lhs + rhs; +} + +template +__host__ __device__ std::enable_if_t::value, T> plus_edge_op_result(T const& lhs, + T const& rhs) +{ + return plus_thrust_tuple()(lhs, rhs); +} + +template +__device__ + std::enable_if_t::value_type, T>::value && + std::is_arithmetic::value, + void> + atomic_accumulate_edge_op_result(Iterator iter, T const& value) +{ + atomicAdd(&(thrust::raw_reference_cast(*iter)), value); +} + +template +__device__ std::enable_if_t::value 
&& + std::is_arithmetic::value, + void> +atomic_accumulate_edge_op_result(Iterator iter, T const& value) +{ + // no-op +} + +template +__device__ + std::enable_if_t::value_type>::value && + is_thrust_tuple::value, + void> + atomic_accumulate_edge_op_result(Iterator iter, T const& value) +{ + static_assert(thrust::tuple_size::value_type>::value == + thrust::tuple_size::value); + atomic_accumulate_thrust_tuple()(iter, value); + return; +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/reduce_op.cuh b/cpp/include/patterns/reduce_op.cuh new file mode 100644 index 00000000000..e9011914292 --- /dev/null +++ b/cpp/include/patterns/reduce_op.cuh @@ -0,0 +1,47 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +namespace cugraph { +namespace experimental { +namespace reduce_op { + +// reducing N elements, any element can be a valid output. +template +struct any { + using type = T; + static constexpr bool pure_function = true; // this can be called in any process + + __host__ __device__ T operator()(T const& lhs, T const& rhs) const { return lhs; } +}; + +// reducing N elements (operator < should be defined between any two elements), the minimum element +// should be selected. 
+template +struct min { + using type = T; + static constexpr bool pure_function = true; // this can be called in any process + + __host__ __device__ T operator()(T const& lhs, T const& rhs) const + { + return lhs < rhs ? lhs : rhs; + } +}; + +} // namespace reduce_op +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/reduce_v.cuh b/cpp/include/patterns/reduce_v.cuh new file mode 100644 index 00000000000..bc12f13225d --- /dev/null +++ b/cpp/include/patterns/reduce_v.cuh @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +/** + * @brief Reduce the vertex properties. + * + * This version iterates over the entire set of graph vertices. This function is inspired by + * thrust::reduce(). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam VertexValueInputIterator Type of the iterator for vertex properties. + * @tparam T Type of the initial value. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param vertex_value_input_first Iterator pointing to the vertex properties for the first + * (inclusive) vertex (assigned to this process in multi-GPU). 
`vertex_value_input_last` (exclusive) + * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices(). + * @param init Initial value to be added to the reduced input vertex properties. + * @return T Reduction of the input vertex properties. + */ +template +T reduce_v(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexValueInputIterator vertex_value_input_first, + T init) +{ + auto ret = thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_value_input_first, + vertex_value_input_first + graph_view.get_number_of_local_vertices(), + init); + if (GraphViewType::is_multi_gpu) { + // need to reduce ret + CUGRAPH_FAIL("unimplemented."); + } + return ret; +} + +/** + * @brief Reduce the vertex properties. + * + * This version (conceptually) iterates over only a subset of the graph vertices. This function + * actually works as thrust::reduce() on [@p input_first, @p input_last) (followed by + * inter-process reduction in multi-GPU). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam InputIterator Type of the iterator for input values. + * @tparam T Type of the initial value. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param input_first Iterator pointing to the beginning (inclusive) of the values to be reduced. + * @param input_last Iterator pointing to the end (exclusive) of the values to be reduced. + * @param init Initial value to be added to the reduced input vertex properties. + * @return T Reduction of the input vertex properties. 
+ */ +template +T reduce_v(raft::handle_t const& handle, + GraphViewType const& graph_view, + InputIterator input_first, + InputIterator input_last, + T init) +{ + auto ret = thrust::reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), input_first, input_last, init); + if (GraphViewType::is_multi_gpu) { + // need to reduce ret + CUGRAPH_FAIL("unimplemented."); + } + return ret; +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/transform_reduce_e.cuh b/cpp/include/patterns/transform_reduce_e.cuh new file mode 100644 index 00000000000..184f1fffac5 --- /dev/null +++ b/cpp/include/patterns/transform_reduce_e.cuh @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +// FIXME: block size requires tuning +int32_t constexpr transform_reduce_e_for_all_low_out_degree_block_size = 128; + +template +struct block_reduce_edge_op_result { + template + __device__ std::enable_if_t::value, T> compute(T const& edge_op_result) + { + using BlockReduce = cub::BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + return BlockReduce(temp_storage).Sum(edge_op_result); + } + + template + __device__ std::enable_if_t::value, T> compute(T const& edge_op_result) + { + return block_reduce_thrust_tuple()(edge_op_result); + } +}; + +template +__global__ void for_all_major_for_all_nbr_low_out_degree( + matrix_partition_device_t matrix_partition, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + BlockResultIterator block_result_first, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using e_op_result_t = typename std::iterator_traits::value_type; + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + size_t idx = static_cast(tid); + + e_op_result_t e_op_result_sum{}; + while (idx < static_cast(matrix_partition.get_major_size())) { + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_degree{}; + thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(idx); +#if 1 + auto sum = thrust::transform_reduce( + thrust::seq, + thrust::make_counting_iterator(edge_t{0}), + thrust::make_counting_iterator(local_degree), + [&matrix_partition, + &adj_matrix_row_value_input_first, + &adj_matrix_col_value_input_first, + &e_op, + idx, + indices, + weights] 
__device__(auto i) { + auto minor = indices[i]; + auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + auto row = GraphViewType::is_adj_matrix_transposed + ? minor + : matrix_partition.get_major_from_major_offset_nocheck(idx); + auto col = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_from_major_offset_nocheck(idx) + : minor; + auto row_offset = + GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); + auto col_offset = + GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; + return evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + }, + e_op_result_t{}, + [] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); }); + + e_op_result_sum = plus_edge_op_result(e_op_result_sum, sum); +#else + // FIXME: delete this once we verify that the code above is not slower than this. + for (vertex_t i = 0; i < local_degree; ++i) { + auto minor = indices[i]; + auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + auto row = GraphViewType::is_adj_matrix_transposed + ? minor + : matrix_partition.get_major_from_major_offset_nocheck(idx); + auto col = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_from_major_offset_nocheck(idx) + : minor; + auto row_offset = + GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); + auto col_offset = + GraphViewType::is_adj_matrix_transposed ? 
static_cast(idx) : minor_offset; + auto e_op_result = evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + e_op_result_sum = plus_edge_op_result(e_op_result_sum, e_op_result); + } +#endif + idx += gridDim.x * blockDim.x; + } + + e_op_result_sum = + block_reduce_edge_op_result() + .compute(e_op_result_sum); + if (threadIdx.x == 0) { *(block_result_first + blockIdx.x) = e_op_result_sum; } +} + +} // namespace detail + +/** + * @brief Iterate over the entire set of edges and reduce @p edge_op outputs. + * + * This function is inspired by thrust::transform_reduce(). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row + * input properties. + * @tparam AdjMatrixColValueInputIterator Type of the iterator for graph adjacency matrix column + * input properties. + * @tparam EdgeOp Type of the quaternary (or quinary) edge operator. + * @tparam T Type of the initial value. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input + * properties for the first (inclusive) row (assigned to this process in multi-GPU). + * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + + * @p graph_view.get_number_of_adj_matrix_local_rows(). + * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input + * properties for the first (inclusive) column (assigned to this process in multi-GPU). 
+ * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first + * + @p graph_view.get_number_of_adj_matrix_local_cols(). + * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge + * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + + * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, + * get_number_of_local_adj_matrix_partition_cols())) and returns a transformed value to be reduced. + * @param init Initial value to be added to the transform-reduced input vertex properties. + * @return T Reduction of the @p edge_op outputs. + */ +template +T transform_reduce_e(raft::handle_t const& handle, + GraphViewType const& graph_view, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + EdgeOp e_op, + T init) +{ + static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); + + using vertex_t = typename GraphViewType::vertex_type; + + T result{}; + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + matrix_partition_device_t matrix_partition(graph_view, i); + auto row_value_input_offset = + GraphViewType::is_adj_matrix_transposed ? 0 : matrix_partition.get_major_value_start_offset(); + auto col_value_input_offset = + GraphViewType::is_adj_matrix_transposed ? 
matrix_partition.get_major_value_start_offset() : 0; + + raft::grid_1d_thread_t update_grid(matrix_partition.get_major_size(), + detail::transform_reduce_e_for_all_low_out_degree_block_size, + handle.get_device_properties().maxGridSize[0]); + + rmm::device_vector block_results(update_grid.num_blocks); + + detail::for_all_major_for_all_nbr_low_out_degree<<>>( + matrix_partition, + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + block_results.data(), + e_op); + + // FIXME: we have several options to implement this. With cooperative group support + // (https://devblogs.nvidia.com/cooperative-groups/), we can run this synchronization within the + // previous kernel. Using atomics at the end of the previous kernel is another option + // (sequentialization due to atomics may not be bad as different blocks may reach the + // synchronization point in varying timings and the number of SMs is not very big) + auto partial_result = + thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + block_results.begin(), + block_results.end(), + T(), + [] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); }); + + result = plus_edge_op_result(result, partial_result); + } + + if (GraphViewType::is_multi_gpu) { + // need reduction + CUGRAPH_FAIL("unimplemented."); + } + + return plus_edge_op_result(init, result); +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/transform_reduce_v.cuh b/cpp/include/patterns/transform_reduce_v.cuh new file mode 100644 index 00000000000..0d31df19b35 --- /dev/null +++ b/cpp/include/patterns/transform_reduce_v.cuh @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +/** + * @brief Apply an operator to the vertex properties and reduce. + * + * This version iterates over the entire set of graph vertices. This function is inspired by + * thrust::transform_reduce(). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam VertexValueInputIterator Type of the iterator for vertex properties. + * @tparam VertexOp Type of the unary vertex operator. + * @tparam T Type of the initial value. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param vertex_value_input_first Iterator pointing to the vertex properties for the first + * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive) + * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices(). + * @param v_op Unary operator takes *(@p vertex_value_input_first + i) (where i is [0, @p + * graph_view.get_number_of_local_vertices())) and returns a transformed value to be reduced. + * @param init Initial value to be added to the transform-reduced input vertex properties. + * @return T Reduction of the @p v_op outputs. 
+ */ +template +T transform_reduce_v(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexValueInputIterator vertex_value_input_first, + VertexOp v_op, + T init) +{ + auto ret = + thrust::transform_reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_value_input_first, + vertex_value_input_first + graph_view.get_number_of_local_vertices(), + v_op, + init, + thrust::plus()); + if (GraphViewType::is_multi_gpu) { + // need to reduce ret + CUGRAPH_FAIL("unimplemented."); + } + return ret; +} + +/** + * @brief Apply an operator to the vertex properties and reduce. + * + * This version (conceptually) iterates over only a subset of the graph vertices. This function + * actually works as thrust::transform_reduce() on [@p input_first, @p input_last) (followed by + * inter-process reduction in multi-GPU). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam InputIterator Type of the iterator for input values. + * @tparam VertexOp + * @tparam T Type of the initial value. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param input_first Iterator pointing to the beginning (inclusive) of the values to be passed to + * @p v_op. + * @param input_last Iterator pointing to the end (exclusive) of the values to be passed to @p v_op. + * @param v_op Unary operator takes *(@p input_first + i) (where i is [0, @p input_last - @p + * input_first)) and returns a transformed value to be reduced. + * @param init Initial value to be added to the transform-reduced input vertex properties. + * @return T Reduction of the @p v_op outputs. 
+ */ +template +T transform_reduce_v(raft::handle_t const& handle, + GraphViewType const& graph_view, + InputIterator input_first, + InputIterator input_last, + VertexOp v_op, + T init) +{ + auto ret = + thrust::transform_reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + input_first, + input_last, + v_op, + init, + thrust::plus()); + if (GraphViewType::is_multi_gpu) { + // need to reduce ret + CUGRAPH_FAIL("unimplemented."); + } + return ret; +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh b/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh new file mode 100644 index 00000000000..e9cc476e221 --- /dev/null +++ b/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +#include +#include +#include + +namespace cugraph { +namespace experimental { + +/** + * @brief Apply an operator to the matching vertex and adjacency matrix row properties and reduce. + * + * i'th vertex matches with the i'th row in the graph adjacency matrix. @p v_op takes vertex + * properties and adjacency matrix row properties for the matching row, and @p v_op outputs are + * reduced. This function is inspired by thrust::transform_reduce(). 
+ *
+ * @tparam GraphViewType Type of the passed non-owning graph object.
+ * @tparam VertexValueInputIterator Type of the iterator for vertex properties.
+ * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row
+ * input properties.
+ * @tparam VertexOp Type of the binary vertex operator.
+ * @tparam T Type of the initial value.
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Non-owning graph object.
+ * @param vertex_value_input_first Iterator pointing to the vertex properties for the first
+ * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive)
+ * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices().
+ * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input
+ * properties for the first (inclusive) row (assigned to this process in multi-GPU).
+ * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first +
+ * @p graph_view.get_number_of_adj_matrix_local_rows().
+ * @param v_op Binary operator takes *(@p vertex_value_input_first + i) and *(@p
+ * adj_matrix_row_value_input_first + j) (where i and j are set for a vertex and the matching row)
+ * and returns a transformed value to be reduced.
+ * @param init Initial value to be added to the transform-reduced input vertex properties.
+ * @return T Reduction of the @p v_op outputs. 
+ */ +template +T transform_reduce_v_with_adj_matrix_row( + raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexValueInputIterator vertex_value_input_first, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + VertexOp v_op, + T init) +{ + if (GraphViewType::is_multi_gpu) { + CUGRAPH_FAIL("unimplemented."); + } else { + assert(graph_view.get_number_of_local_vertices() == + graph_view.get_number_of_adj_matrix_local_rows()); + auto input_first = thrust::make_zip_iterator( + thrust::make_tuple(vertex_value_input_first, adj_matrix_row_value_input_first)); + auto v_op_wrapper = [v_op] __device__(auto v_and_row_val) { + return v_op(thrust::get<0>(v_and_row_val), thrust::get<1>(v_and_row_val)); + }; + return thrust::transform_reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + input_first, + input_first + graph_view.get_number_of_local_vertices(), + v_op_wrapper, + init, + thrust::plus()); + } +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh new file mode 100644 index 00000000000..f376ace1267 --- /dev/null +++ b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -0,0 +1,510 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +// FIXME: block size requires tuning +int32_t constexpr update_frontier_v_push_if_out_nbr_for_all_low_out_degree_block_size = 128; +int32_t constexpr update_frontier_v_push_if_out_nbr_update_block_size = 128; + +template +__global__ void for_all_frontier_row_for_all_nbr_low_out_degree( + matrix_partition_device_t matrix_partition, + RowIterator row_first, + RowIterator row_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + BufferKeyOutputIterator buffer_key_output_first, + BufferPayloadOutputIterator buffer_payload_output_first, + size_t* buffer_idx_ptr, + EdgeOp e_op) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); + + auto num_rows = static_cast(thrust::distance(row_first, row_last)); + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + size_t idx = tid; + + while (idx < num_rows) { + vertex_t row = *(row_first + idx); + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_out_degree{}; + thrust::tie(indices, weights, local_out_degree) = matrix_partition.get_local_edges(row_offset); + for (vertex_t i = 0; i < local_out_degree; ++i) { + auto col = indices[i]; + auto weight = weights != nullptr ? 
weights[i] : 1.0; + auto col_offset = matrix_partition.get_minor_offset_from_minor_nocheck(col); + auto e_op_result = evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + if (thrust::get<0>(e_op_result) == true) { + // FIXME: This atomicAdd serializes execution. If we renumber vertices to insure that rows + // within a partition are sorted by their out-degree in decreasing order, we can compute + // a tight uppper bound for the maximum number of pushes per warp/block and use shared + // memory buffer to reduce the number of atomicAdd operations. + static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + auto buffer_idx = atomicAdd(reinterpret_cast(buffer_idx_ptr), + static_cast(1)); + *(buffer_key_output_first + buffer_idx) = col_offset; + *(buffer_payload_output_first + buffer_idx) = + remove_first_thrust_tuple_element()(e_op_result); + } + } + + idx += gridDim.x * blockDim.x; + } +} + +template +size_t reduce_buffer_elements(raft::handle_t const& handle, + BufferKeyOutputIterator buffer_key_output_first, + BufferPayloadOutputIterator buffer_payload_output_first, + size_t num_buffer_elements, + ReduceOp reduce_op) +{ + thrust::sort_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + buffer_key_output_first, + buffer_key_output_first + num_buffer_elements, + buffer_payload_output_first); + + if (std::is_same>::value) { + // FIXME: if ReducOp is any, we may have a cheaper alternative than sort & uique (i.e. 
discard + // non-first elements) + auto it = thrust::unique_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + buffer_key_output_first, + buffer_key_output_first + num_buffer_elements, + buffer_payload_output_first); + return static_cast(thrust::distance(buffer_key_output_first, thrust::get<0>(it))); + } else { + using key_t = typename std::iterator_traits::value_type; + using payload_t = typename std::iterator_traits::value_type; + // FIXME: better avoid temporary buffer or at least limit the maximum buffer size (if we adopt + // CUDA cooperative group https://devblogs.nvidia.com/cooperative-groups and global sync(), we + // can use aggregate shared memory as a temporary buffer, or we can limit the buffer size, and + // split one thrust::reduce_by_key call to multiple thrust::reduce_by_key calls if the + // temporary buffer size exceeds the maximum buffer size (may be definied as percentage of the + // system HBM size or a function of the maximum number of threads in the system)) + rmm::device_vector keys(num_buffer_elements); + rmm::device_vector values(num_buffer_elements); + auto it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + buffer_key_output_first, + buffer_key_output_first + num_buffer_elements, + buffer_payload_output_first, + keys.begin(), + values.begin(), + thrust::equal_to(), + reduce_op); + auto num_reduced_buffer_elements = + static_cast(thrust::distance(keys.begin(), thrust::get<0>(it))); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + keys.begin(), + keys.begin() + num_reduced_buffer_elements, + buffer_key_output_first); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + values.begin(), + values.begin() + num_reduced_buffer_elements, + buffer_payload_output_first); + return num_reduced_buffer_elements; + } +} + +template +__global__ void update_frontier_and_vertex_output_values( + BufferKeyInputIterator 
buffer_key_input_first, + BufferPayloadInputIterator buffer_payload_input_first, + size_t num_buffer_elements, + VertexValueInputIterator vertex_value_input_first, + VertexValueOutputIterator vertex_value_output_first, + vertex_t** bucket_ptrs, + size_t* bucket_sizes_ptr, + size_t invalid_bucket_idx, + vertex_t invalid_vertex, + VertexOp v_op) +{ + static_assert(std::is_same::value_type, + vertex_t>::value); + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + size_t idx = tid; + size_t block_idx = blockIdx.x; + // FIXME: it might be more performant to process more than one element per thread + auto num_blocks = (num_buffer_elements + blockDim.x - 1) / blockDim.x; + + using BlockScan = + cub::BlockScan; + __shared__ typename BlockScan::TempStorage temp_storage; + + __shared__ size_t bucket_block_start_offsets[num_buckets]; + + size_t bucket_block_local_offsets[num_buckets]; + size_t bucket_block_aggregate_sizes[num_buckets]; + + while (block_idx < num_blocks) { + for (size_t i = 0; i < num_buckets; ++i) { bucket_block_local_offsets[i] = 0; } + + size_t selected_bucket_idx{invalid_bucket_idx}; + vertex_t key{invalid_vertex}; + + if (idx < num_buffer_elements) { + key = *(buffer_key_input_first + idx); + auto v_val = *(vertex_value_input_first + key); + auto payload = *(buffer_payload_input_first + idx); + auto v_op_result = v_op(v_val, payload); + selected_bucket_idx = thrust::get<0>(v_op_result); + if (selected_bucket_idx != invalid_bucket_idx) { + *(vertex_value_output_first + key) = + remove_first_thrust_tuple_element()(v_op_result); + bucket_block_local_offsets[selected_bucket_idx] = 1; + } + } + + for (size_t i = 0; i < num_buckets; ++i) { + BlockScan(temp_storage) + .ExclusiveSum(bucket_block_local_offsets[i], + bucket_block_local_offsets[i], + bucket_block_aggregate_sizes[i]); + } + + if (threadIdx.x == 0) { + for (size_t i = 0; i < num_buckets; ++i) { + static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + bucket_block_start_offsets[i] = 
+ atomicAdd(reinterpret_cast(bucket_sizes_ptr + i), + static_cast(bucket_block_aggregate_sizes[i])); + } + } + + __syncthreads(); + + // FIXME: better use shared memory buffer to aggreaget global memory writes + if (selected_bucket_idx != invalid_bucket_idx) { + bucket_ptrs[selected_bucket_idx][bucket_block_start_offsets[selected_bucket_idx] + + bucket_block_local_offsets[selected_bucket_idx]] = key; + } + + idx += gridDim.x * blockDim.x; + block_idx += gridDim.x; + } +} + +} // namespace detail + +/** + * @brief Update vertex frontier and vertex property values iterating over the outgoing edges. + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam VertexIterator Type of the iterator for vertex identifiers. + * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row + * input properties. + * @tparam AdjMatrixColValueInputIterator Type of the iterator for graph adjacency matrix column + * input properties. + * @tparam EdgeOp Type of the quaternary (or quinary) edge operator. + * @tparam ReduceOp Type of the binary reduction operator. + * @tparam VertexValueInputIterator Type of the iterator for vertex properties. + * @tparam VertexValueOutputIterator Type of the iterator for vertex property variables. + * @tparam VertexFrontierType Type of the vertex frontier class which abstracts vertex frontier + * managements. + * @tparam VertexOp Type of the binary vertex operator. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param vertex_first Iterator pointing to the first (inclusive) vertex in the current frontier. 
v
+ * in [vertex_first, vertex_last) should be distinct (and should belong to this process in
+ * multi-GPU), otherwise undefined behavior
+ * @param vertex_last Iterator pointing to the last (exclusive) vertex in the current frontier.
+ * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input
+ * properties for the first (inclusive) row (assigned to this process in multi-GPU).
+ * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first +
+ * @p graph_view.get_number_of_adj_matrix_local_rows().
+ * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input
+ * properties for the first (inclusive) column (assigned to this process in multi-GPU).
+ * `adj_matrix_col_value_input_last` (exclusive) is deduced as @p adj_matrix_col_value_input_first
+ * + @p graph_view.get_number_of_adj_matrix_local_cols().
+ * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge
+ * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first +
+ * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0,
+ * get_number_of_local_adj_matrix_partition_cols())) and returns a value to be reduced by the @p
+ * reduce_op.
+ * @param reduce_op Binary operator takes two input arguments and reduces the two variables to one.
+ * @param vertex_value_input_first Iterator pointing to the vertex properties for the first
+ * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive)
+ * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices().
+ * @param vertex_value_output_first Iterator pointing to the vertex property variables for the first
+ * (inclusive) vertex (assigned to this process in multi-GPU). 
`vertex_value_output_last` + * (exclusive) is deduced as @p vertex_value_output_first + @p + * graph_view.get_number_of_local_vertices(). + * @param vertex_frontier vertex frontier class object for vertex frontier managements. This object + * includes multiple bucket objects. + * @param v_op Binary operator takes *(@p vertex_value_input_first + i) (where i is [0, @p + * graph_view.get_number_of_local_vertices())) and reduced value of the @p e_op outputs for + * this vertex and returns the target bucket index (for frontier update) and new verrtex property + * values (to update *(@p vertex_value_output_first + i)). + */ +template +void update_frontier_v_push_if_out_nbr( + raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexIterator vertex_first, + VertexIterator vertex_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + EdgeOp e_op, + ReduceOp reduce_op, + VertexValueInputIterator vertex_value_input_first, + VertexValueOutputIterator vertex_value_output_first, + VertexFrontierType& vertex_frontier, + VertexOp v_op) +{ + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); + + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using reduce_op_input_t = typename ReduceOp::type; + + std::vector frontier_adj_matrix_partition_offsets( + graph_view.get_number_of_local_adj_matrix_partitions() + 1, + 0); // relevant only if GraphViewType::is_multi_gpu is true + thrust::device_vector + frontier_rows{}; // relevant only if GraphViewType::is_multi_gpu is true + edge_t max_pushes{0}; + + if (GraphViewType::is_multi_gpu) { + // need to merge row_frontier and update frontier_offsets; + CUGRAPH_FAIL("unimplemented."); + +#if 0 // comment out to suppress "loop is not reachable warning till the merge part is + // implemented." 
+ for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + matrix_partition_device_t matrix_partition(graph_view, i); + + max_pushes += thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + frontier_rows.begin() + frontier_adj_matrix_partition_offsets[i], + frontier_rows.begin() + frontier_adj_matrix_partition_offsets[i + 1], + [matrix_partition] __device__(auto row) { + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + return matrix_partition.get_local_degree(row_offset); + }, + edge_t{0}, + thrust::plus()); + } +#endif + } else { + matrix_partition_device_t matrix_partition(graph_view, 0); + + max_pushes = thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_first, + vertex_last, + [matrix_partition] __device__(auto row) { + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + return matrix_partition.get_local_degree(row_offset); + }, + edge_t{0}, + thrust::plus()); + } + + // FIXME: This is highly pessimistic for single GPU (and multi-GPU as well if we maintain + // additional per column data for filtering in e_op). If we can pause & resume execution if + // buffer needs to be increased (and if we reserve address space to avoid expensive + // reallocation; + // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management/), we can + // start with a smaller buffer size (especially when the frontier size is large). 
+ vertex_frontier.resize_buffer(max_pushes); + vertex_frontier.set_buffer_idx_value(0); + auto buffer_first = vertex_frontier.buffer_begin(); + auto buffer_key_first = std::get<0>(buffer_first); + auto buffer_payload_first = std::get<1>(buffer_first); + + if (GraphViewType::is_multi_gpu) { + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + matrix_partition_device_t matrix_partition(graph_view, i); + auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? 0 + : matrix_partition.get_major_value_start_offset(); + + raft::grid_1d_thread_t for_all_low_out_degree_grid( + frontier_adj_matrix_partition_offsets[i + 1] - frontier_adj_matrix_partition_offsets[i], + detail::update_frontier_v_push_if_out_nbr_for_all_low_out_degree_block_size, + handle.get_device_properties().maxGridSize[0]); + + // FIXME: This is highly inefficeint for graphs with high-degree vertices. If we renumber + // vertices to insure that rows within a partition are sorted by their out-degree in + // decreasing order, we will apply this kernel only to low out-degree vertices. + detail:: + for_all_frontier_row_for_all_nbr_low_out_degree<<>>( + matrix_partition, + frontier_rows.begin() + frontier_adj_matrix_partition_offsets[i], + frontier_rows.begin() + frontier_adj_matrix_partition_offsets[i + 1], + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first, + buffer_key_first, + buffer_payload_first, + vertex_frontier.get_buffer_idx_ptr(), + e_op); + } + } else { + matrix_partition_device_t matrix_partition(graph_view, 0); + + raft::grid_1d_thread_t for_all_low_out_degree_grid( + thrust::distance(vertex_first, vertex_last), + detail::update_frontier_v_push_if_out_nbr_for_all_low_out_degree_block_size, + handle.get_device_properties().maxGridSize[0]); + + // FIXME: This is highly inefficeint for graphs with high-degree vertices. 
If we renumber + // vertices to insure that rows within a partition are sorted by their out-degree in + // decreasing order, we will apply this kernel only to low out-degree vertices. + detail:: + for_all_frontier_row_for_all_nbr_low_out_degree<<>>( + matrix_partition, + vertex_first, + vertex_last, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + buffer_key_first, + buffer_payload_first, + vertex_frontier.get_buffer_idx_ptr(), + e_op); + } + + auto num_buffer_elements = detail::reduce_buffer_elements(handle, + buffer_key_first, + buffer_payload_first, + vertex_frontier.get_buffer_idx_value(), + reduce_op); + + if (GraphViewType::is_multi_gpu) { + // need to exchange buffer elements (and may reduce again) + CUGRAPH_FAIL("unimplemented."); + } + + if (num_buffer_elements > 0) { + raft::grid_1d_thread_t update_grid(num_buffer_elements, + detail::update_frontier_v_push_if_out_nbr_update_block_size, + handle.get_device_properties().maxGridSize[0]); + + auto constexpr invalid_vertex = invalid_vertex_id::value; + + auto bucket_and_bucket_size_device_ptrs = + vertex_frontier.get_bucket_and_bucket_size_device_pointers(); + detail::update_frontier_and_vertex_output_values + <<>>( + buffer_key_first, + buffer_payload_first, + num_buffer_elements, + vertex_value_input_first, + vertex_value_output_first, + std::get<0>(bucket_and_bucket_size_device_ptrs).get(), + std::get<1>(bucket_and_bucket_size_device_ptrs).get(), + VertexFrontierType::kInvalidBucketIdx, + invalid_vertex, + v_op); + + auto bucket_sizes_device_ptr = std::get<1>(bucket_and_bucket_size_device_ptrs); + thrust::host_vector bucket_sizes( + bucket_sizes_device_ptr, bucket_sizes_device_ptr + VertexFrontierType::kNumBuckets); + for (size_t i = 0; i < VertexFrontierType::kNumBuckets; ++i) { + vertex_frontier.get_bucket(i).set_size(bucket_sizes[i]); + } + } +} + +/* + +FIXME: +is_fully_functional type trait (???) 
for reduce_op + +iterating over lower triangular (or upper triangular) : triangle counting +LRB might be necessary if the cost of processing an edge (i, j) is a function of degree(i) and +degree(j) : triangle counting +push-pull switching support (e.g. DOBFS), in this case, we need both +CSR & CSC (trade-off execution time vs memory requirement, unless graph is symmetric) +should I take multi-GPU support as a template argument? +if graph is symmetric, there will be additional optimization opportunities (e.g. in-degree == +out-degree) For BFS, sending a bit vector (for the entire set of dest vertices per partitoin may +work better we can use thrust::set_intersection for triangle counting think about adding thrust +wrappers for reduction functions. Can I pass nullptr for dummy +instead of thrust::make_counting_iterator(0)? +*/ + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/vertex_frontier.cuh b/cpp/include/patterns/vertex_frontier.cuh new file mode 100644 index 00000000000..fba6326fd8d --- /dev/null +++ b/cpp/include/patterns/vertex_frontier.cuh @@ -0,0 +1,381 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +// FIXME: block size requires tuning +int32_t constexpr move_and_invalidate_if_block_size = 128; + +// FIXME: better move to another file for reusability +inline size_t round_up(size_t number_to_round, size_t modulus) +{ + return ((number_to_round + (modulus - 1)) / modulus) * modulus; +} + +template +auto make_buffer_zip_iterator_impl(std::vector& buffer_ptrs, + size_t offset, + std::index_sequence) +{ + auto key_ptr = reinterpret_cast(buffer_ptrs[0]) + offset; + auto payload_it = thrust::make_zip_iterator( + thrust::make_tuple(reinterpret_cast::type*>( + buffer_ptrs[1 + Is])...)); + return std::make_tuple(key_ptr, payload_it); +} + +template +auto make_buffer_zip_iterator(std::vector& buffer_ptrs, size_t offset) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + return make_buffer_zip_iterator_impl( + buffer_ptrs, offset, std::make_index_sequence()); +} + +template +__global__ void move_and_invalidate_if(RowIterator row_first, + RowIterator row_last, + vertex_t** bucket_ptrs, + size_t* bucket_sizes_ptr, + size_t this_bucket_idx, + size_t invalid_bucket_idx, + vertex_t invalid_vertex, + SplitOp split_op) +{ + static_assert( + std::is_same::value_type, vertex_t>::value); + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + size_t idx = tid; + size_t block_idx = blockIdx.x; + auto num_elements = thrust::distance(row_first, row_last); + // FIXME: it might be more performant to process more than one element per thread + auto num_blocks = (num_elements + blockDim.x - 1) / blockDim.x; + + using BlockScan = cub::BlockScan; + __shared__ typename BlockScan::TempStorage temp_storage; + + __shared__ size_t bucket_block_start_offsets[num_buckets]; + + size_t bucket_block_local_offsets[num_buckets]; + size_t 
bucket_block_aggregate_sizes[num_buckets]; + + while (block_idx < num_blocks) { + for (size_t i = 0; i < num_buckets; ++i) { bucket_block_local_offsets[i] = 0; } + + size_t selected_bucket_idx{invalid_bucket_idx}; + vertex_t key{invalid_vertex}; + + if (idx < num_elements) { + key = *(row_first + idx); + selected_bucket_idx = split_op(key); + if (selected_bucket_idx != this_bucket_idx) { + *(row_first + idx) = invalid_vertex; + if (selected_bucket_idx != invalid_bucket_idx) { + bucket_block_local_offsets[selected_bucket_idx] = 1; + } + } + } + + for (size_t i = 0; i < num_buckets; ++i) { + BlockScan(temp_storage) + .ExclusiveSum(bucket_block_local_offsets[i], + bucket_block_local_offsets[i], + bucket_block_aggregate_sizes[i]); + } + + if (threadIdx.x == 0) { + for (size_t i = 0; i < num_buckets; ++i) { + static_assert(sizeof(unsigned long long int) == sizeof(size_t)); + bucket_block_start_offsets[i] = + atomicAdd(reinterpret_cast(bucket_sizes_ptr + i), + static_cast(bucket_block_aggregate_sizes[i])); + } + } + + __syncthreads(); + + // FIXME: better use shared memory buffer to aggreaget global memory writes + if ((selected_bucket_idx != this_bucket_idx) && (selected_bucket_idx != invalid_bucket_idx)) { + bucket_ptrs[selected_bucket_idx][bucket_block_start_offsets[selected_bucket_idx] + + bucket_block_local_offsets[selected_bucket_idx]] = key; + } + + idx += gridDim.x * blockDim.x; + block_idx += gridDim.x; + } +} + +} // namespace detail + +template +class Bucket { + public: + Bucket(raft::handle_t const& handle, size_t capacity) + : handle_ptr_(&handle), elements_(capacity, invalid_vertex_id::value) + { + } + + void insert(vertex_t v) + { + elements_[size_] = v; + ++size_; + } + + size_t size() const { return size_; } + + void set_size(size_t size) { size_ = size; } + + template + std::enable_if_t aggregate_size() const + { + CUGRAPH_FAIL("unimplemented."); + return size_; + } + + template + std::enable_if_t aggregate_size() const + { + return size_; + } + + void 
clear() { size_ = 0; } + + size_t capacity() const { return elements_.size(); } + + auto const data() const { return elements_.data().get(); } + + auto data() { return elements_.data().get(); } + + auto const begin() const { return elements_.begin(); } + + auto begin() { return elements_.begin(); } + + auto const end() const { return elements_.begin() + size_; } + + auto end() { return elements_.begin() + size_; } + + private: + raft::handle_t const* handle_ptr_{nullptr}; + rmm::device_vector elements_{}; + size_t size_{0}; +}; + +template +class VertexFrontier { + public: + static size_t constexpr kNumBuckets = num_buckets; + static size_t constexpr kInvalidBucketIdx{std::numeric_limits::max()}; + + VertexFrontier(raft::handle_t const& handle, std::vector bucket_capacities) + : handle_ptr_(&handle), + tmp_bucket_ptrs_(num_buckets, nullptr), + tmp_bucket_sizes_(num_buckets, 0), + buffer_ptrs_(kReduceInputTupleSize + 1 /* to store destination column number */, nullptr), + buffer_idx_(0, handle_ptr_->get_stream()) + { + CUGRAPH_EXPECTS(bucket_capacities.size() == num_buckets, + "invalid input argument bucket_capacities (size mismatch)"); + for (size_t i = 0; i < num_buckets; ++i) { + buckets_.emplace_back(handle, bucket_capacities[i]); + } + buffer_.set_stream(handle_ptr_->get_stream()); + } + + Bucket& get_bucket(size_t bucket_idx) { return buckets_[bucket_idx]; } + + Bucket const& get_bucket(size_t bucket_idx) const + { + return buckets_[bucket_idx]; + } + + void swap_buckets(size_t bucket_idx0, size_t bucket_idx1) + { + std::swap(buckets_[bucket_idx0], buckets_[bucket_idx1]); + } + + template + void split_bucket(size_t bucket_idx, SplitOp split_op) + { + auto constexpr invalid_vertex = invalid_vertex_id::value; + + auto bucket_and_bucket_size_device_ptrs = get_bucket_and_bucket_size_device_pointers(); + + auto& this_bucket = get_bucket(bucket_idx); + raft::grid_1d_thread_t move_and_invalidate_if_grid( + this_bucket.size(), + 
detail::move_and_invalidate_if_block_size, + handle_ptr_->get_device_properties().maxGridSize[0]); + + detail::move_and_invalidate_if + <<get_stream()>>>(this_bucket.begin(), + this_bucket.end(), + std::get<0>(bucket_and_bucket_size_device_ptrs).get(), + std::get<1>(bucket_and_bucket_size_device_ptrs).get(), + bucket_idx, + kInvalidBucketIdx, + invalid_vertex, + split_op); + + // FIXME: if we adopt CUDA cooperative group https://devblogs.nvidia.com/cooperative-groups + // and global sync(), we can merge this step with the above kernel (and rename the above kernel + // to move_if) + auto it = + thrust::remove_if(rmm::exec_policy(handle_ptr_->get_stream())->on(handle_ptr_->get_stream()), + get_bucket(bucket_idx).begin(), + get_bucket(bucket_idx).end(), + [] __device__(auto value) { return value == invalid_vertex; }); + + auto bucket_sizes_device_ptr = std::get<1>(bucket_and_bucket_size_device_ptrs); + thrust::host_vector bucket_sizes(bucket_sizes_device_ptr, + bucket_sizes_device_ptr + kNumBuckets); + for (size_t i = 0; i < kNumBuckets; ++i) { + if (i != bucket_idx) { get_bucket(i).set_size(bucket_sizes[i]); } + } + + auto size = thrust::distance(get_bucket(bucket_idx).begin(), it); + get_bucket(bucket_idx).set_size(size); + + return; + } + + auto get_bucket_and_bucket_size_device_pointers() + { + thrust::host_vector tmp_ptrs(buckets_.size(), nullptr); + thrust::host_vector tmp_sizes(buckets_.size(), 0); + for (size_t i = 0; i < buckets_.size(); ++i) { + tmp_ptrs[i] = get_bucket(i).data(); + tmp_sizes[i] = get_bucket(i).size(); + } + tmp_bucket_ptrs_ = tmp_ptrs; + tmp_bucket_sizes_ = tmp_sizes; + return std::make_tuple(tmp_bucket_ptrs_.data(), tmp_bucket_sizes_.data()); + } + + void resize_buffer(size_t size) + { + // FIXME: rmm::device_buffer resize incurs copy if memory is reallocated, which is unnecessary + // in this case. 
+ buffer_.resize(compute_aggregate_buffer_size_in_bytes(size), handle_ptr_->get_stream()); + if (size > buffer_capacity_) { + buffer_capacity_ = size; + update_buffer_ptrs(); + } + buffer_size_ = size; + } + + void clear_buffer() { resize_buffer(0); } + + void shrink_to_fit_buffer() + { + if (buffer_size_ != buffer_capacity_) { + // FIXME: rmm::device_buffer shrink_to_fit incurs copy if memory is reallocated, which is + // unnecessary in this case. + buffer_.shrink_to_fit(handle_ptr_->get_stream()); + update_buffer_ptrs(); + buffer_capacity_ = buffer_size_; + } + } + + auto buffer_begin() + { + return detail::make_buffer_zip_iterator(buffer_ptrs_, 0); + } + + auto buffer_end() + { + return detail::make_buffer_zip_iterator(buffer_ptrs_, + buffer_size_); + } + + auto get_buffer_idx_ptr() { return buffer_idx_.data(); } + + size_t get_buffer_idx_value() { return buffer_idx_.value(handle_ptr_->get_stream()); } + + void set_buffer_idx_value(size_t value) + { + buffer_idx_.set_value(value, handle_ptr_->get_stream()); + } + + private: + static size_t constexpr kReduceInputTupleSize = thrust::tuple_size::value; + static size_t constexpr kBufferAlignment = 128; + + raft::handle_t const* handle_ptr_{nullptr}; + std::vector> buckets_{}; + rmm::device_vector tmp_bucket_ptrs_{}; + rmm::device_vector tmp_bucket_sizes_{}; + + std::array tuple_element_sizes_ = + compute_thrust_tuple_element_sizes()(); + std::vector buffer_ptrs_{}; + rmm::device_buffer buffer_{}; + size_t buffer_size_{0}; + size_t buffer_capacity_{0}; + rmm::device_scalar buffer_idx_{}; + + size_t compute_aggregate_buffer_size_in_bytes(size_t size) + { + size_t aggregate_buffer_size_in_bytes = + detail::round_up(sizeof(vertex_t) * size, kBufferAlignment); + for (size_t i = 0; i < kReduceInputTupleSize; ++i) { + aggregate_buffer_size_in_bytes += + detail::round_up(tuple_element_sizes_[i] * size, kBufferAlignment); + } + return aggregate_buffer_size_in_bytes; + } + + void update_buffer_ptrs() + { + uintptr_t ptr = 
reinterpret_cast(buffer_.data()); + buffer_ptrs_[0] = reinterpret_cast(ptr); + ptr += detail::round_up(sizeof(vertex_t) * buffer_capacity_, kBufferAlignment); + for (size_t i = 0; i < kReduceInputTupleSize; ++i) { + buffer_ptrs_[1 + i] = reinterpret_cast(ptr); + ptr += detail::round_up(tuple_element_sizes_[i] * buffer_capacity_, kBufferAlignment); + } + } +}; + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/utilities/thrust_tuple_utils.cuh b/cpp/include/utilities/thrust_tuple_utils.cuh new file mode 100644 index 00000000000..f2b18adafce --- /dev/null +++ b/cpp/include/utilities/thrust_tuple_utils.cuh @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include + +#include +#include +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +template +struct is_thrust_tuple_of_arithemetic_impl { + constexpr bool evaluate() const + { + if (!std::is_arithmetic::type>::value) { + return false; + } else { + return is_thrust_tuple_of_arithemetic_impl().evaluate(); + } + } +}; + +template +struct is_thrust_tuple_of_arithemetic_impl { + constexpr bool evaluate() const { return true; } +}; + +template +struct compute_thrust_tuple_element_sizes_impl { + void compute(std::array::value>& arr) const + { + arr[I] = sizeof(typename thrust::tuple_element::type); + compute_thrust_tuple_element_sizes_impl().compute(arr); + } +}; + +template +struct compute_thrust_tuple_element_sizes_impl { + void compute(std::array::value>& arr) const {} +}; + +template +__device__ constexpr auto remove_first_thrust_tuple_element_impl(TupleType const& tuple, + std::index_sequence) +{ + return thrust::make_tuple(thrust::get<1 + Is>(tuple)...); +} + +template +struct plus_thrust_tuple_impl { + __host__ __device__ constexpr void compute(TupleType& lhs, TupleType const& rhs) const + { + thrust::get(lhs) += thrust::get(rhs); + plus_thrust_tuple_impl().compute(lhs, rhs); + } +}; + +template +struct plus_thrust_tuple_impl { + __host__ __device__ constexpr void compute(TupleType& lhs, TupleType const& rhs) const {} +}; + +template +__device__ std::enable_if_t::value, void> atomic_accumulate_impl(T& lhs, + T const& rhs) +{ + atomicAdd(&lhs, rhs); +} + +template +__device__ std::enable_if_t::value, void> atomic_accumulate_impl( + thrust::detail::any_assign& /* dereferencing thrust::discard_iterator results in this type */ lhs, + T const& rhs) +{ + // no-op +} + +template +struct atomic_accumulate_thrust_tuple_impl { + __device__ constexpr void compute(Iterator iter, TupleType const& value) const + { + atomic_accumulate_impl(thrust::raw_reference_cast(thrust::get(*iter)), + 
thrust::get(value)); + atomic_accumulate_thrust_tuple_impl().compute(iter, value); + } +}; + +template +struct atomic_accumulate_thrust_tuple_impl { + __device__ constexpr void compute(Iterator iter, TupleType const& value) const {} +}; + +template +struct block_reduce_thrust_tuple_impl { + __device__ void compute(TupleType& tuple) const + { + using T = typename thrust::tuple_element::type; + using BlockReduce = cub::BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + thrust::get(tuple) = BlockReduce(temp_storage).Sum(thrust::get(tuple)); + } +}; + +template +struct block_reduce_thrust_tuple_impl { + __device__ void compute(TupleType& tuple) const {} +}; + +} // namespace detail + +template +struct is_thrust_tuple : std::false_type { +}; + +template +struct is_thrust_tuple> : std::true_type { +}; + +template +struct is_thrust_tuple_of_arithmetic : std::false_type { +}; + +template +struct is_thrust_tuple_of_arithmetic::value>> { + static constexpr bool value = + detail::is_thrust_tuple_of_arithemetic_impl( + thrust::tuple_size::value)>() + .evaluate(); +}; + +template +struct is_arithmetic_or_thrust_tuple_of_arithmetic + : std::integral_constant::value> { +}; + +template +struct is_arithmetic_or_thrust_tuple_of_arithmetic> + : std::integral_constant>::value> { +}; + +template +struct compute_thrust_tuple_element_sizes { + auto operator()() const + { + size_t constexpr tuple_size = thrust::tuple_size::value; + std::array ret; + detail::compute_thrust_tuple_element_sizes_impl().compute( + ret); + return ret; + } +}; + +template +struct remove_first_thrust_tuple_element { + __device__ constexpr auto operator()(TupleType const& tuple) const + { + size_t constexpr tuple_size = thrust::tuple_size::value; + return detail::remove_first_thrust_tuple_element_impl( + tuple, std::make_index_sequence()); + } +}; + +template +struct plus_thrust_tuple { + __host__ __device__ constexpr TupleType operator()(TupleType const& lhs, + TupleType const& rhs) const 
+ { + size_t constexpr tuple_size = thrust::tuple_size::value; + auto ret = lhs; + detail::plus_thrust_tuple_impl().compute(ret, rhs); + return ret; + } +}; + +template +struct atomic_accumulate_thrust_tuple { + __device__ constexpr void operator()(Iterator iter, TupleType const& value) const + { + static_assert( + thrust::tuple_size::value_type>::value == + thrust::tuple_size::value); + size_t constexpr tuple_size = thrust::tuple_size::value; + detail::atomic_accumulate_thrust_tuple_impl() + .compute(iter, value); + } +}; + +template +struct block_reduce_thrust_tuple { + __device__ TupleType operator()(TupleType const& tuple) const + { + size_t constexpr tuple_size = thrust::tuple_size::value; + auto ret = tuple; + detail::block_reduce_thrust_tuple_impl().compute( + ret); + return ret; + } +}; + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/vertex_partition_device.cuh b/cpp/include/vertex_partition_device.cuh new file mode 100644 index 00000000000..a6a78ad3878 --- /dev/null +++ b/cpp/include/vertex_partition_device.cuh @@ -0,0 +1,112 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#pragma once + +#include +#include + +#include + +namespace cugraph { +namespace experimental { + +template +class vertex_partition_device_base_t { + public: + vertex_partition_device_base_t(vertex_t number_of_vertices) + : number_of_vertices_(number_of_vertices) + { + } + + template + __host__ __device__ std::enable_if_t::value, bool> is_valid_vertex( + vertex_type v) const noexcept + { + return ((v >= 0) && (v < number_of_vertices_)); + } + + template + __host__ __device__ std::enable_if_t::value, bool> is_valid_vertex( + vertex_type v) const noexcept + { + return (v < number_of_vertices_); + } + + private: + // should be trivially copyable to device + vertex_t number_of_vertices_{0}; +}; + +template +class vertex_partition_device_t; + +// multi-GPU version +template +class vertex_partition_device_t> + : public vertex_partition_device_base_t { + public: + vertex_partition_device_t(GraphViewType const& graph_view) + : vertex_partition_device_base_t( + graph_view.get_number_of_vertices()), + first_(graph_view.get_local_vertex_first()), + last_(graph_view.get_local_vertex_last()) + { + } + + __host__ __device__ bool is_local_vertex_nocheck(typename GraphViewType::vertex_type v) const + noexcept + { + return (v >= first_) && (v < last_); + } + + __host__ __device__ typename GraphViewType::vertex_type + get_local_vertex_offset_from_vertex_nocheck(typename GraphViewType::vertex_type v) const noexcept + { + return v - first_; + } + + private: + // should be trivially copyable to device + typename GraphViewType::vertex_type first_{0}; + typename GraphViewType::vertex_type last_{0}; +}; + +// single-GPU version +template +class vertex_partition_device_t> + : public vertex_partition_device_base_t { + public: + vertex_partition_device_t(GraphViewType const& graph_view) + : vertex_partition_device_base_t( + graph_view.get_number_of_vertices()) + { + } + + __host__ __device__ constexpr bool is_local_vertex_nocheck( + typename GraphViewType::vertex_type v) const noexcept 
+ { + return true; + } + + __host__ __device__ constexpr typename GraphViewType::vertex_type + get_local_vertex_offset_from_vertex_nocheck(typename GraphViewType::vertex_type v) const noexcept + { + return v; + } +}; + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/bfs.cu b/cpp/src/experimental/bfs.cu new file mode 100644 index 00000000000..adcdd65f645 --- /dev/null +++ b/cpp/src/experimental/bfs.cu @@ -0,0 +1,218 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace cugraph { +namespace experimental { +namespace detail { + +template +void bfs(raft::handle_t &handle, + GraphViewType const &push_graph_view, + typename GraphViewType::vertex_type *distances, + PredecessorIterator predecessor_first, + typename GraphViewType::vertex_type source_vertex, + bool direction_optimizing, + typename GraphViewType::vertex_type depth_limit, + bool do_expensive_check) +{ + using vertex_t = typename GraphViewType::vertex_type; + + static_assert(std::is_integral::value, + "GraphViewType::vertex_type should be integral."); + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); + + auto const num_vertices = push_graph_view.get_number_of_vertices(); + if (num_vertices == 0) { return; } + + // 1. check input arguments + + CUGRAPH_EXPECTS( + push_graph_view.is_symmetric() || !direction_optimizing, + "Invalid input argument: input graph should be symmetric for direction optimizing BFS."); + CUGRAPH_EXPECTS(push_graph_view.is_valid_vertex(source_vertex), + "Invalid input argument: source vertex out-of-range."); + + if (do_expensive_check) { + // nothing to do + } + + // 2. 
initialize distances and predecessors + + auto constexpr invalid_distance = std::numeric_limits::max(); + auto constexpr invalid_vertex = invalid_vertex_id::value; + + auto val_first = thrust::make_zip_iterator(thrust::make_tuple(distances, predecessor_first)); + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(push_graph_view.get_local_vertex_first()), + thrust::make_counting_iterator(push_graph_view.get_local_vertex_last()), + val_first, + [source_vertex] __device__(auto val) { + auto distance = invalid_distance; + if (val == source_vertex) { distance = vertex_t{0}; } + return thrust::make_tuple(distance, invalid_vertex); + }); + + // 3. initialize BFS frontier + + enum class Bucket { cur, num_buckets }; + std::vector bucket_sizes(static_cast(Bucket::num_buckets), + push_graph_view.get_number_of_local_vertices()); + VertexFrontier, vertex_t, false, static_cast(Bucket::num_buckets)> + vertex_frontier(handle, bucket_sizes); + + if (push_graph_view.is_local_vertex_nocheck(source_vertex)) { + vertex_frontier.get_bucket(static_cast(Bucket::cur)).insert(source_vertex); + } + + // 4. 
BFS iteration + + vertex_t depth{0}; + auto cur_local_vertex_frontier_first = + vertex_frontier.get_bucket(static_cast(Bucket::cur)).begin(); + auto cur_vertex_frontier_aggregate_size = + vertex_frontier.get_bucket(static_cast(Bucket::cur)).aggregate_size(); + while (true) { + if (direction_optimizing) { + CUGRAPH_FAIL("unimplemented."); + } else { + vertex_partition_device_t vertex_partition(push_graph_view); + + auto cur_local_vertex_frontier_last = + vertex_frontier.get_bucket(static_cast(Bucket::cur)).end(); + update_frontier_v_push_if_out_nbr( + handle, + push_graph_view, + cur_local_vertex_frontier_first, + cur_local_vertex_frontier_last, + thrust::make_constant_iterator(0) /* dummy */, + thrust::make_constant_iterator(0) /* dummy */, + [vertex_partition, distances] __device__( + vertex_t src, vertex_t dst, auto src_val, auto dst_val) { + auto push = true; + if (vertex_partition.is_local_vertex_nocheck(dst)) { + auto distance = + *(distances + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(dst)); + if (distance != invalid_distance) { push = false; } + } + // FIXME: need to test this works properly if payload size is 0 (returns a tuple of size + // 1) + return thrust::make_tuple(push, src); + }, + reduce_op::any>(), + distances, + thrust::make_zip_iterator(thrust::make_tuple(distances, predecessor_first)), + vertex_frontier, + [depth] __device__(auto v_val, auto pushed_val) { + auto idx = (v_val == invalid_distance) + ? 
static_cast(Bucket::cur) + : VertexFrontier, vertex_t>::kInvalidBucketIdx; + return thrust::make_tuple(idx, depth + 1, thrust::get<0>(pushed_val)); + }); + + auto new_vertex_frontier_aggregate_size = + vertex_frontier.get_bucket(static_cast(Bucket::cur)).aggregate_size() - + cur_vertex_frontier_aggregate_size; + if (new_vertex_frontier_aggregate_size == 0) { break; } + + cur_local_vertex_frontier_first = cur_local_vertex_frontier_last; + cur_vertex_frontier_aggregate_size += new_vertex_frontier_aggregate_size; + } + + depth++; + if (depth >= depth_limit) { break; } + } + + return; +} + +} // namespace detail + +template +void bfs(raft::handle_t &handle, + graph_view_t const &graph_view, + vertex_t *distances, + vertex_t *predecessors, + vertex_t source_vertex, + bool direction_optimizing, + vertex_t depth_limit, + bool do_expensive_check) +{ + if (predecessors != nullptr) { + detail::bfs(handle, + graph_view, + distances, + predecessors, + source_vertex, + direction_optimizing, + depth_limit, + do_expensive_check); + } else { + detail::bfs(handle, + graph_view, + distances, + thrust::make_discard_iterator(), + source_vertex, + direction_optimizing, + depth_limit, + do_expensive_check); + } +} + +// explicit instantiation + +template void bfs(raft::handle_t &handle, + graph_view_t const &graph_view, + int32_t *distances, + int32_t *predecessors, + int32_t source_vertex, + bool direction_optimizing, + int32_t depth_limit, + bool do_expensive_check); + +template void bfs(raft::handle_t &handle, + graph_view_t const &graph_view, + int32_t *distances, + int32_t *predecessors, + int32_t source_vertex, + bool direction_optimizing, + int32_t depth_limit, + bool do_expensive_check); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 7b7625fd911..eb791206c3c 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -16,6 +16,7 @@ #include #include +#include #include 
#include @@ -218,15 +219,17 @@ graph_tget_handle_ptr()->get_comms(); - auto const comm_p_size = comm_p.get_size(); - auto &comm_p_row = this->get_handle_ptr()->get_subcomm(comm_p_row_key); - auto const comm_p_row_rank = comm_p_row.get_rank(); - auto const comm_p_row_size = comm_p_row.get_size(); - auto &comm_p_col = this->get_handle_ptr()->get_subcomm(comm_p_col_key); - auto const comm_p_col_rank = comm_p_col.get_rank(); - auto const comm_p_col_size = comm_p_col.get_size(); - auto default_stream = this->get_handle_ptr()->get_stream(); + auto &comm = this->get_handle_ptr()->get_comms(); + auto const comm_size = comm.get_size(); + auto &row_comm = + this->get_handle_ptr()->get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto &col_comm = + this->get_handle_ptr()->get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); + auto default_stream = this->get_handle_ptr()->get_stream(); CUGRAPH_EXPECTS(edgelists.size() > 0, "Invalid API parameter: edgelists.size() should be non-zero."); @@ -247,7 +250,7 @@ graph_t(comm_p_row_size))) || + (edgelists.size() == static_cast(row_comm_size))) || (!(partition.is_hypergraph_partitioned()) && (edgelists.size() == 1)), "Invalid API parameter: errneous edgelists.size()."); @@ -286,7 +289,7 @@ graph_t aggregate_segment_offsets( - comm_p_row_size * segment_offsets.size(), default_stream); - comm_p_row.allgather(segment_offsets.data(), - aggregate_segment_offsets.data(), - segment_offsets.size(), - default_stream); + rmm::device_uvector aggregate_segment_offsets(row_comm_size * segment_offsets.size(), + default_stream); + row_comm.allgather(segment_offsets.data(), + aggregate_segment_offsets.data(), + segment_offsets.size(), + default_stream); - vertex_partition_segment_offsets_.resize(comm_p_row_size * 
(segment_offsets.size())); + vertex_partition_segment_offsets_.resize(row_comm_size * (segment_offsets.size())); raft::update_host(vertex_partition_segment_offsets_.data(), aggregate_segment_offsets.data(), aggregate_segment_offsets.size(), diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index b297a825a01..5038f521a69 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -16,6 +16,7 @@ #include #include +#include #include #include @@ -76,9 +77,13 @@ graph_view_tget_handle_ptr()->get_comms().get_size(); - auto const comm_p_row_size = this->get_handle_ptr()->get_subcomm(comm_p_row_key).get_size(); - auto const comm_p_col_size = this->get_handle_ptr()->get_subcomm(comm_p_col_key).get_size(); + auto const comm_size = this->get_handle_ptr()->get_comms().get_size(); + auto const row_comm_size = this->get_handle_ptr() + ->get_subcomm(cugraph::partition_2d::key_naming_t().row_name()) + .get_size(); + auto const col_comm_size = this->get_handle_ptr() + ->get_subcomm(cugraph::partition_2d::key_naming_t().col_name()) + .get_size(); CUGRAPH_EXPECTS(adj_matrix_partition_offsets.size() == adj_matrix_partition_indices.size(), "Invalid API parameter: adj_matrix_partition_offsets.size() and " @@ -90,13 +95,13 @@ graph_view_t(comm_p_row_size))) || + (adj_matrix_partition_offsets.size() == static_cast(row_comm_size))) || (!(partition.is_hypergraph_partitioned()) && (adj_matrix_partition_offsets.size() == 1)), "Invalid API parameter: errneous adj_matrix_partition_offsets.size()."); CUGRAPH_EXPECTS((sorted_by_global_degree_within_vertex_partition && (vertex_partition_segment_offsets.size() == - comm_p_col_size * (detail::num_segments_per_vertex_partition + 1))) || + col_comm_size * (detail::num_segments_per_vertex_partition + 1))) || (!sorted_by_global_degree_within_vertex_partition && (vertex_partition_segment_offsets.size() == 0)), "Invalid API parameter: vertex_partition_segment_offsets.size() does not match 
" @@ -107,8 +112,12 @@ graph_view_tget_handle_ptr()->get_stream(); - auto const comm_p_row_rank = this->get_handle_ptr()->get_subcomm(comm_p_row_key).get_rank(); - auto const comm_p_col_rank = this->get_handle_ptr()->get_subcomm(comm_p_col_key).get_rank(); + auto const row_comm_rank = this->get_handle_ptr() + ->get_subcomm(cugraph::partition_2d::key_naming_t().row_name()) + .get_rank(); + auto const col_comm_rank = this->get_handle_ptr() + ->get_subcomm(cugraph::partition_2d::key_naming_t().col_name()) + .get_rank(); edge_t number_of_local_edges_sum{}; for (size_t i = 0; i < adj_matrix_partition_offsets.size(); ++i) { @@ -159,7 +168,7 @@ graph_view_t +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { +namespace detail { + +template +void katz_centrality(raft::handle_t &handle, + GraphViewType const &pull_graph_view, + result_t *betas, + result_t *katz_centralities, + result_t alpha, + result_t beta, // relevant only if betas == nullptr + result_t epsilon, + size_t max_iterations, + bool has_initial_guess, + bool normalize, + bool do_expensive_check) +{ + using vertex_t = typename GraphViewType::vertex_type; + using weight_t = typename GraphViewType::weight_type; + + static_assert(std::is_integral::value, + "GraphViewType::vertex_type should be integral."); + static_assert(std::is_floating_point::value, + "result_t should be a floating-point type."); + static_assert(GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the pull model."); + + auto const num_vertices = pull_graph_view.get_number_of_vertices(); + if (num_vertices == 0) { return; } + + // 1. 
check input arguments + + CUGRAPH_EXPECTS((alpha >= 0.0) && (alpha <= 1.0), + "Invalid input argument: alpha should be in [0.0, 1.0]."); + CUGRAPH_EXPECTS(epsilon >= 0.0, "Invalid input argument: epsilon should be non-negative."); + + if (do_expensive_check) { + // FIXME: should I check for betas? + + if (has_initial_guess) { + auto num_negative_values = count_if_v( + handle, pull_graph_view, katz_centralities, [] __device__(auto val) { return val < 0.0; }); + CUGRAPH_EXPECTS(num_negative_values == 0, + "Invalid input argument: initial guess values should be non-negative."); + } + } + + // 2. initialize katz centrality values + + if (!has_initial_guess) { + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + katz_centralities, + katz_centralities + pull_graph_view.get_number_of_local_vertices(), + result_t{0.0}); + } + + // 3. katz centrality iteration + + // old katz centrality values + rmm::device_vector adj_matrix_row_katz_centralities( + pull_graph_view.get_number_of_local_adj_matrix_partition_rows(), result_t{0.0}); + size_t iter{0}; + while (true) { + copy_to_adj_matrix_row( + handle, pull_graph_view, katz_centralities, adj_matrix_row_katz_centralities.begin()); + + copy_v_transform_reduce_in_nbr( + handle, + pull_graph_view, + adj_matrix_row_katz_centralities.begin(), + thrust::make_constant_iterator(0) /* dummy */, + [alpha] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) { + return static_cast(alpha * src_val * w); + }, + betas != nullptr ? 
result_t{0.0} : beta, + katz_centralities); + + if (betas != nullptr) { + auto val_first = thrust::make_zip_iterator(thrust::make_tuple(katz_centralities, betas)); + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + val_first, + val_first + pull_graph_view.get_number_of_local_vertices(), + katz_centralities, + [] __device__(auto val) { + auto const katz_centrality = thrust::get<0>(val); + auto const beta = thrust::get<1>(val); + return katz_centrality + beta; + }); + } + + auto diff_sum = transform_reduce_v_with_adj_matrix_row( + handle, + pull_graph_view, + katz_centralities, + adj_matrix_row_katz_centralities.begin(), + [] __device__(auto v_val, auto row_val) { return std::abs(v_val - row_val); }, + result_t{0.0}); + + iter++; + + if (diff_sum < static_cast(num_vertices) * epsilon) { + break; + } else if (iter >= max_iterations) { + CUGRAPH_FAIL("Katz Centrality failed to converge."); + } + } + + if (normalize) { + auto l2_norm = transform_reduce_v( + handle, + pull_graph_view, + katz_centralities, + [] __device__(auto val) { return val * val; }, + result_t{0.0}); + l2_norm = std::sqrt(l2_norm); + CUGRAPH_EXPECTS(l2_norm > 0.0, + "L2 norm of the computed Katz Centrality values should be positive."); + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + katz_centralities, + katz_centralities + pull_graph_view.get_number_of_local_vertices(), + katz_centralities, + [l2_norm] __device__(auto val) { return val / l2_norm; }); + } + + return; +} + +} // namespace detail + +template +void katz_centrality(raft::handle_t &handle, + graph_view_t const &graph_view, + result_t *betas, + result_t *katz_centralities, + result_t alpha, + result_t beta, // relevant only if beta == nullptr + result_t epsilon, + size_t max_iterations, + bool has_initial_guess, + bool normalize, + bool do_expensive_check) +{ + detail::katz_centrality(handle, + graph_view, + betas, + katz_centralities, + alpha, + beta, + epsilon, + 
max_iterations, + has_initial_guess, + normalize, + do_expensive_check); +} + +// explicit instantiation + +template void katz_centrality(raft::handle_t &handle, + graph_view_t const &graph_view, + float *betas, + float *katz_centralities, + float alpha, + float beta, + float epsilon, + size_t max_iterations, + bool has_initial_guess, + bool normalize, + bool do_expensive_check); + +template void katz_centrality(raft::handle_t &handle, + graph_view_t const &graph_view, + float *betas, + float *katz_centralities, + float alpha, + float beta, + float epsilon, + size_t max_iterations, + bool has_initial_guess, + bool normalize, + bool do_expensive_check); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu new file mode 100644 index 00000000000..0eb5da952f3 --- /dev/null +++ b/cpp/src/experimental/pagerank.cu @@ -0,0 +1,349 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace cugraph { +namespace experimental { +namespace detail { + +// FIXME: personalization_vector_size is confusing in OPG (local or aggregate?) 
+template +void pagerank(raft::handle_t& handle, + GraphViewType const& pull_graph_view, + typename GraphViewType::weight_type* adj_matrix_row_out_weight_sums, + typename GraphViewType::vertex_type* personalization_vertices, + result_t* personalization_values, + typename GraphViewType::vertex_type personalization_vector_size, + result_t* pageranks, + result_t alpha, + result_t epsilon, + size_t max_iterations, + bool has_initial_guess, + bool do_expensive_check) +{ + using vertex_t = typename GraphViewType::vertex_type; + using weight_t = typename GraphViewType::weight_type; + + static_assert(std::is_integral::value, + "GraphViewType::vertex_type should be integral."); + static_assert(std::is_floating_point::value, + "result_t should be a floating-point type."); + static_assert(GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the pull model."); + + auto const num_vertices = pull_graph_view.get_number_of_vertices(); + if (num_vertices == 0) { return; } + + // 1. check input arguments + + CUGRAPH_EXPECTS( + (personalization_vertices == nullptr) || (personalization_values != nullptr), + "Invalid input argument: if personalization verties are provided, personalization " + "values should be provided as well."); + CUGRAPH_EXPECTS((alpha >= 0.0) && (alpha <= 1.0), + "Invalid input argument: alpha should be in [0.0, 1.0]."); + CUGRAPH_EXPECTS(epsilon >= 0.0, "Invalid input argument: epsilon should be non-negative."); + + if (do_expensive_check) { + if (adj_matrix_row_out_weight_sums != nullptr) { + auto has_negative_weight_sums = any_of_adj_matrix_row( + handle, pull_graph_view, adj_matrix_row_out_weight_sums, [] __device__(auto val) { + return val < result_t{0.0}; + }); + CUGRAPH_EXPECTS( + has_negative_weight_sums == false, + "Invalid input argument: outgoing edge weight sum values should be non-negative."); + } + + if (pull_graph_view.is_weighted()) { + auto num_nonpositive_edge_weights = count_if_e( + handle, + pull_graph_view, + 
thrust::make_constant_iterator(0) /* dummy */, + thrust::make_constant_iterator(0) /* dummy */, + [] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) { + return w <= 0.0; + }); + CUGRAPH_EXPECTS(num_nonpositive_edge_weights == 0, + "Invalid input argument: input graph should have postive edge weights."); + } + + if (has_initial_guess) { + auto num_negative_values = count_if_v( + handle, pull_graph_view, pageranks, [] __device__(auto val) { return val < 0.0; }); + CUGRAPH_EXPECTS(num_negative_values == 0, + "Invalid input argument: initial guess values should be non-negative."); + } + + if (personalization_vertices != nullptr) { + vertex_partition_device_t vertex_partition(pull_graph_view); + auto num_invalid_vertices = + count_if_v(handle, + pull_graph_view, + personalization_vertices, + personalization_vertices + personalization_vector_size, + [vertex_partition] __device__(auto val) { + return !(vertex_partition.is_valid_vertex(val) && + vertex_partition.is_local_vertex_nocheck(val)); + }); + CUGRAPH_EXPECTS(num_invalid_vertices == 0, + "Invalid input argument: peresonalization vertices have invalid vertex IDs."); + auto num_negative_values = count_if_v(handle, + pull_graph_view, + personalization_values, + personalization_values + personalization_vector_size, + [] __device__(auto val) { return val < 0.0; }); + CUGRAPH_EXPECTS(num_negative_values == 0, + "Invalid input argument: peresonalization values should be non-negative."); + } + } + + // 2. compute the sums of the out-going edge weights (if not provided) + + rmm::device_vector tmp_adj_matrix_row_out_weight_sums{}; + if (adj_matrix_row_out_weight_sums == nullptr) { + rmm::device_vector tmp_out_weight_sums(pull_graph_view.get_number_of_local_vertices(), + weight_t{0.0}); + // FIXME: better refactor this out (computing out-degree). 
+ copy_v_transform_reduce_out_nbr( + handle, + pull_graph_view, + thrust::make_constant_iterator(0) /* dummy */, + thrust::make_constant_iterator(0) /* dummy */, + [alpha] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) { + return w; + }, + weight_t{0.0}, + tmp_out_weight_sums.data().get()); + + tmp_adj_matrix_row_out_weight_sums.assign( + pull_graph_view.get_number_of_local_adj_matrix_partition_rows(), weight_t{0.0}); + copy_to_adj_matrix_row(handle, + pull_graph_view, + tmp_out_weight_sums.data().get(), + tmp_adj_matrix_row_out_weight_sums.begin()); + } + + auto row_out_weight_sums = adj_matrix_row_out_weight_sums != nullptr + ? adj_matrix_row_out_weight_sums + : tmp_adj_matrix_row_out_weight_sums.data().get(); + + // 3. initialize pagerank values + + if (has_initial_guess) { + auto sum = reduce_v(handle, pull_graph_view, pageranks, result_t{0.0}); + CUGRAPH_EXPECTS( + sum > 0.0, + "Invalid input argument: sum of the PageRank initial guess values should be positive."); + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pageranks, + pageranks + pull_graph_view.get_number_of_local_vertices(), + pageranks, + [sum] __device__(auto val) { return val / sum; }); + } else { + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pageranks, + pageranks + pull_graph_view.get_number_of_local_vertices(), + result_t{1.0} / static_cast(num_vertices)); + } + + // 4. sum the personalization values + + result_t personalization_sum{0.0}; + if (personalization_vertices != nullptr) { + personalization_sum = reduce_v(handle, + pull_graph_view, + personalization_values, + personalization_values + personalization_vector_size, + result_t{0.0}); + CUGRAPH_EXPECTS(personalization_sum > 0.0, + "Invalid input argument: sum of personalization valuese should be positive."); + } + + // 5. 
pagerank iteration + + // old PageRank values + rmm::device_vector adj_matrix_row_pageranks( + pull_graph_view.get_number_of_local_adj_matrix_partition_rows(), result_t{0.0}); + size_t iter{0}; + while (true) { + copy_to_adj_matrix_row(handle, pull_graph_view, pageranks, adj_matrix_row_pageranks.begin()); + + auto row_val_first = thrust::make_zip_iterator( + thrust::make_tuple(adj_matrix_row_pageranks.begin(), row_out_weight_sums)); + thrust::transform( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + row_val_first, + row_val_first + pull_graph_view.get_number_of_local_adj_matrix_partition_rows(), + adj_matrix_row_pageranks.begin(), + [] __device__(auto val) { + auto const row_pagerank = thrust::get<0>(val); + auto const row_out_weight_sum = thrust::get<1>(val); + auto const divisor = + row_out_weight_sum == result_t{0.0} ? result_t{1.0} : row_out_weight_sum; + return row_pagerank / divisor; + }); + + auto dangling_sum = transform_reduce_v_with_adj_matrix_row( + handle, + pull_graph_view, + thrust::make_constant_iterator(0) /* dummy */, + row_val_first, + [] __device__(auto v_val, auto row_val) { + auto const row_pagerank = thrust::get<0>(row_val); + auto const row_out_weight_sum = thrust::get<1>(row_val); + return row_out_weight_sum == result_t{0.0} ? row_pagerank : result_t{0.0}; + }, + result_t{0.0}); + + auto unvarying_part = + personalization_vertices == nullptr + ? 
(dangling_sum + static_cast(1.0 - alpha)) / static_cast(num_vertices) + : result_t{0.0}; + + copy_v_transform_reduce_in_nbr( + handle, + pull_graph_view, + adj_matrix_row_pageranks.begin(), + thrust::make_constant_iterator(0) /* dummy */, + [alpha] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) { + return src_val * w * alpha; + }, + unvarying_part, + pageranks); + + if (personalization_vertices != nullptr) { + vertex_partition_device_t vertex_partition(pull_graph_view); + auto val_first = thrust::make_zip_iterator( + thrust::make_tuple(personalization_vertices, personalization_values)); + thrust::for_each( + val_first, + val_first + personalization_vector_size, + [vertex_partition, pageranks, dangling_sum, personalization_sum, alpha] __device__( + auto val) { + auto v = thrust::get<0>(val); + auto value = thrust::get<1>(val); + *(pageranks + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v)) += + (dangling_sum + static_cast(1.0 - alpha)) * (value / personalization_sum); + }); + } + + auto diff_sum = transform_reduce_v_with_adj_matrix_row( + handle, + pull_graph_view, + pageranks, + thrust::make_zip_iterator( + thrust::make_tuple(adj_matrix_row_pageranks.begin(), row_out_weight_sums)), + [] __device__(auto v_val, auto row_val) { + auto multiplier = + thrust::get<1>(row_val) == result_t{0.0} ? 
result_t{1.0} : thrust::get<1>(row_val); + return std::abs(v_val - thrust::get<0>(row_val) * multiplier); + }, + result_t{0.0}); + + iter++; + + if (diff_sum < static_cast(num_vertices) * epsilon) { + break; + } else if (iter >= max_iterations) { + CUGRAPH_FAIL("PageRank failed to converge."); + } + } + + return; +} + +} // namespace detail + +template +void pagerank(raft::handle_t& handle, + graph_view_t const& graph_view, + weight_t* adj_matrix_row_out_weight_sums, + vertex_t* personalization_vertices, + result_t* personalization_values, + vertex_t personalization_vector_size, + result_t* pageranks, + result_t alpha, + result_t epsilon, + size_t max_iterations, + bool has_initial_guess, + bool do_expensive_check) +{ + detail::pagerank(handle, + graph_view, + adj_matrix_row_out_weight_sums, + personalization_vertices, + personalization_values, + personalization_vector_size, + pageranks, + alpha, + epsilon, + max_iterations, + has_initial_guess, + do_expensive_check); +} + +// explicit instantiation + +template void pagerank(raft::handle_t& handle, + graph_view_t const& graph_view, + float* adj_matrix_row_out_weight_sums, + int32_t* personalization_vertices, + float* personalization_values, + int32_t personalization_vector_size, + float* pageranks, + float alpha, + float epsilon, + size_t max_iterations, + bool has_initial_guess, + bool do_expensive_check); + +template void pagerank(raft::handle_t& handle, + graph_view_t const& graph_view, + float* adj_matrix_row_out_weight_sums, + int32_t* personalization_vertices, + float* personalization_values, + int32_t personalization_vector_size, + float* pageranks, + float alpha, + float epsilon, + size_t max_iterations, + bool has_initial_guess, + bool do_expensive_check); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/experimental/sssp.cu b/cpp/src/experimental/sssp.cu new file mode 100644 index 00000000000..3c3f43631ec --- /dev/null +++ b/cpp/src/experimental/sssp.cu @@ -0,0 +1,285 @@ +/* + * 
Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +namespace cugraph { +namespace experimental { +namespace detail { + +template +void sssp(raft::handle_t &handle, + GraphViewType const &push_graph_view, + typename GraphViewType::weight_type *distances, + PredecessorIterator predecessor_first, + typename GraphViewType::vertex_type source_vertex, + typename GraphViewType::weight_type cutoff, + bool do_expensive_check) +{ + using vertex_t = typename GraphViewType::vertex_type; + using weight_t = typename GraphViewType::weight_type; + + static_assert(std::is_integral::value, + "GraphViewType::vertex_type should be integral."); + static_assert(!GraphViewType::is_adj_matrix_transposed, + "GraphViewType should support the push model."); + + auto const num_vertices = push_graph_view.get_number_of_vertices(); + auto const num_edges = push_graph_view.get_number_of_edges(); + if (num_vertices == 0) { return; } + + // implements the Near-Far Pile method in + // A. Davidson, S. Baxter, M. Garland, and J. D. Owens, "Work-efficient parallel GPU methods for + // single-source shortest paths," 2014. + + // 1. 
check input arguments + + CUGRAPH_EXPECTS(push_graph_view.is_valid_vertex(source_vertex), + "Invalid input argument: source vertex out-of-range."); + + if (do_expensive_check) { + auto num_negative_edge_weights = + count_if_e(handle, + push_graph_view, + thrust::make_constant_iterator(0) /* dummy */, + thrust::make_constant_iterator(0) /* dummy */, + [] __device__(vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) { + return w < 0.0; + }); + CUGRAPH_EXPECTS(num_negative_edge_weights == 0, + "Invalid input argument: input graph should have non-negative edge weights."); + } + + // 2. initialize distances and predecessors + + auto constexpr invalid_distance = std::numeric_limits::max(); + auto constexpr invalid_vertex = invalid_vertex_id::value; + + auto val_first = thrust::make_zip_iterator(thrust::make_tuple(distances, predecessor_first)); + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(push_graph_view.get_local_vertex_first()), + thrust::make_counting_iterator(push_graph_view.get_local_vertex_last()), + val_first, + [source_vertex] __device__(auto val) { + auto distance = invalid_distance; + if (val == source_vertex) { distance = weight_t{0.0}; } + return thrust::make_tuple(distance, invalid_vertex); + }); + + if (num_edges == 0) { return; } + + // 3. 
update delta + + weight_t average_vertex_degree{0.0}; + weight_t average_edge_weight{0.0}; + thrust::tie(average_vertex_degree, average_edge_weight) = transform_reduce_e( + handle, + push_graph_view, + thrust::make_constant_iterator(0) /* dummy */, + thrust::make_constant_iterator(0) /* dummy */, + [] __device__(vertex_t row, vertex_t col, weight_t w, auto row_val, auto col_val) { + return thrust::make_tuple(weight_t{1.0}, w); + }, + thrust::make_tuple(weight_t{0.0}, weight_t{0.0})); + average_vertex_degree /= static_cast(num_vertices); + average_edge_weight /= static_cast(num_edges); + auto delta = + (static_cast(raft::warp_size()) * average_edge_weight) / average_vertex_degree; + + // 4. initialize SSSP frontier + + enum class Bucket { cur_near, new_near, far, num_buckets }; + // FIXME: need to double check the bucket sizes are sufficient + std::vector bucket_sizes(static_cast(Bucket::num_buckets), + push_graph_view.get_number_of_local_vertices()); + VertexFrontier, + vertex_t, + false, + static_cast(Bucket::num_buckets)> + vertex_frontier(handle, bucket_sizes); + + // 5. SSSP iteration + + bool vertex_and_adj_matrix_row_ranges_coincide = + push_graph_view.get_number_of_local_vertices() == + push_graph_view.get_number_of_local_adj_matrix_partition_rows() + ? true + : false; + rmm::device_vector adj_matrix_row_distances{}; + if (!vertex_and_adj_matrix_row_ranges_coincide) { + adj_matrix_row_distances.assign(push_graph_view.get_number_of_local_adj_matrix_partition_rows(), + std::numeric_limits::max()); + } + auto row_distances = + !vertex_and_adj_matrix_row_ranges_coincide ? 
adj_matrix_row_distances.data().get() : distances; + + if (push_graph_view.is_local_vertex_nocheck(source_vertex)) { + vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).insert(source_vertex); + } + + auto near_far_threshold = delta; + while (true) { + if (!vertex_and_adj_matrix_row_ranges_coincide) { + copy_to_adj_matrix_row( + handle, + push_graph_view, + vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).begin(), + vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).end(), + distances, + row_distances); + } + + vertex_partition_device_t vertex_partition(push_graph_view); + + update_frontier_v_push_if_out_nbr( + handle, + push_graph_view, + vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).begin(), + vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).end(), + row_distances, + thrust::make_constant_iterator(0) /* dummy */, + [vertex_partition, distances, cutoff] __device__( + vertex_t src, vertex_t dst, weight_t w, auto src_val, auto dst_val) { + auto push = true; + auto new_distance = src_val + w; + auto threshold = cutoff; + if (vertex_partition.is_local_vertex_nocheck(dst)) { + auto local_vertex_offset = + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(dst); + auto old_distance = *(distances + local_vertex_offset); + threshold = old_distance < threshold ? old_distance : threshold; + } + if (new_distance >= threshold) { push = false; } + return thrust::make_tuple(push, new_distance, src); + }, + reduce_op::min>(), + distances, + thrust::make_zip_iterator(thrust::make_tuple(distances, predecessor_first)), + vertex_frontier, + [near_far_threshold] __device__(auto v_val, auto pushed_val) { + auto new_dist = thrust::get<0>(pushed_val); + auto idx = new_dist < v_val + ? (new_dist < near_far_threshold ? 
static_cast(Bucket::new_near) + : static_cast(Bucket::far)) + : VertexFrontier, vertex_t>::kInvalidBucketIdx; + return thrust::make_tuple(idx, thrust::get<0>(pushed_val), thrust::get<1>(pushed_val)); + }); + + vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).clear(); + if (vertex_frontier.get_bucket(static_cast(Bucket::new_near)).aggregate_size() > 0) { + vertex_frontier.swap_buckets(static_cast(Bucket::cur_near), + static_cast(Bucket::new_near)); + } else if (vertex_frontier.get_bucket(static_cast(Bucket::far)).aggregate_size() > + 0) { // near queue is empty, split the far queue + auto old_near_far_threshold = near_far_threshold; + near_far_threshold += delta; + + while (true) { + vertex_frontier.split_bucket( + static_cast(Bucket::far), + [vertex_partition, distances, old_near_far_threshold, near_far_threshold] __device__( + auto v) { + auto dist = + *(distances + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v)); + if (dist < old_near_far_threshold) { + return VertexFrontier, vertex_t>::kInvalidBucketIdx; + } else if (dist < near_far_threshold) { + return static_cast(Bucket::cur_near); + } else { + return static_cast(Bucket::far); + } + }); + if (vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).aggregate_size() > + 0) { + break; + } else { + near_far_threshold += delta; + } + } + } else { + break; + } + } + + return; +} + +} // namespace detail + +template +void sssp(raft::handle_t &handle, + graph_view_t const &graph_view, + weight_t *distances, + vertex_t *predecessors, + vertex_t source_vertex, + weight_t cutoff, + bool do_expensive_check) +{ + if (predecessors != nullptr) { + detail::sssp( + handle, graph_view, distances, predecessors, source_vertex, cutoff, do_expensive_check); + } else { + detail::sssp(handle, + graph_view, + distances, + thrust::make_discard_iterator(), + source_vertex, + cutoff, + do_expensive_check); + } +} + +// explicit instantiation + +template void sssp(raft::handle_t &handle, + graph_view_t const 
&graph_view, + float *distances, + int32_t *predecessors, + int32_t source_vertex, + float cutoff, + bool do_expensive_check); + +template void sssp(raft::handle_t &handle, + graph_view_t const &graph_view, + float *distances, + int32_t *predecessors, + int32_t source_vertex, + float cutoff, + bool do_expensive_check); + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 1758dce30c3..ac3a27c7b77 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -128,7 +128,7 @@ set(KATZ_TEST_SRC ConfigureTest(KATZ_TEST "${KATZ_TEST_SRC}" "") ################################################################################################### -# - betweenness centrality tests ------------------------------------------------------------------------- +# - betweenness centrality tests ------------------------------------------------------------------ set(BETWEENNESS_TEST_SRC "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" @@ -153,17 +153,19 @@ ConfigureTest(PAGERANK_TEST "${PAGERANK_TEST_SRC}" "") ################################################################################################### # - SSSP tests ------------------------------------------------------------------------------------ + set(SSSP_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/traversal/sssp_test.cu") + "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/traversal/sssp_test.cu") ConfigureTest(SSSP_TEST "${SSSP_TEST_SRCS}" "") ################################################################################################### -# - BFS tests ------------------------------------------------------------------------------------ +# - BFS tests ------------------------------------------------------------------------------------- + set(BFS_TEST_SRCS - "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" - "${CMAKE_CURRENT_SOURCE_DIR}/traversal/bfs_test.cu") + 
"${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/traversal/bfs_test.cu") ConfigureTest(BFS_TEST "${BFS_TEST_SRCS}" "") @@ -194,7 +196,7 @@ set(ECG_TEST_SRC ConfigureTest(ECG_TEST "${ECG_TEST_SRC}" "") ################################################################################################### -# - Balanced cut clustering tests --------------------------------------------------------------------------------- +# - Balanced cut clustering tests ----------------------------------------------------------------- set(BALANCED_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/community/balanced_edge_test.cpp") @@ -202,7 +204,7 @@ set(BALANCED_TEST_SRC ConfigureTest(BALANCED_TEST "${BALANCED_TEST_SRC}" "") ################################################################################################### -# - TRIANGLE tests --------------------------------------------------------------------------------- +# - TRIANGLE tests -------------------------------------------------------------------------------- set(TRIANGLE_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/community/triangle_test.cu") @@ -219,7 +221,7 @@ set(RENUMBERING_TEST_SRC ConfigureTest(RENUMBERING_TEST "${RENUMBERING_TEST_SRC}" "") ################################################################################################### -#-FORCE ATLAS 2 tests ------------------------------------------------------------------------------ +# - FORCE ATLAS 2 tests -------------------------------------------------------------------------- set(FA2_TEST_SRC "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" @@ -228,7 +230,7 @@ set(FA2_TEST_SRC ConfigureTest(FA2_TEST "${FA2_TEST_SRC}" "") ################################################################################################### -#-CONNECTED COMPONENTS tests --------------------------------------------------------------------- +# - CONNECTED COMPONENTS tests ------------------------------------------------------------------- set(CONNECT_TEST_SRC 
"${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" @@ -237,7 +239,7 @@ set(CONNECT_TEST_SRC ConfigureTest(CONNECT_TEST "${CONNECT_TEST_SRC}" "") ################################################################################################### -#-STRONGLY CONNECTED COMPONENTS tests --------------------------------------------------------------------- +# - STRONGLY CONNECTED COMPONENTS tests ---------------------------------------------------------- set(SCC_TEST_SRC "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" @@ -246,7 +248,7 @@ set(SCC_TEST_SRC ConfigureTest(SCC_TEST "${SCC_TEST_SRC}" "") ################################################################################################### -#-FIND_MATCHES tests --------------------------------------------------------------------- +# - FIND_MATCHES tests ---------------------------------------------------------------------------- set(FIND_MATCHES_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/db/find_matches_test.cu") @@ -262,6 +264,42 @@ set(EXPERIMENTAL_GRAPH_TEST_SRCS ConfigureTest(EXPERIMENTAL_GRAPH_TEST "${EXPERIMENTAL_GRAPH_TEST_SRCS}" "") +################################################################################################### +# - Experimental BFS tests ------------------------------------------------------------------------ + +set(EXPERIMENTAL_BFS_TEST_SRCS + "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/bfs_test.cpp") + +ConfigureTest(EXPERIMENTAL_BFS_TEST "${EXPERIMENTAL_BFS_TEST_SRCS}" "") + +################################################################################################### +# - Experimental SSSP tests ----------------------------------------------------------------------- + +set(EXPERIMENTAL_SSSP_TEST_SRCS + "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/sssp_test.cpp") + +ConfigureTest(EXPERIMENTAL_SSSP_TEST "${EXPERIMENTAL_SSSP_TEST_SRCS}" "") + 
+################################################################################################### +# - Experimental PAGERANK tests ------------------------------------------------------------------- + +set(EXPERIMENTAL_PAGERANK_TEST_SRCS + "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/pagerank_test.cpp") + +ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}" "") + +################################################################################################### +# - Experimental KATZ_CENTRALITY tests ------------------------------------------------------------ + +set(EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS + "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/katz_centrality_test.cpp") + +ConfigureTest(EXPERIMENTAL_KATZ_CENTRALITY_TEST "${EXPERIMENTAL_KATZ_CENTRALITY_TEST_SRCS}" "") + ################################################################################################### ### enable testing ################################################################################ ################################################################################################### diff --git a/cpp/tests/experimental/bfs_test.cpp b/cpp/tests/experimental/bfs_test.cpp new file mode 100644 index 00000000000..2498ca4f3f5 --- /dev/null +++ b/cpp/tests/experimental/bfs_test.cpp @@ -0,0 +1,204 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include + +template +void bfs_reference(edge_t* offsets, + vertex_t* indices, + vertex_t* distances, + vertex_t* predecessors, + vertex_t num_vertices, + vertex_t source, + vertex_t depth_limit = std::numeric_limits::max()) +{ + vertex_t depth{0}; + + std::fill(distances, distances + num_vertices, std::numeric_limits::max()); + std::fill(predecessors, predecessors + num_vertices, cugraph::invalid_vertex_id::value); + + *(distances + source) = depth; + std::vector cur_frontier_rows{source}; + std::vector new_frontier_rows{}; + + while (cur_frontier_rows.size() > 0) { + for (auto const row : cur_frontier_rows) { + auto nbr_offset_first = *(offsets + row); + auto nbr_offset_last = *(offsets + row + 1); + for (auto nbr_offset = nbr_offset_first; nbr_offset != nbr_offset_last; ++nbr_offset) { + auto nbr = *(indices + nbr_offset); + if (*(distances + nbr) == std::numeric_limits::max()) { + *(distances + nbr) = depth + 1; + *(predecessors + nbr) = row; + new_frontier_rows.push_back(nbr); + } + } + } + std::swap(cur_frontier_rows, new_frontier_rows); + new_frontier_rows.clear(); + ++depth; + if (depth >= depth_limit) { break; } + } + + return; +} + +typedef struct BFS_Usecase_t { + std::string graph_file_full_path{}; + size_t source{false}; + + BFS_Usecase_t(std::string const& graph_file_path, size_t source) : source(source) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} BFS_Usecase; + +class Tests_BFS : public ::testing::TestWithParam { + public: + Tests_BFS() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + 
virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(BFS_Usecase const& configuration) + { + using weight_t = float; + + raft::handle_t handle{}; + + auto graph = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, false); + auto graph_view = graph.view(); + + std::vector h_offsets(graph_view.get_number_of_vertices() + 1); + std::vector h_indices(graph_view.get_number_of_edges()); + raft::update_host(h_offsets.data(), + graph_view.offsets(), + graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + graph_view.indices(), + graph_view.get_number_of_edges(), + handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + ASSERT_TRUE(configuration.source >= 0 && + configuration.source <= graph_view.get_number_of_vertices()) + << "Starting sources should be >= 0 and" + << " less than the number of vertices in the graph."; + + std::vector h_reference_distances(graph_view.get_number_of_vertices()); + std::vector h_reference_predecessors(graph_view.get_number_of_vertices()); + + bfs_reference(h_offsets.data(), + h_indices.data(), + h_reference_distances.data(), + h_reference_predecessors.data(), + graph_view.get_number_of_vertices(), + static_cast(configuration.source), + std::numeric_limits::max()); + + rmm::device_uvector d_distances(graph_view.get_number_of_vertices(), + handle.get_stream()); + rmm::device_uvector d_predecessors(graph_view.get_number_of_vertices(), + handle.get_stream()); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + cugraph::experimental::bfs(handle, + graph_view, + d_distances.begin(), + d_predecessors.begin(), + static_cast(configuration.source), + false, + std::numeric_limits::max(), + false); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::vector h_cugraph_distances(graph_view.get_number_of_vertices()); + std::vector 
h_cugraph_predecessors(graph_view.get_number_of_vertices()); + + raft::update_host( + h_cugraph_distances.data(), d_distances.data(), d_distances.size(), handle.get_stream()); + raft::update_host(h_cugraph_predecessors.data(), + d_predecessors.data(), + d_predecessors.size(), + handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + ASSERT_TRUE(std::equal( + h_reference_distances.begin(), h_reference_distances.end(), h_cugraph_distances.begin())) + << "distances do not match with the reference values."; + + for (auto it = h_cugraph_predecessors.begin(); it != h_cugraph_predecessors.end(); ++it) { + auto i = std::distance(h_cugraph_predecessors.begin(), it); + if (*it == cugraph::invalid_vertex_id::value) { + ASSERT_TRUE(h_reference_predecessors[i] == *it) + << "vertex reachability do not match with the reference."; + } else { + ASSERT_TRUE(h_reference_distances[*it] + 1 == h_reference_distances[i]) + << "distance to this vertex != distance to the predecessor vertex + 1."; + bool found{false}; + for (auto j = h_offsets[*it]; j < h_offsets[*it + 1]; ++j) { + if (h_indices[j] == i) { + found = true; + break; + } + } + ASSERT_TRUE(found) << "no edge from the predecessor vertex to this vertex."; + } + } + } +}; + +// FIXME: add tests for type combinations +TEST_P(Tests_BFS, CheckInt32Int32) { run_current_test(GetParam()); } + +INSTANTIATE_TEST_CASE_P(simple_test, + Tests_BFS, + ::testing::Values(BFS_Usecase("test/datasets/karate.mtx", 0), + BFS_Usecase("test/datasets/polbooks.mtx", 0), + BFS_Usecase("test/datasets/netscience.mtx", 0), + BFS_Usecase("test/datasets/netscience.mtx", 100), + BFS_Usecase("test/datasets/wiki2003.mtx", 1000), + BFS_Usecase("test/datasets/wiki-Talk.mtx", 1000))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/katz_centrality_test.cpp b/cpp/tests/experimental/katz_centrality_test.cpp new file mode 100644 index 00000000000..0352637dcf0 --- /dev/null +++ 
b/cpp/tests/experimental/katz_centrality_test.cpp @@ -0,0 +1,224 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +template +void katz_centrality_reference(edge_t* offsets, + vertex_t* indices, + weight_t* weights, + result_t* betas, + result_t* katz_centralities, + vertex_t num_vertices, + result_t alpha, + result_t beta, // relevant only if betas == nullptr + result_t epsilon, + size_t max_iterations, + bool has_initial_guess, + bool normalize) +{ + if (num_vertices == 0) { return; } + + if (!has_initial_guess) { + std::fill(katz_centralities, katz_centralities + num_vertices, result_t{0.0}); + } + + std::vector old_katz_centralities(num_vertices, result_t{0.0}); + size_t iter{0}; + while (true) { + std::copy(katz_centralities, katz_centralities + num_vertices, old_katz_centralities.begin()); + for (vertex_t i = 0; i < num_vertices; ++i) { + katz_centralities[i] = betas != nullptr ? betas[i] : beta; + for (auto j = *(offsets + i); j < *(offsets + i + 1); ++j) { + auto nbr = indices[j]; + auto w = weights != nullptr ? 
weights[j] : result_t{1.0}; + katz_centralities[i] += alpha * old_katz_centralities[nbr] * w; + } + } + + result_t diff_sum{0.0}; + for (vertex_t i = 0; i < num_vertices; ++i) { + diff_sum += fabs(katz_centralities[i] - old_katz_centralities[i]); + } + if (diff_sum < static_cast(num_vertices) * epsilon) { break; } + iter++; + ASSERT_TRUE(iter < max_iterations); + } + + if (normalize) { + auto l2_norm = std::sqrt(std::inner_product( + katz_centralities, katz_centralities + num_vertices, katz_centralities, result_t{0.0})); + std::transform( + katz_centralities, katz_centralities + num_vertices, katz_centralities, [l2_norm](auto& val) { + return val / l2_norm; + }); + } + + return; +} + +typedef struct KatzCentrality_Usecase_t { + std::string graph_file_full_path{}; + bool test_weighted{false}; + + KatzCentrality_Usecase_t(std::string const& graph_file_path, bool test_weighted) + : test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} KatzCentrality_Usecase; + +class Tests_KatzCentrality : public ::testing::TestWithParam { + public: + Tests_KatzCentrality() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(KatzCentrality_Usecase const& configuration) + { + raft::handle_t handle{}; + + auto graph = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted); + auto graph_view = graph.view(); + + std::vector h_offsets(graph_view.get_number_of_vertices() + 1); + std::vector h_indices(graph_view.get_number_of_edges()); + std::vector h_weights{}; + raft::update_host(h_offsets.data(), + graph_view.offsets(), + graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + 
raft::update_host(h_indices.data(), + graph_view.indices(), + graph_view.get_number_of_edges(), + handle.get_stream()); + if (graph_view.is_weighted()) { + h_weights.assign(graph_view.get_number_of_edges(), weight_t{0.0}); + raft::update_host(h_weights.data(), + graph_view.weights(), + graph_view.get_number_of_edges(), + handle.get_stream()); + } + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + std::vector h_reference_katz_centralities(graph_view.get_number_of_vertices()); + + std::vector tmps(h_offsets.size()); + std::adjacent_difference(h_offsets.begin(), h_offsets.end(), tmps.begin()); + auto max_it = std::max_element(tmps.begin(), tmps.end()); + + result_t const alpha = result_t{1.0} / static_cast(*max_it + 1); + result_t constexpr beta{1.0}; + result_t constexpr epsilon{1e-6}; + + katz_centrality_reference( + h_offsets.data(), + h_indices.data(), + h_weights.size() > 0 ? h_weights.data() : static_cast(nullptr), + static_cast(nullptr), + h_reference_katz_centralities.data(), + graph_view.get_number_of_vertices(), + alpha, + beta, + epsilon, + std::numeric_limits::max(), + false, + false); + + rmm::device_uvector d_katz_centralities(graph_view.get_number_of_vertices(), + handle.get_stream()); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + cugraph::experimental::katz_centrality(handle, + graph_view, + static_cast(nullptr), + d_katz_centralities.begin(), + alpha, + beta, + epsilon, + std::numeric_limits::max(), + false, + false, + false); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::vector h_cugraph_katz_centralities(graph_view.get_number_of_vertices()); + + raft::update_host(h_cugraph_katz_centralities.data(), + d_katz_centralities.data(), + d_katz_centralities.size(), + handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + auto nearly_equal = [epsilon](auto lhs, auto rhs) { return std::fabs(lhs - rhs) < epsilon; }; + + 
ASSERT_TRUE(std::equal(h_reference_katz_centralities.begin(), + h_reference_katz_centralities.end(), + h_cugraph_katz_centralities.begin(), + nearly_equal)) + << "Katz centrality values do not match with the reference values."; + } +}; + +// FIXME: add tests for type combinations +TEST_P(Tests_KatzCentrality, CheckInt32Int32FloatFloat) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P( + simple_test, + Tests_KatzCentrality, + ::testing::Values(KatzCentrality_Usecase("test/datasets/karate.mtx", false), + KatzCentrality_Usecase("test/datasets/karate.mtx", true), + KatzCentrality_Usecase("test/datasets/web-Google.mtx", false), + KatzCentrality_Usecase("test/datasets/web-Google.mtx", true), + KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", false), + KatzCentrality_Usecase("test/datasets/ljournal-2008.mtx", true), + KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", false), + KatzCentrality_Usecase("test/datasets/webbase-1M.mtx", true))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/experimental/pagerank_test.cpp b/cpp/tests/experimental/pagerank_test.cpp new file mode 100644 index 00000000000..3fe74e279ff --- /dev/null +++ b/cpp/tests/experimental/pagerank_test.cpp @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +template +void pagerank_reference(edge_t* offsets, + vertex_t* indices, + weight_t* weights, + vertex_t* personalization_vertices, + result_t* personalization_values, + result_t* pageranks, + vertex_t num_vertices, + vertex_t personalization_vector_size, + result_t alpha, + result_t epsilon, + size_t max_iterations, + bool has_initial_guess) +{ + if (num_vertices == 0) { return; } + + if (has_initial_guess) { + auto sum = std::accumulate(pageranks, pageranks + num_vertices, result_t{0.0}); + ASSERT_TRUE(sum > 0.0); + std::for_each(pageranks, pageranks + num_vertices, [sum](auto& val) { val /= sum; }); + } else { + std::for_each(pageranks, pageranks + num_vertices, [num_vertices](auto& val) { + val = result_t{1.0} / static_cast(num_vertices); + }); + } + + if (personalization_vertices != nullptr) { + auto sum = std::accumulate( + personalization_values, personalization_values + personalization_vector_size, result_t{0.0}); + ASSERT_TRUE(sum > 0.0); + std::for_each(personalization_values, + personalization_values + personalization_vector_size, + [sum](auto& val) { val /= sum; }); + } + + std::vector out_weight_sums(num_vertices, result_t{0.0}); + for (vertex_t i = 0; i < num_vertices; ++i) { + for (auto j = *(offsets + i); j < *(offsets + i + 1); ++j) { + auto nbr = indices[j]; + auto w = weights != nullptr ? 
weights[j] : 1.0; + out_weight_sums[nbr] += w; + } + } + + std::vector old_pageranks(num_vertices, result_t{0.0}); + size_t iter{0}; + while (true) { + std::copy(pageranks, pageranks + num_vertices, old_pageranks.begin()); + result_t dangling_sum{0.0}; + for (vertex_t i = 0; i < num_vertices; ++i) { + if (out_weight_sums[i] == result_t{0.0}) { dangling_sum += old_pageranks[i]; } + } + for (vertex_t i = 0; i < num_vertices; ++i) { + pageranks[i] = result_t{0.0}; + for (auto j = *(offsets + i); j < *(offsets + i + 1); ++j) { + auto nbr = indices[j]; + auto w = weights != nullptr ? weights[j] : result_t{1.0}; + pageranks[i] += alpha * old_pageranks[nbr] * (w / out_weight_sums[nbr]); + } + if (personalization_vertices == nullptr) { + pageranks[i] += (dangling_sum + (1.0 - alpha)) / static_cast(num_vertices); + } + } + if (personalization_vertices != nullptr) { + for (vertex_t i = 0; i < personalization_vector_size; ++i) { + auto v = personalization_vertices[i]; + pageranks[v] += (dangling_sum + (1.0 - alpha)) * personalization_values[i]; + } + } + result_t diff_sum{0.0}; + for (vertex_t i = 0; i < num_vertices; ++i) { + diff_sum += fabs(pageranks[i] - old_pageranks[i]); + } + if (diff_sum < static_cast(num_vertices) * epsilon) { break; } + iter++; + ASSERT_TRUE(iter < max_iterations); + } + + return; +} + +typedef struct PageRank_Usecase_t { + std::string graph_file_full_path{}; + bool test_weighted{false}; + + PageRank_Usecase_t(std::string const& graph_file_path, bool test_weighted) + : test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} PageRank_Usecase; + +class Tests_PageRank : public ::testing::TestWithParam { + public: + Tests_PageRank() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void 
TearDown() {} + + template + void run_current_test(PageRank_Usecase const& configuration) + { + raft::handle_t handle{}; + + auto graph = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted); + auto graph_view = graph.view(); + + std::vector h_offsets(graph_view.get_number_of_vertices() + 1); + std::vector h_indices(graph_view.get_number_of_edges()); + std::vector h_weights{}; + raft::update_host(h_offsets.data(), + graph_view.offsets(), + graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + graph_view.indices(), + graph_view.get_number_of_edges(), + handle.get_stream()); + if (graph_view.is_weighted()) { + h_weights.assign(graph_view.get_number_of_edges(), weight_t{0.0}); + raft::update_host(h_weights.data(), + graph_view.weights(), + graph_view.get_number_of_edges(), + handle.get_stream()); + } + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + std::vector h_reference_pageranks(graph_view.get_number_of_vertices()); + + result_t constexpr alpha{0.85}; + result_t constexpr epsilon{1e-6}; + + pagerank_reference(h_offsets.data(), + h_indices.data(), + h_weights.size() > 0 ? 
h_weights.data() : static_cast(nullptr), + static_cast(nullptr), + static_cast(nullptr), + h_reference_pageranks.data(), + graph_view.get_number_of_vertices(), + vertex_t{0}, + alpha, + epsilon, + std::numeric_limits::max(), + false); + + rmm::device_uvector d_pageranks(graph_view.get_number_of_vertices(), + handle.get_stream()); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + cugraph::experimental::pagerank(handle, + graph_view, + static_cast(nullptr), + static_cast(nullptr), + static_cast(nullptr), + vertex_t{0}, + d_pageranks.begin(), + alpha, + epsilon, + std::numeric_limits::max(), + false, + false); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::vector h_cugraph_pageranks(graph_view.get_number_of_vertices()); + + raft::update_host( + h_cugraph_pageranks.data(), d_pageranks.data(), d_pageranks.size(), handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + auto nearly_equal = [epsilon](auto lhs, auto rhs) { return std::fabs(lhs - rhs) < epsilon; }; + + ASSERT_TRUE(std::equal(h_reference_pageranks.begin(), + h_reference_pageranks.end(), + h_cugraph_pageranks.begin(), + nearly_equal)) + << "PageRank values do not match with the reference values."; + } +}; + +// FIXME: add tests for type combinations +TEST_P(Tests_PageRank, CheckInt32Int32FloatFloat) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P(simple_test, + Tests_PageRank, + ::testing::Values(PageRank_Usecase("test/datasets/karate.mtx", false), + PageRank_Usecase("test/datasets/karate.mtx", true), + PageRank_Usecase("test/datasets/web-Google.mtx", false), + PageRank_Usecase("test/datasets/web-Google.mtx", true), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", + false), + PageRank_Usecase("test/datasets/ljournal-2008.mtx", true), + PageRank_Usecase("test/datasets/webbase-1M.mtx", false), + PageRank_Usecase("test/datasets/webbase-1M.mtx", true))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff 
--git a/cpp/tests/experimental/sssp_test.cpp b/cpp/tests/experimental/sssp_test.cpp new file mode 100644 index 00000000000..49eaca56f56 --- /dev/null +++ b/cpp/tests/experimental/sssp_test.cpp @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +// Dijkstra's algorithm +template +void sssp_reference(edge_t* offsets, + vertex_t* indices, + weight_t* weights, + weight_t* distances, + vertex_t* predecessors, + vertex_t num_vertices, + vertex_t source, + weight_t cutoff = std::numeric_limits::max()) +{ + using queue_iterm_t = std::tuple; + + std::fill(distances, distances + num_vertices, std::numeric_limits::max()); + std::fill(predecessors, predecessors + num_vertices, cugraph::invalid_vertex_id::value); + + *(distances + source) = weight_t{0.0}; + std::priority_queue, std::greater> + queue{}; + queue.push(std::make_tuple(weight_t{0.0}, source)); + + while (queue.size() > 0) { + weight_t distance{}; + vertex_t row{}; + std::tie(distance, row) = queue.top(); + queue.pop(); + if (distance > *(distances + row)) { continue; } + auto nbr_offsets = *(offsets + row); + auto nbr_offset_last = *(offsets + row + 1); + for (auto nbr_offset = nbr_offsets; nbr_offset != nbr_offset_last; ++nbr_offset) { + auto nbr = *(indices + nbr_offset); 
+ auto new_distance = distance + *(weights + nbr_offset); + auto threshold = std::min(*(distances + nbr), cutoff); + if (new_distance < threshold) { + *(distances + nbr) = new_distance; + *(predecessors + nbr) = row; + queue.push(std::make_tuple(new_distance, nbr)); + } + } + } + + return; +} + +typedef struct SSSP_Usecase_t { + std::string graph_file_full_path{}; + size_t source{false}; + + SSSP_Usecase_t(std::string const& graph_file_path, size_t source) : source(source) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} SSSP_Usecase; + +class Tests_SSSP : public ::testing::TestWithParam { + public: + Tests_SSSP() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(SSSP_Usecase const& configuration) + { + raft::handle_t handle{}; + + auto graph = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, true); + auto graph_view = graph.view(); + + std::vector h_offsets(graph_view.get_number_of_vertices() + 1); + std::vector h_indices(graph_view.get_number_of_edges()); + std::vector h_weights(graph_view.get_number_of_edges()); + raft::update_host(h_offsets.data(), + graph_view.offsets(), + graph_view.get_number_of_vertices() + 1, + handle.get_stream()); + raft::update_host(h_indices.data(), + graph_view.indices(), + graph_view.get_number_of_edges(), + handle.get_stream()); + raft::update_host(h_weights.data(), + graph_view.weights(), + graph_view.get_number_of_edges(), + handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + ASSERT_TRUE(configuration.source >= 0 && + configuration.source <= graph_view.get_number_of_vertices()) + << "Starting sources should be >= 0 and" + << " less than the number of 
vertices in the graph."; + + std::vector h_reference_distances(graph_view.get_number_of_vertices()); + std::vector h_reference_predecessors(graph_view.get_number_of_vertices()); + + sssp_reference(h_offsets.data(), + h_indices.data(), + h_weights.data(), + h_reference_distances.data(), + h_reference_predecessors.data(), + graph_view.get_number_of_vertices(), + static_cast(configuration.source)); + + rmm::device_uvector d_distances(graph_view.get_number_of_vertices(), + handle.get_stream()); + rmm::device_uvector d_predecessors(graph_view.get_number_of_vertices(), + handle.get_stream()); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + cugraph::experimental::sssp(handle, + graph_view, + d_distances.begin(), + d_predecessors.begin(), + static_cast(configuration.source), + std::numeric_limits::max(), + false); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::vector h_cugraph_distances(graph_view.get_number_of_vertices()); + std::vector h_cugraph_predecessors(graph_view.get_number_of_vertices()); + + raft::update_host( + h_cugraph_distances.data(), d_distances.data(), d_distances.size(), handle.get_stream()); + raft::update_host(h_cugraph_predecessors.data(), + d_predecessors.data(), + d_predecessors.size(), + handle.get_stream()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + + auto max_weight_element = std::max_element(h_weights.begin(), h_weights.end()); + auto epsilon = *max_weight_element * weight_t{1e-6}; + auto nearly_equal = [epsilon](auto lhs, auto rhs) { return std::fabs(lhs - rhs) < epsilon; }; + + ASSERT_TRUE(std::equal(h_reference_distances.begin(), + h_reference_distances.end(), + h_cugraph_distances.begin(), + nearly_equal)) + << "distances do not match with the reference values."; + + for (auto it = h_cugraph_predecessors.begin(); it != h_cugraph_predecessors.end(); ++it) { + auto i = std::distance(h_cugraph_predecessors.begin(), it); + if (*it == 
cugraph::invalid_vertex_id::value) { + ASSERT_TRUE(h_reference_predecessors[i] == *it) + << "vertex reachability do not match with the reference."; + } else { + auto pred_distance = h_reference_distances[*it]; + bool found{false}; + for (auto j = h_offsets[*it]; j < h_offsets[*it + 1]; ++j) { + if (h_indices[j] == i) { + if (nearly_equal(pred_distance + h_weights[j], h_reference_distances[i])) { + found = true; + break; + } + } + } + ASSERT_TRUE(found) + << "no edge from the predecessor vertex to this vertex with the matching weight."; + } + } + } +}; + +// FIXME: add tests for type combinations +TEST_P(Tests_SSSP, CheckInt32Int32Float) { run_current_test(GetParam()); } + +#if 0 +INSTANTIATE_TEST_CASE_P(simple_test, + Tests_SSSP, + ::testing::Values(SSSP_Usecase("test/datasets/karate.mtx", 0))); +#else +INSTANTIATE_TEST_CASE_P(simple_test, + Tests_SSSP, + ::testing::Values(SSSP_Usecase("test/datasets/karate.mtx", 0), + SSSP_Usecase("test/datasets/dblp.mtx", 0), + SSSP_Usecase("test/datasets/wiki2003.mtx", 1000))); +#endif + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index 65703e9541d..c87c63c56fb 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -15,9 +15,9 @@ */ #pragma once -#include - +#include #include +#include #include @@ -25,6 +25,9 @@ extern "C" { #include "mmio.h" } +#include + +#include #include #include #include @@ -376,5 +379,44 @@ edgelist_from_market_matrix_file_t read_edgelist_from_matrix return std::move(ret); } +template +cugraph::experimental::graph_t +read_graph_from_matrix_market_file(raft::handle_t const& handle, + std::string const& graph_file_full_path, + bool test_weighted) +{ + auto mm_graph = + read_edgelist_from_matrix_market_file(graph_file_full_path); + edge_t number_of_edges = static_cast(mm_graph.h_rows.size()); + + rmm::device_uvector d_edgelist_rows(number_of_edges, handle.get_stream()); + 
rmm::device_uvector d_edgelist_cols(number_of_edges, handle.get_stream()); + rmm::device_uvector d_edgelist_weights(test_weighted ? number_of_edges : 0, + handle.get_stream()); + + raft::update_device( + d_edgelist_rows.data(), mm_graph.h_rows.data(), number_of_edges, handle.get_stream()); + raft::update_device( + d_edgelist_cols.data(), mm_graph.h_cols.data(), number_of_edges, handle.get_stream()); + if (test_weighted) { + raft::update_device( + d_edgelist_weights.data(), mm_graph.h_weights.data(), number_of_edges, handle.get_stream()); + } + + cugraph::experimental::edgelist_t edgelist{ + d_edgelist_rows.data(), + d_edgelist_cols.data(), + test_weighted ? d_edgelist_weights.data() : nullptr, + number_of_edges}; + + return cugraph::experimental::graph_t( + handle, + edgelist, + mm_graph.number_of_vertices, + cugraph::experimental::graph_properties_t{mm_graph.is_symmetric, false}, + false, + true); +} + } // namespace test } // namespace cugraph From 6e36fa104f1dbacb0ede4fe994c36602982435d8 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Fri, 25 Sep 2020 14:00:31 -0400 Subject: [PATCH 60/74] [REVIEW] pass size_t* instead of int_t[] for raft allgatherv's input parameter displs (#1158) * accomodate the change of raft allgahterv's input parameter displs type from int[] to size_t[] * update RAFT tag --- CHANGELOG.md | 1 + cpp/CMakeLists.txt | 2 +- cpp/src/traversal/mg/common_utils.cuh | 2 +- cpp/src/utilities/spmv_1D.cu | 4 +++- 4 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e3c135f5fc..5c67defed67 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ - PR #1131 Show style checker errors with set +e - PR #1150 Update RAFT git tag - PR #1155 Remove RMM library dependency and CXX11 ABI handling +- PR #1158 Pass size_t* & size_t* instead of size_t[] & int[] for raft allgatherv's input parameters recvcounts & displs # cuGraph 0.15.0 (26 Aug 2020) diff --git 
a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3315f2c86e8..41cf82dfb39 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -250,7 +250,7 @@ else(DEFINED ENV{RAFT_PATH}) ExternalProject_Add(raft GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG 516106e3b515b25c863776fcc51fb12df6c0a186 + GIT_TAG 53c1e2dde4045f386f9cc4bb7d3dc99d5690b886 PREFIX ${RAFT_DIR} CONFIGURE_COMMAND "" BUILD_COMMAND "" diff --git a/cpp/src/traversal/mg/common_utils.cuh b/cpp/src/traversal/mg/common_utils.cuh index 6199730c28f..2cda827b471 100644 --- a/cpp/src/traversal/mg/common_utils.cuh +++ b/cpp/src/traversal/mg/common_utils.cuh @@ -184,7 +184,7 @@ return_t collect_vectors(raft::handle_t const &handle, // h_buffer_offsets has to be int because raft allgatherv expects // int array for displacement vector. This should be changed in // raft so that the displacement is templated - thrust::host_vector h_buffer_offsets(h_buffer_len.size()); + thrust::host_vector h_buffer_offsets(h_buffer_len.size()); thrust::exclusive_scan( thrust::host, h_buffer_len.begin(), h_buffer_len.end(), h_buffer_offsets.begin()); diff --git a/cpp/src/utilities/spmv_1D.cu b/cpp/src/utilities/spmv_1D.cu index 4aec86919c9..8a7378e69d3 100644 --- a/cpp/src/utilities/spmv_1D.cu +++ b/cpp/src/utilities/spmv_1D.cu @@ -75,8 +75,10 @@ void MGcsrmv::run(weight_t *x) auto const &comm{handle_.get_comms()}; // local std::vector recvbuf(comm.get_size()); + std::vector displs(comm.get_size()); std::copy(local_vertices_, local_vertices_ + comm.get_size(), recvbuf.begin()); - comm.allgatherv(y_loc_.data().get(), x, recvbuf.data(), part_off_, stream); + std::copy(part_off_, part_off_ + comm.get_size(), displs.begin()); + comm.allgatherv(y_loc_.data().get(), x, recvbuf.data(), displs.data(), stream); } template class MGcsrmv; From b0455a6928a99a6daee0a8e430309dffc3b9326d Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Fri, 25 Sep 2020 13:02:30 -0500 Subject: [PATCH 61/74] [REVIEW] 
python 2D shuffling (#1133) * add shuffle for 2D partitioning * Update CHANGELOG.md * Update CHANGELOG.md * shuffle updates as per review * updated changelog * add transposed Co-authored-by: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Co-authored-by: BradReesWork --- CHANGELOG.md | 1 + python/cugraph/structure/__init__.py | 1 + python/cugraph/structure/graph.py | 21 +++++++ python/cugraph/structure/shuffle.py | 93 ++++++++++++++++++++++++++++ 4 files changed, 116 insertions(+) create mode 100644 python/cugraph/structure/shuffle.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 5c67defed67..e829f128182 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ ## Improvements - PR 1081 MNMG Renumbering - sort partitions by degree - PR 1115 Replace deprecated rmm::mr::get_default_resource with rmm::mr::get_current_device_resource +- PR #1133 added python 2D shuffling - PR 1129 Refactored test to use common dataset and added additional doc pages - PR 1135 SG Updates to Louvain et. al. 
- PR 1132 Upgrade Thrust to latest commit diff --git a/python/cugraph/structure/__init__.py b/python/cugraph/structure/__init__.py index b43f4f3ebfa..3a3515eef67 100644 --- a/python/cugraph/structure/__init__.py +++ b/python/cugraph/structure/__init__.py @@ -16,3 +16,4 @@ from cugraph.structure.symmetrize import symmetrize, symmetrize_df from cugraph.structure.convert_matrix import from_cudf_edgelist from cugraph.structure.hypergraph import hypergraph +from cugraph.structure.shuffle import shuffle diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index a8bc4216f0e..d6e1689e515 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -347,6 +347,17 @@ def from_cudf_edgelist( if self.edgelist is not None or self.adjlist is not None: raise Exception("Graph already has values") + s_col = source + d_col = destination + if not isinstance(s_col, list): + s_col = [s_col] + if not isinstance(d_col, list): + d_col = [d_col] + if not (set(s_col).issubset(set(input_df.columns)) and + set(d_col).issubset(set(input_df.columns))): + raise Exception('source column names and/or destination column \ +names not found in input. Recheck the source and destination parameters') + # Consolidation if isinstance(input_df, cudf.DataFrame): if len(input_df[source]) > 2147483100: @@ -445,6 +456,16 @@ def from_dask_cudf_edgelist(self, input_ddf, source='source', if type(self) is Graph: raise Exception('Undirected distributed graph not supported') + s_col = source + d_col = destination + if not isinstance(s_col, list): + s_col = [s_col] + if not isinstance(d_col, list): + d_col = [d_col] + if not (set(s_col).issubset(set(input_ddf.columns)) and + set(d_col).issubset(set(input_ddf.columns))): + raise Exception('source column names and/or destination column \ +names not found in input. 
Recheck the source and destination parameters') # # Keep all of the original parameters so we can lazily # evaluate this function diff --git a/python/cugraph/structure/shuffle.py b/python/cugraph/structure/shuffle.py new file mode 100644 index 00000000000..88791dd0f71 --- /dev/null +++ b/python/cugraph/structure/shuffle.py @@ -0,0 +1,93 @@ +import math +from dask.dataframe.shuffle import rearrange_by_column +import cudf + + +def get_n_workers(): + from dask.distributed import default_client + client = default_client() + return len(client.scheduler_info()['workers']) + + +def get_2D_div(ngpus): + pcols = int(math.sqrt(ngpus)) + while ngpus % pcols != 0: + pcols = pcols - 1 + return int(ngpus/pcols), pcols + + +def _set_partitions_pre(df, vertex_row_partitions, vertex_col_partitions, + prows, pcols, transposed): + if transposed: + r = df['dst'] + c = df['src'] + else: + r = df['src'] + c = df['dst'] + r_div = vertex_row_partitions.searchsorted(r, side='right')-1 + c_div = vertex_col_partitions.searchsorted(c, side='right')-1 + partitions = r_div % prows + c_div * prows + return partitions + + +def shuffle(dg, transposed=False, prows=None, pcols=None): + """ + Shuffles the renumbered input distributed graph edgelist into ngpu + partitions. The number of processes/gpus P = prows*pcols. The 2D + partitioning divides the matrix into P*pcols rectangular partitions + as per vertex partitioning performed in renumbering, and then shuffles + these partitions into P gpus. 
+ """ + + ddf = dg.edgelist.edgelist_df + ngpus = get_n_workers() + if prows is None and pcols is None: + prows, pcols = get_2D_div(ngpus) + else: + if prows is not None and pcols is not None: + if ngpus != prows*pcols: + raise Exception('prows*pcols should be equal to the\ + number of processes') + elif prows is not None: + if ngpus % prows != 0: + raise Exception('prows must be a factor of the number\ + of processes') + pcols = int(ngpus/prows) + elif pcols is not None: + if ngpus % pcols != 0: + raise Exception('pcols must be a factor of the number\ + of processes') + prows = int(ngpus/pcols) + + renumber_vertex_count = dg.renumber_map.implementation.\ + ddf.map_partitions(len).compute() + renumber_vertex_cumsum = renumber_vertex_count.cumsum() + src_dtype = ddf['src'].dtype + dst_dtype = ddf['dst'].dtype + + vertex_row_partitions = cudf.Series([0], dtype=src_dtype) + vertex_row_partitions = vertex_row_partitions.append(cudf.Series( + renumber_vertex_cumsum, dtype=src_dtype)) + num_verts = vertex_row_partitions.iloc[-1] + vertex_col_partitions = [] + for i in range(pcols + 1): + vertex_col_partitions.append(vertex_row_partitions.iloc[i*prows]) + vertex_col_partitions = cudf.Series(vertex_col_partitions, dtype=dst_dtype) + + meta = ddf._meta._constructor_sliced([0]) + partitions = ddf.map_partitions( + _set_partitions_pre, + vertex_row_partitions=vertex_row_partitions, + vertex_col_partitions=vertex_col_partitions, prows=prows, + pcols=pcols, transposed=transposed, meta=meta) + ddf2 = ddf.assign(_partitions=partitions) + ddf3 = rearrange_by_column( + ddf2, + "_partitions", + max_branch=None, + npartitions=ngpus, + shuffle="tasks", + ignore_index=True, + ).drop(columns=["_partitions"]) + + return ddf3, num_verts, vertex_row_partitions From 1becb6fcef773aecc2243252a24ed26589e41e71 Mon Sep 17 00:00:00 2001 From: Alex Fender Date: Tue, 29 Sep 2020 12:38:25 -0500 Subject: [PATCH 62/74] [REVIEW] Disabled MG tests on single GPU (#1168) * disabled MG tests on single GPU 
* changelog * style --- CHANGELOG.md | 2 +- python/cugraph/dask/common/mg_utils.py | 9 + .../test_mg_batch_betweenness_centrality.py | 77 ++++--- ...st_mg_batch_edge_betweenness_centrality.py | 71 +++--- python/cugraph/tests/dask/test_mg_bfs.py | 37 ++-- python/cugraph/tests/dask/test_mg_comms.py | 70 +++--- python/cugraph/tests/dask/test_mg_degree.py | 40 ++-- python/cugraph/tests/dask/test_mg_louvain.py | 30 ++- python/cugraph/tests/dask/test_mg_pagerank.py | 71 +++--- python/cugraph/tests/dask/test_mg_renumber.py | 58 +++-- .../cugraph/tests/dask/test_mg_replication.py | 204 +++++++++++------- python/cugraph/tests/dask/test_mg_utility.py | 63 +++--- 12 files changed, 455 insertions(+), 277 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e829f128182..b4ca31277d2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -27,7 +27,7 @@ - PR #1150 Update RAFT git tag - PR #1155 Remove RMM library dependency and CXX11 ABI handling - PR #1158 Pass size_t* & size_t* instead of size_t[] & int[] for raft allgatherv's input parameters recvcounts & displs - +- PR #1168 Disabled MG tests on single GPU # cuGraph 0.15.0 (26 Aug 2020) diff --git a/python/cugraph/dask/common/mg_utils.py b/python/cugraph/dask/common/mg_utils.py index 198b0756c00..7556afb122a 100644 --- a/python/cugraph/dask/common/mg_utils.py +++ b/python/cugraph/dask/common/mg_utils.py @@ -11,6 +11,7 @@ # See the License for the specific language governing permissions and # limitations under the License. 
from cugraph.raft.dask.common.utils import default_client +import numba.cuda # FIXME: We currently look for the default client from dask, as such is the @@ -32,3 +33,11 @@ def prepare_worker_to_parts(data, client=None): if worker not in data.worker_to_parts: data.worker_to_parts[worker] = [placeholder] return data + + +def is_single_gpu(): + ngpus = len(numba.cuda.gpus) + if ngpus > 1: + return False + else: + return True diff --git a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py index ccb0c94b020..53942a277c2 100644 --- a/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py +++ b/python/cugraph/tests/dask/test_mg_batch_betweenness_centrality.py @@ -14,8 +14,8 @@ import pytest import numpy as np -from cugraph.tests.dask.mg_context import (MGContext, - skip_if_not_enough_devices) +from cugraph.tests.dask.mg_context import MGContext, skip_if_not_enough_devices +from cugraph.dask.common.mg_utils import is_single_gpu # Get parameters from standard betwenness_centrality_test from cugraph.tests.test_betweenness_centrality import ( @@ -30,48 +30,59 @@ from cugraph.tests.test_betweenness_centrality import ( prepare_test, calc_betweenness_centrality, - compare_scores + compare_scores, ) # ============================================================================= # Parameters # ============================================================================= -DATASETS = ['../datasets/karate.csv'] +DATASETS = ["../datasets/karate.csv"] MG_DEVICE_COUNT_OPTIONS = [1, 2, 3, 4] RESULT_DTYPE_OPTIONS = [np.float64] # FIXME: The following creates and destroys Comms at every call making the # testsuite quite slow -@pytest.mark.parametrize('graph_file', DATASETS) -@pytest.mark.parametrize('directed', DIRECTED_GRAPH_OPTIONS) -@pytest.mark.parametrize('subset_size', SUBSET_SIZE_OPTIONS) -@pytest.mark.parametrize('normalized', NORMALIZED_OPTIONS) -@pytest.mark.parametrize('weight', [None]) 
-@pytest.mark.parametrize('endpoints', ENDPOINTS_OPTIONS) -@pytest.mark.parametrize('subset_seed', SUBSET_SEED_OPTIONS) -@pytest.mark.parametrize('result_dtype', RESULT_DTYPE_OPTIONS) -@pytest.mark.parametrize('mg_device_count', MG_DEVICE_COUNT_OPTIONS) -def test_mg_betweenness_centrality(graph_file, - directed, - subset_size, - normalized, - weight, - endpoints, - subset_seed, - result_dtype, - mg_device_count): +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) +@pytest.mark.parametrize("graph_file", DATASETS) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) +@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) +@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) +@pytest.mark.parametrize("weight", [None]) +@pytest.mark.parametrize("endpoints", ENDPOINTS_OPTIONS) +@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) +@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) +@pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) +def test_mg_betweenness_centrality( + graph_file, + directed, + subset_size, + normalized, + weight, + endpoints, + subset_seed, + result_dtype, + mg_device_count, +): prepare_test() skip_if_not_enough_devices(mg_device_count) with MGContext(mg_device_count): - sorted_df = calc_betweenness_centrality(graph_file, - directed=directed, - normalized=normalized, - k=subset_size, - weight=weight, - endpoints=endpoints, - seed=subset_seed, - result_dtype=result_dtype, - multi_gpu_batch=True) - compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc", - epsilon=DEFAULT_EPSILON) + sorted_df = calc_betweenness_centrality( + graph_file, + directed=directed, + normalized=normalized, + k=subset_size, + weight=weight, + endpoints=endpoints, + seed=subset_seed, + result_dtype=result_dtype, + multi_gpu_batch=True, + ) + compare_scores( + sorted_df, + first_key="cu_bc", + second_key="ref_bc", + epsilon=DEFAULT_EPSILON, + ) diff --git 
a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py index 01023839d06..7778f7bf421 100644 --- a/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py +++ b/python/cugraph/tests/dask/test_mg_batch_edge_betweenness_centrality.py @@ -13,10 +13,10 @@ import pytest import numpy as np +from cugraph.dask.common.mg_utils import is_single_gpu -from cugraph.tests.dask.mg_context import (MGContext, - skip_if_not_enough_devices) +from cugraph.tests.dask.mg_context import MGContext, skip_if_not_enough_devices # Get parameters from standard betwenness_centrality_test from cugraph.tests.test_edge_betweenness_centrality import ( @@ -30,43 +30,54 @@ from cugraph.tests.test_edge_betweenness_centrality import ( prepare_test, calc_edge_betweenness_centrality, - compare_scores + compare_scores, ) # ============================================================================= # Parameters # ============================================================================= -DATASETS = ['../datasets/karate.csv'] +DATASETS = ["../datasets/karate.csv"] MG_DEVICE_COUNT_OPTIONS = [1, 2, 4] RESULT_DTYPE_OPTIONS = [np.float64] -@pytest.mark.parametrize('graph_file', DATASETS) -@pytest.mark.parametrize('directed', DIRECTED_GRAPH_OPTIONS) -@pytest.mark.parametrize('subset_size', SUBSET_SIZE_OPTIONS) -@pytest.mark.parametrize('normalized', NORMALIZED_OPTIONS) -@pytest.mark.parametrize('weight', [None]) -@pytest.mark.parametrize('subset_seed', SUBSET_SEED_OPTIONS) -@pytest.mark.parametrize('result_dtype', RESULT_DTYPE_OPTIONS) -@pytest.mark.parametrize('mg_device_count', MG_DEVICE_COUNT_OPTIONS) -def test_mg_edge_betweenness_centrality(graph_file, - directed, - subset_size, - normalized, - weight, - subset_seed, - result_dtype, - mg_device_count): +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) +@pytest.mark.parametrize("graph_file", DATASETS) 
+@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) +@pytest.mark.parametrize("subset_size", SUBSET_SIZE_OPTIONS) +@pytest.mark.parametrize("normalized", NORMALIZED_OPTIONS) +@pytest.mark.parametrize("weight", [None]) +@pytest.mark.parametrize("subset_seed", SUBSET_SEED_OPTIONS) +@pytest.mark.parametrize("result_dtype", RESULT_DTYPE_OPTIONS) +@pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) +def test_mg_edge_betweenness_centrality( + graph_file, + directed, + subset_size, + normalized, + weight, + subset_seed, + result_dtype, + mg_device_count, +): prepare_test() skip_if_not_enough_devices(mg_device_count) with MGContext(mg_device_count): - sorted_df = calc_edge_betweenness_centrality(graph_file, - directed=directed, - normalized=normalized, - k=subset_size, - weight=weight, - seed=subset_seed, - result_dtype=result_dtype, - multi_gpu_batch=True) - compare_scores(sorted_df, first_key="cu_bc", second_key="ref_bc", - epsilon=DEFAULT_EPSILON) + sorted_df = calc_edge_betweenness_centrality( + graph_file, + directed=directed, + normalized=normalized, + k=subset_size, + weight=weight, + seed=subset_seed, + result_dtype=result_dtype, + multi_gpu_batch=True, + ) + compare_scores( + sorted_df, + first_key="cu_bc", + second_key="ref_bc", + epsilon=DEFAULT_EPSILON, + ) diff --git a/python/cugraph/tests/dask/test_mg_bfs.py b/python/cugraph/tests/dask/test_mg_bfs.py index a22f280e9b2..94bed827fd0 100644 --- a/python/cugraph/tests/dask/test_mg_bfs.py +++ b/python/cugraph/tests/dask/test_mg_bfs.py @@ -20,6 +20,7 @@ import dask_cudf import cudf from dask_cuda import LocalCUDACluster +from cugraph.dask.common.mg_utils import is_single_gpu @pytest.fixture @@ -35,39 +36,49 @@ def client_connection(): cluster.close() +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) def test_dask_bfs(client_connection): gc.collect() input_data_path = r"../datasets/netscience.csv" chunksize = dcg.get_chunksize(input_data_path) - 
ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, - delimiter=' ', - names=['src', 'dst', 'value'], - dtype=['int32', 'int32', 'float32']) + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) - df = cudf.read_csv(input_data_path, - delimiter=' ', - names=['src', 'dst', 'value'], - dtype=['int32', 'int32', 'float32']) + df = cudf.read_csv( + input_data_path, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) g = cugraph.DiGraph() - g.from_cudf_edgelist(df, 'src', 'dst', renumber=True) + g.from_cudf_edgelist(df, "src", "dst", renumber=True) dg = cugraph.DiGraph() - dg.from_dask_cudf_edgelist(ddf, 'src', 'dst') + dg.from_dask_cudf_edgelist(ddf, "src", "dst") expected_dist = cugraph.bfs(g, 0) result_dist = dcg.bfs(dg, 0, True) compare_dist = expected_dist.merge( - result_dist, on="vertex", suffixes=['_local', '_dask'] + result_dist, on="vertex", suffixes=["_local", "_dask"] ) err = 0 for i in range(len(compare_dist)): - if (compare_dist['distance_local'].iloc[i] != - compare_dist['distance_dask'].iloc[i]): + if ( + compare_dist["distance_local"].iloc[i] + != compare_dist["distance_dask"].iloc[i] + ): err = err + 1 assert err == 0 diff --git a/python/cugraph/tests/dask/test_mg_comms.py b/python/cugraph/tests/dask/test_mg_comms.py index 214dc76b9be..cd94f945f93 100644 --- a/python/cugraph/tests/dask/test_mg_comms.py +++ b/python/cugraph/tests/dask/test_mg_comms.py @@ -20,6 +20,7 @@ import dask_cudf import cudf from dask_cuda import LocalCUDACluster +from cugraph.dask.common.mg_utils import is_single_gpu @pytest.fixture @@ -35,6 +36,9 @@ def client_connection(): cluster.close() +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) def test_dask_pagerank(client_connection): gc.collect() @@ -47,43 +51,53 @@ def test_dask_pagerank(client_connection): input_data_path2 = 
r"../datasets/dolphins.csv" chunksize2 = dcg.get_chunksize(input_data_path2) - ddf1 = dask_cudf.read_csv(input_data_path1, chunksize=chunksize1, - delimiter=' ', - names=['src', 'dst', 'value'], - dtype=['int32', 'int32', 'float32']) + ddf1 = dask_cudf.read_csv( + input_data_path1, + chunksize=chunksize1, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) dg1 = cugraph.DiGraph() - dg1.from_dask_cudf_edgelist(ddf1, 'src', 'dst') + dg1.from_dask_cudf_edgelist(ddf1, "src", "dst") result_pr1 = dcg.pagerank(dg1) - ddf2 = dask_cudf.read_csv(input_data_path2, chunksize=chunksize2, - delimiter=' ', - names=['src', 'dst', 'value'], - dtype=['int32', 'int32', 'float32']) + ddf2 = dask_cudf.read_csv( + input_data_path2, + chunksize=chunksize2, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) dg2 = cugraph.DiGraph() - dg2.from_dask_cudf_edgelist(ddf2, 'src', 'dst') + dg2.from_dask_cudf_edgelist(ddf2, "src", "dst") result_pr2 = dcg.pagerank(dg2) # Calculate single GPU pagerank for verification of results - df1 = cudf.read_csv(input_data_path1, - delimiter=' ', - names=['src', 'dst', 'value'], - dtype=['int32', 'int32', 'float32']) + df1 = cudf.read_csv( + input_data_path1, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) g1 = cugraph.DiGraph() - g1.from_cudf_edgelist(df1, 'src', 'dst') + g1.from_cudf_edgelist(df1, "src", "dst") expected_pr1 = cugraph.pagerank(g1) - df2 = cudf.read_csv(input_data_path2, - delimiter=' ', - names=['src', 'dst', 'value'], - dtype=['int32', 'int32', 'float32']) + df2 = cudf.read_csv( + input_data_path2, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) g2 = cugraph.DiGraph() - g2.from_cudf_edgelist(df2, 'src', 'dst') + g2.from_cudf_edgelist(df2, "src", "dst") expected_pr2 = cugraph.pagerank(g2) # Compare and verify pagerank results @@ -93,14 +107,16 @@ def 
test_dask_pagerank(client_connection): tol = 1.0e-05 compare_pr1 = expected_pr1.merge( - result_pr1, on="vertex", suffixes=['_local', '_dask'] + result_pr1, on="vertex", suffixes=["_local", "_dask"] ) assert len(expected_pr1) == len(result_pr1) for i in range(len(compare_pr1)): - diff = abs(compare_pr1['pagerank_local'].iloc[i] - - compare_pr1['pagerank_dask'].iloc[i]) + diff = abs( + compare_pr1["pagerank_local"].iloc[i] + - compare_pr1["pagerank_dask"].iloc[i] + ) if diff > tol * 1.1: err1 = err1 + 1 print("Mismatches in ", input_data_path1, ": ", err1) @@ -108,12 +124,14 @@ def test_dask_pagerank(client_connection): assert len(expected_pr2) == len(result_pr2) compare_pr2 = expected_pr2.merge( - result_pr2, on="vertex", suffixes=['_local', '_dask'] + result_pr2, on="vertex", suffixes=["_local", "_dask"] ) for i in range(len(compare_pr2)): - diff = abs(compare_pr2['pagerank_local'].iloc[i] - - compare_pr2['pagerank_dask'].iloc[i]) + diff = abs( + compare_pr2["pagerank_local"].iloc[i] + - compare_pr2["pagerank_dask"].iloc[i] + ) if diff > tol * 1.1: err2 = err2 + 1 print("Mismatches in ", input_data_path2, ": ", err2) diff --git a/python/cugraph/tests/dask/test_mg_degree.py b/python/cugraph/tests/dask/test_mg_degree.py index f7e206b8e75..a903f69d05a 100644 --- a/python/cugraph/tests/dask/test_mg_degree.py +++ b/python/cugraph/tests/dask/test_mg_degree.py @@ -18,6 +18,7 @@ import cugraph.comms as Comms import cugraph import dask_cudf +from cugraph.dask.common.mg_utils import is_single_gpu # Move to conftest from dask_cuda import LocalCUDACluster @@ -36,6 +37,9 @@ def client_connection(): cluster.close() +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) def test_dask_mg_degree(client_connection): gc.collect() @@ -43,23 +47,31 @@ def test_dask_mg_degree(client_connection): chunksize = cugraph.dask.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, - delimiter=' ', - names=['src', 
'dst', 'value'], - dtype=['int32', 'int32', 'float32']) - - df = cudf.read_csv(input_data_path, - delimiter=' ', - names=['src', 'dst', 'value'], - dtype=['int32', 'int32', 'float32']) + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) + + df = cudf.read_csv( + input_data_path, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) dg = cugraph.DiGraph() - dg.from_dask_cudf_edgelist(ddf, 'src', 'dst') + dg.from_dask_cudf_edgelist(ddf, "src", "dst") g = cugraph.DiGraph() - g.from_cudf_edgelist(df, 'src', 'dst') + g.from_cudf_edgelist(df, "src", "dst") - merge_df = dg.in_degree().merge( - g.in_degree(), on="vertex", suffixes=['_dg', '_g']).compute() + merge_df = ( + dg.in_degree() + .merge(g.in_degree(), on="vertex", suffixes=["_dg", "_g"]) + .compute() + ) - assert merge_df['degree_dg'].equals(merge_df['degree_g']) + assert merge_df["degree_dg"].equals(merge_df["degree_g"]) diff --git a/python/cugraph/tests/dask/test_mg_louvain.py b/python/cugraph/tests/dask/test_mg_louvain.py index 23210596df9..b4655b02a8c 100644 --- a/python/cugraph/tests/dask/test_mg_louvain.py +++ b/python/cugraph/tests/dask/test_mg_louvain.py @@ -20,16 +20,20 @@ import dask_cudf from dask_cuda import LocalCUDACluster from cugraph.tests import utils +from cugraph.dask.common.mg_utils import is_single_gpu try: from rapids_pytest_benchmark import setFixtureParamNames except ImportError: - print("\n\nWARNING: rapids_pytest_benchmark is not installed, " - "falling back to pytest_benchmark fixtures.\n") + print( + "\n\nWARNING: rapids_pytest_benchmark is not installed, " + "falling back to pytest_benchmark fixtures.\n" + ) # if rapids_pytest_benchmark is not available, just perfrom time-only # benchmarking and replace the util functions with nops import pytest_benchmark + gpubenchmark = pytest_benchmark.plugin.benchmark def setFixtureParamNames(*args, 
**kwargs): @@ -53,8 +57,10 @@ def client_connection(): cluster.close() -@pytest.fixture(scope="module", - params=utils.DATASETS_UNDIRECTED) +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) +@pytest.fixture(scope="module", params=utils.DATASETS_UNDIRECTED) def daskGraphFromDataset(request, client_connection): """ Returns a new dask dataframe created from the dataset file param. @@ -65,18 +71,24 @@ def daskGraphFromDataset(request, client_connection): dataset = request.param chunksize = dcg.get_chunksize(dataset) - ddf = dask_cudf.read_csv(dataset, chunksize=chunksize, - delimiter=' ', - names=['src', 'dst', 'value'], - dtype=['int32', 'int32', 'float32']) + ddf = dask_cudf.read_csv( + dataset, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) dg = cugraph.DiGraph() - dg.from_dask_cudf_edgelist(ddf, 'src', 'dst') + dg.from_dask_cudf_edgelist(ddf, "src", "dst") return dg ############################################################################### # Tests +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) def test_mg_louvain_with_edgevals(daskGraphFromDataset): # FIXME: daskGraphFromDataset returns a DiGraph, which Louvain is currently # accepting. 
In the future, an MNMG symmeterize will need to be called to diff --git a/python/cugraph/tests/dask/test_mg_pagerank.py b/python/cugraph/tests/dask/test_mg_pagerank.py index aad164a45c5..a2340e139d1 100644 --- a/python/cugraph/tests/dask/test_mg_pagerank.py +++ b/python/cugraph/tests/dask/test_mg_pagerank.py @@ -20,6 +20,7 @@ import dask_cudf import cudf from dask_cuda import LocalCUDACluster +from cugraph.dask.common.mg_utils import is_single_gpu # The function selects personalization_perc% of accessible vertices in graph M # and randomly assigns them personalization values @@ -30,19 +31,20 @@ def personalize(v, personalization_perc): if personalization_perc != 0: personalization = {} nnz_vtx = np.arange(0, v) - personalization_count = int((nnz_vtx.size * - personalization_perc)/100.0) - nnz_vtx = np.random.choice(nnz_vtx, - min(nnz_vtx.size, personalization_count), - replace=False) + personalization_count = int( + (nnz_vtx.size * personalization_perc) / 100.0 + ) + nnz_vtx = np.random.choice( + nnz_vtx, min(nnz_vtx.size, personalization_count), replace=False + ) nnz_val = np.random.random(nnz_vtx.size) - nnz_val = nnz_val/sum(nnz_val) + nnz_val = nnz_val / sum(nnz_val) for vtx, val in zip(nnz_vtx, nnz_val): personalization[vtx] = val - k = np.fromiter(personalization.keys(), dtype='int32') - v = np.fromiter(personalization.values(), dtype='float32') - cu_personalization = cudf.DataFrame({'vertex': k, 'values': v}) + k = np.fromiter(personalization.keys(), dtype="int32") + v = np.fromiter(personalization.values(), dtype="float32") + cu_personalization = cudf.DataFrame({"vertex": k, "values": v}) return cu_personalization @@ -63,39 +65,48 @@ def client_connection(): cluster.close() -@pytest.mark.parametrize('personalization_perc', PERSONALIZATION_PERC) +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) +@pytest.mark.parametrize("personalization_perc", PERSONALIZATION_PERC) def test_dask_pagerank(client_connection, 
personalization_perc): gc.collect() input_data_path = r"../datasets/karate.csv" chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, - delimiter=' ', - names=['src', 'dst', 'value'], - dtype=['int32', 'int32', 'float32']) + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) - df = cudf.read_csv(input_data_path, - delimiter=' ', - names=['src', 'dst', 'value'], - dtype=['int32', 'int32', 'float32']) + df = cudf.read_csv( + input_data_path, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) g = cugraph.DiGraph() - g.from_cudf_edgelist(df, 'src', 'dst') + g.from_cudf_edgelist(df, "src", "dst") dg = cugraph.DiGraph() - dg.from_dask_cudf_edgelist(ddf, 'src', 'dst') + dg.from_dask_cudf_edgelist(ddf, "src", "dst") # Pre compute local data and personalize personalization = None if personalization_perc != 0: - dg.compute_local_data(by='dst') - personalization = personalize(dg.number_of_vertices(), - personalization_perc) + dg.compute_local_data(by="dst") + personalization = personalize( + dg.number_of_vertices(), personalization_perc + ) - expected_pr = cugraph.pagerank(g, - personalization=personalization, - tol=1e-6) + expected_pr = cugraph.pagerank( + g, personalization=personalization, tol=1e-6 + ) result_pr = dcg.pagerank(dg, personalization=personalization, tol=1e-6) err = 0 @@ -104,12 +115,14 @@ def test_dask_pagerank(client_connection, personalization_perc): assert len(expected_pr) == len(result_pr) compare_pr = expected_pr.merge( - result_pr, on="vertex", suffixes=['_local', '_dask'] + result_pr, on="vertex", suffixes=["_local", "_dask"] ) for i in range(len(compare_pr)): - diff = abs(compare_pr['pagerank_local'].iloc[i] - - compare_pr['pagerank_dask'].iloc[i]) + diff = abs( + compare_pr["pagerank_local"].iloc[i] + - compare_pr["pagerank_dask"].iloc[i] + ) 
if diff > tol * 1.1: err = err + 1 assert err == 0 diff --git a/python/cugraph/tests/dask/test_mg_renumber.py b/python/cugraph/tests/dask/test_mg_renumber.py index ceeeeb77a5a..b981a49a0de 100644 --- a/python/cugraph/tests/dask/test_mg_renumber.py +++ b/python/cugraph/tests/dask/test_mg_renumber.py @@ -29,6 +29,7 @@ from dask_cuda import LocalCUDACluster from cugraph.tests import utils from cugraph.structure.number_map import NumberMap +from cugraph.dask.common.mg_utils import is_single_gpu @pytest.fixture @@ -45,6 +46,9 @@ def client_connection(): # Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) def test_mg_renumber(graph_file, client_connection): gc.collect() @@ -85,6 +89,9 @@ def test_mg_renumber(graph_file, client_connection): # Test all combinations of default/managed and pooled/non-pooled allocation +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) def test_mg_renumber2(graph_file, client_connection): gc.collect() @@ -109,9 +116,9 @@ def test_mg_renumber2(graph_file, client_connection): ) check_src = num2.from_internal_vertex_id(ren2, "src").compute() - check_src = check_src.sort_values('weight').reset_index(drop=True) + check_src = check_src.sort_values("weight").reset_index(drop=True) check_dst = num2.from_internal_vertex_id(ren2, "dst").compute() - check_dst = check_dst.sort_values('weight').reset_index(drop=True) + check_dst = check_dst.sort_values("weight").reset_index(drop=True) assert check_src["0"].to_pandas().equals(gdf["src"].to_pandas()) assert check_src["1"].to_pandas().equals(gdf["src_old"].to_pandas()) @@ -120,6 +127,9 @@ def test_mg_renumber2(graph_file, client_connection): # Test all combinations of default/managed and pooled/non-pooled allocation 
+@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) @pytest.mark.parametrize("graph_file", utils.DATASETS_UNRENUMBERED) def test_mg_renumber3(graph_file, client_connection): gc.collect() @@ -143,39 +153,47 @@ def test_mg_renumber3(graph_file, client_connection): ddf, ["src", "src_old"], ["dst", "dst_old"] ) - test_df = gdf[['src', 'src_old']].head() + test_df = gdf[["src", "src_old"]].head() # # This call raises an exception in branch-0.15 # prior to this PR # - test_df = num2.add_internal_vertex_id(test_df, 'src', ['src', 'src_old']) - assert(True) + test_df = num2.add_internal_vertex_id(test_df, "src", ["src", "src_old"]) + assert True +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) def test_dask_pagerank(client_connection): gc.collect() - pandas.set_option('display.max_rows', 10000) + pandas.set_option("display.max_rows", 10000) input_data_path = r"../datasets/karate.csv" chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, - delimiter=' ', - names=['src', 'dst', 'value'], - dtype=['int32', 'int32', 'float32']) + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) - df = cudf.read_csv(input_data_path, - delimiter=' ', - names=['src', 'dst', 'value'], - dtype=['int32', 'int32', 'float32']) + df = cudf.read_csv( + input_data_path, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) g = cugraph.DiGraph() - g.from_cudf_edgelist(df, 'src', 'dst') + g.from_cudf_edgelist(df, "src", "dst") dg = cugraph.DiGraph() - dg.from_dask_cudf_edgelist(ddf, 'src', 'dst') + dg.from_dask_cudf_edgelist(ddf, "src", "dst") # Pre compute local data # dg.compute_local_data(by='dst') @@ -189,12 +207,14 @@ def test_dask_pagerank(client_connection): assert len(expected_pr) == len(result_pr) 
compare_pr = expected_pr.merge( - result_pr, on="vertex", suffixes=['_local', '_dask'] + result_pr, on="vertex", suffixes=["_local", "_dask"] ) for i in range(len(compare_pr)): - diff = abs(compare_pr['pagerank_local'].iloc[i] - - compare_pr['pagerank_dask'].iloc[i]) + diff = abs( + compare_pr["pagerank_local"].iloc[i] + - compare_pr["pagerank_dask"].iloc[i] + ) if diff > tol * 1.1: err = err + 1 print("Mismatches:", err) diff --git a/python/cugraph/tests/dask/test_mg_replication.py b/python/cugraph/tests/dask/test_mg_replication.py index 4932e0fd970..d8a2676b32b 100644 --- a/python/cugraph/tests/dask/test_mg_replication.py +++ b/python/cugraph/tests/dask/test_mg_replication.py @@ -12,10 +12,10 @@ # limitations under the License. import cugraph -from cugraph.tests.dask.mg_context import (MGContext, - skip_if_not_enough_devices) +from cugraph.tests.dask.mg_context import MGContext, skip_if_not_enough_devices import cudf import cugraph.dask.structure.replication as replication +from cugraph.dask.common.mg_utils import is_single_gpu import cugraph.tests.utils as utils import pytest import gc @@ -26,66 +26,86 @@ MG_DEVICE_COUNT_OPTIONS = [1] +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) @pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) -def test_replicate_cudf_dataframe_with_weights(input_data_path, - mg_device_count): +def test_replicate_cudf_dataframe_with_weights( + input_data_path, mg_device_count +): gc.collect() skip_if_not_enough_devices(mg_device_count) - df = cudf.read_csv(input_data_path, - delimiter=' ', - names=['src', 'dst', 'value'], - dtype=['int32', 'int32', 'float32']) + df = cudf.read_csv( + input_data_path, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) with MGContext(mg_device_count): worker_to_futures = replication.replicate_cudf_dataframe(df) for worker in worker_to_futures: 
replicated_df = worker_to_futures[worker].result() - assert df.equals(replicated_df), "There is a mismatch in one " \ - "of the replications" + assert df.equals(replicated_df), ( + "There is a mismatch in one " "of the replications" + ) +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) @pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) -def test_replicate_cudf_dataframe_no_weights(input_data_path, - mg_device_count): +def test_replicate_cudf_dataframe_no_weights(input_data_path, mg_device_count): gc.collect() skip_if_not_enough_devices(mg_device_count) - df = cudf.read_csv(input_data_path, - delimiter=' ', - names=['src', 'dst'], - dtype=['int32', 'int32']) + df = cudf.read_csv( + input_data_path, + delimiter=" ", + names=["src", "dst"], + dtype=["int32", "int32"], + ) with MGContext(mg_device_count): worker_to_futures = replication.replicate_cudf_dataframe(df) for worker in worker_to_futures: replicated_df = worker_to_futures[worker].result() - assert df.equals(replicated_df), "There is a mismatch in one " \ - "of the replications" + assert df.equals(replicated_df), ( + "There is a mismatch in one " "of the replications" + ) +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) @pytest.mark.parametrize("input_data_path", DATASETS_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) -def test_replicate_cudf_series(input_data_path, - mg_device_count): +def test_replicate_cudf_series(input_data_path, mg_device_count): gc.collect() skip_if_not_enough_devices(mg_device_count) - df = cudf.read_csv(input_data_path, - delimiter=' ', - names=['src', 'dst', 'value'], - dtype=['int32', 'int32', 'float32']) + df = cudf.read_csv( + input_data_path, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) with MGContext(mg_device_count): for column in 
df.columns.values: series = df[column] worker_to_futures = replication.replicate_cudf_series(series) for worker in worker_to_futures: replicated_series = worker_to_futures[worker].result() - assert series.equals(replicated_series), "There is a " \ - "mismatch in one of the replications" + assert series.equals(replicated_series), ( + "There is a " "mismatch in one of the replications" + ) # FIXME: If we do not clear this dictionary, when comparing # results for the 2nd column, one of the workers still # has a value from the 1st column worker_to_futures = {} +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) @pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) @@ -98,11 +118,15 @@ def test_enable_batch_no_context(graph_file, directed, mg_device_count): G.enable_batch() +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) @pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) -def test_enable_batch_no_context_view_adj(graph_file, directed, - mg_device_count): +def test_enable_batch_no_context_view_adj( + graph_file, directed, mg_device_count +): gc.collect() skip_if_not_enough_devices(mg_device_count) G = utils.generate_cugraph_graph_from_file(graph_file, directed) @@ -110,11 +134,15 @@ def test_enable_batch_no_context_view_adj(graph_file, directed, G.view_adj_list() +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) @pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) -def test_enable_batch_context_then_views(graph_file, directed, - mg_device_count): 
+def test_enable_batch_context_then_views( + graph_file, directed, mg_device_count +): gc.collect() skip_if_not_enough_devices(mg_device_count) G = utils.generate_cugraph_graph_from_file(graph_file, directed) @@ -122,9 +150,9 @@ def test_enable_batch_context_then_views(graph_file, directed, assert G.batch_enabled is False, "Internal property should be False" G.enable_batch() assert G.batch_enabled is True, "Internal property should be True" - assert G.batch_edgelists is not None, "The graph should have " \ - "been created with an " \ - "edgelist" + assert G.batch_edgelists is not None, ( + "The graph should have " "been created with an " "edgelist" + ) assert G.batch_adjlists is None G.view_adj_list() assert G.batch_adjlists is not None @@ -134,11 +162,13 @@ def test_enable_batch_context_then_views(graph_file, directed, assert G.batch_transposed_adjlists is not None +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) @pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) -def test_enable_batch_view_then_context(graph_file, directed, - mg_device_count): +def test_enable_batch_view_then_context(graph_file, directed, mg_device_count): gc.collect() skip_if_not_enough_devices(mg_device_count) G = utils.generate_cugraph_graph_from_file(graph_file, directed) @@ -155,18 +185,22 @@ def test_enable_batch_view_then_context(graph_file, directed, assert G.batch_enabled is False, "Internal property should be False" G.enable_batch() assert G.batch_enabled is True, "Internal property should be True" - assert G.batch_edgelists is not None, "The graph should have " \ - "been created with an " \ - "edgelist" + assert G.batch_edgelists is not None, ( + "The graph should have " "been created with an " "edgelist" + ) assert G.batch_adjlists is not None assert G.batch_transposed_adjlists is not None 
+@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) @pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) -def test_enable_batch_context_no_context_views(graph_file, directed, - mg_device_count): +def test_enable_batch_context_no_context_views( + graph_file, directed, mg_device_count +): gc.collect() skip_if_not_enough_devices(mg_device_count) G = utils.generate_cugraph_graph_from_file(graph_file, directed) @@ -174,19 +208,23 @@ def test_enable_batch_context_no_context_views(graph_file, directed, assert G.batch_enabled is False, "Internal property should be False" G.enable_batch() assert G.batch_enabled is True, "Internal property should be True" - assert G.batch_edgelists is not None, "The graph should have " \ - "been created with an " \ - "edgelist" + assert G.batch_edgelists is not None, ( + "The graph should have " "been created with an " "edgelist" + ) G.view_edge_list() G.view_adj_list() G.view_transposed_adj_list() +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) @pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) @pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) -def test_enable_batch_edgelist_replication(graph_file, directed, - mg_device_count): +def test_enable_batch_edgelist_replication( + graph_file, directed, mg_device_count +): gc.collect() skip_if_not_enough_devices(mg_device_count) G = utils.generate_cugraph_graph_from_file(graph_file, directed) @@ -198,20 +236,27 @@ def test_enable_batch_edgelist_replication(graph_file, directed, assert df.equals(replicated_df), "Replication of edgelist failed" +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) @pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) 
@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) -def test_enable_batch_adjlist_replication_weights(graph_file, directed, - mg_device_count): +def test_enable_batch_adjlist_replication_weights( + graph_file, directed, mg_device_count +): gc.collect() skip_if_not_enough_devices(mg_device_count) - df = cudf.read_csv(graph_file, - delimiter=' ', - names=['src', 'dst', 'value'], - dtype=['int32', 'int32', 'float32']) + df = cudf.read_csv( + graph_file, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) G = cugraph.DiGraph() if directed else cugraph.Graph() - G.from_cudf_edgelist(df, source='src', destination='dst', - edge_attr='value') + G.from_cudf_edgelist( + df, source="src", destination="dst", edge_attr="value" + ) with MGContext(mg_device_count): G.enable_batch() G.view_adj_list() @@ -220,30 +265,37 @@ def test_enable_batch_adjlist_replication_weights(graph_file, directed, indices = adjlist.indices weights = adjlist.weights for worker in G.batch_adjlists: - (rep_offsets, - rep_indices, - rep_weights) = G.batch_adjlists[worker] - assert offsets.equals(rep_offsets.result()), "Replication of " \ - "adjlist offsets failed" - assert indices.equals(rep_indices.result()), "Replication of " \ - "adjlist indices failed" - assert weights.equals(rep_weights.result()), "Replication of " \ - "adjlist weights failed" + (rep_offsets, rep_indices, rep_weights) = G.batch_adjlists[worker] + assert offsets.equals(rep_offsets.result()), ( + "Replication of " "adjlist offsets failed" + ) + assert indices.equals(rep_indices.result()), ( + "Replication of " "adjlist indices failed" + ) + assert weights.equals(rep_weights.result()), ( + "Replication of " "adjlist weights failed" + ) +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) @pytest.mark.parametrize("graph_file", DATASETS_OPTIONS) @pytest.mark.parametrize("directed", 
DIRECTED_GRAPH_OPTIONS) @pytest.mark.parametrize("mg_device_count", MG_DEVICE_COUNT_OPTIONS) -def test_enable_batch_adjlist_replication_no_weights(graph_file, directed, - mg_device_count): +def test_enable_batch_adjlist_replication_no_weights( + graph_file, directed, mg_device_count +): gc.collect() skip_if_not_enough_devices(mg_device_count) - df = cudf.read_csv(graph_file, - delimiter=' ', - names=['src', 'dst'], - dtype=['int32', 'int32']) + df = cudf.read_csv( + graph_file, + delimiter=" ", + names=["src", "dst"], + dtype=["int32", "int32"], + ) G = cugraph.DiGraph() if directed else cugraph.Graph() - G.from_cudf_edgelist(df, source='src', destination='dst') + G.from_cudf_edgelist(df, source="src", destination="dst") with MGContext(mg_device_count): G.enable_batch() G.view_adj_list() @@ -252,11 +304,11 @@ def test_enable_batch_adjlist_replication_no_weights(graph_file, directed, indices = adjlist.indices weights = adjlist.weights for worker in G.batch_adjlists: - (rep_offsets, - rep_indices, - rep_weights) = G.batch_adjlists[worker] - assert offsets.equals(rep_offsets.result()), "Replication of " \ - "adjlist offsets failed" - assert indices.equals(rep_indices.result()), "Replication of " \ - "adjlist indices failed" + (rep_offsets, rep_indices, rep_weights) = G.batch_adjlists[worker] + assert offsets.equals(rep_offsets.result()), ( + "Replication of " "adjlist offsets failed" + ) + assert indices.equals(rep_indices.result()), ( + "Replication of " "adjlist indices failed" + ) assert weights is None and rep_weights is None diff --git a/python/cugraph/tests/dask/test_mg_utility.py b/python/cugraph/tests/dask/test_mg_utility.py index a26101b9f7a..f1becb051ad 100644 --- a/python/cugraph/tests/dask/test_mg_utility.py +++ b/python/cugraph/tests/dask/test_mg_utility.py @@ -21,6 +21,7 @@ import pytest from cugraph.dask.common.part_utils import concat_within_workers from cugraph.dask.common.read_utils import get_n_workers +from cugraph.dask.common.mg_utils import 
is_single_gpu import os import time import numpy as np @@ -40,60 +41,68 @@ def client_connection(): cluster.close() -@pytest.mark.skip(reason="skipping MG testing on a SG system") +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) def test_compute_local_data(client_connection): gc.collect() input_data_path = r"../datasets/karate.csv" chunksize = dcg.get_chunksize(input_data_path) - ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, - delimiter=' ', - names=['src', 'dst', 'value'], - dtype=['int32', 'int32', 'float32']) + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) dg = cugraph.DiGraph() - dg.from_dask_cudf_edgelist(ddf, source='src', destination='dst', - edge_attr='value') + dg.from_dask_cudf_edgelist( + ddf, source="src", destination="dst", edge_attr="value" + ) # Compute_local_data - dg.compute_local_data(by='dst') - data = dg.local_data['data'] - by = dg.local_data['by'] + dg.compute_local_data(by="dst") + data = dg.local_data["data"] + by = dg.local_data["by"] - assert by == 'dst' + assert by == "dst" assert Comms.is_initialized() - global_num_edges = data.local_data['edges'].sum() + global_num_edges = data.local_data["edges"].sum() assert global_num_edges == dg.number_of_edges() - global_num_verts = data.local_data['verts'].sum() + global_num_verts = data.local_data["verts"].sum() assert global_num_verts == dg.number_of_nodes() +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) @pytest.mark.skip(reason="MG not supported on CI") def test_parquet_concat_within_workers(client_connection): - if not os.path.exists('test_files_parquet'): + if not os.path.exists("test_files_parquet"): print("Generate data... 
") - os.mkdir('test_files_parquet') + os.mkdir("test_files_parquet") for x in range(10): - if not os.path.exists('test_files_parquet/df'+str(x)): - df = utils.random_edgelist(e=100, - ef=16, - dtypes={'src': np.int32, - 'dst': np.int32}, - seed=x) - df.to_parquet('test_files_parquet/df'+str(x), index=False) + if not os.path.exists("test_files_parquet/df" + str(x)): + df = utils.random_edgelist( + e=100, ef=16, dtypes={"src": np.int32, "dst": np.int32}, seed=x + ) + df.to_parquet("test_files_parquet/df" + str(x), index=False) n_gpu = get_n_workers() print("Read_parquet... ") t1 = time.time() - ddf = dask_cudf.read_parquet('test_files_parquet/*', - dtype=['int32', 'int32']) + ddf = dask_cudf.read_parquet( + "test_files_parquet/*", dtype=["int32", "int32"] + ) ddf = ddf.persist() futures_of(ddf) wait(ddf) - t1 = time.time()-t1 + t1 = time.time() - t1 print("*** Read Time: ", t1, "s") print(ddf) @@ -105,7 +114,7 @@ def test_parquet_concat_within_workers(client_connection): ddf = ddf.persist() futures_of(ddf) wait(ddf) - t2 = time.time()-t2 + t2 = time.time() - t2 print("*** Drop duplicate time: ", t2, "s") assert t2 < t1 @@ -120,7 +129,7 @@ def test_parquet_concat_within_workers(client_connection): ddf = ddf.persist() futures_of(ddf) wait(ddf) - t3 = time.time()-t3 + t3 = time.time() - t3 print("*** repartition Time: ", t3, "s") print(ddf) From 213c482dac7a25e8bf62c6df93aebf4804291e5a Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Tue, 29 Sep 2020 15:16:19 -0400 Subject: [PATCH 63/74] BUG fix misspelling of function calls in asserts causing debug build to fail (#1166) * fix misspelling of function calls in asserts causing debug build to fail * update changelog --- CHANGELOG.md | 1 + cpp/include/patterns/any_of_adj_matrix_row.cuh | 4 ++-- cpp/include/patterns/copy_to_adj_matrix_col.cuh | 8 ++++---- cpp/include/patterns/copy_to_adj_matrix_row.cuh | 8 ++++---- cpp/include/patterns/copy_v_transform_reduce_nbr.cuh | 12 
++++++------ cpp/include/patterns/count_if_e.cuh | 4 ++-- cpp/include/patterns/transform_reduce_e.cuh | 4 ++-- .../transform_reduce_v_with_adj_matrix_row.cuh | 4 ++-- .../patterns/update_frontier_v_push_if_out_nbr.cuh | 4 ++-- 9 files changed, 25 insertions(+), 24 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b4ca31277d2..a91f88bec22 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ - PR #1155 Remove RMM library dependency and CXX11 ABI handling - PR #1158 Pass size_t* & size_t* instead of size_t[] & int[] for raft allgatherv's input parameters recvcounts & displs - PR #1168 Disabled MG tests on single GPU +- PR #1166 Fix misspelling of function calls in asserts causing debug build to fail # cuGraph 0.15.0 (26 Aug 2020) diff --git a/cpp/include/patterns/any_of_adj_matrix_row.cuh b/cpp/include/patterns/any_of_adj_matrix_row.cuh index 32602991cc3..e75273272e5 100644 --- a/cpp/include/patterns/any_of_adj_matrix_row.cuh +++ b/cpp/include/patterns/any_of_adj_matrix_row.cuh @@ -43,9 +43,9 @@ namespace experimental { * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row properties * for the first (inclusive) row (assigned to this process in multi-GPU). * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + - * @p graph_view.get_number_of_adj_matrix_local_rows(). + * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). * @param row_op Unary predicate operator that takes *(@p adj_matrix_row_value_input_first + i) - * (where i = [0, @p graph_view.get_number_of_adj_matrix_local_rows()) and returns either + * (where i = [0, @p graph_view.get_number_of_local_adj_matrix_partition_rows()) and returns either * true or false. * @return true If the predicate returns true at least once (in any process in multi-GPU). * @return false If the predicate never returns true (in any process in multi-GPU). 
diff --git a/cpp/include/patterns/copy_to_adj_matrix_col.cuh b/cpp/include/patterns/copy_to_adj_matrix_col.cuh index c2c96dca586..35f757ccd60 100644 --- a/cpp/include/patterns/copy_to_adj_matrix_col.cuh +++ b/cpp/include/patterns/copy_to_adj_matrix_col.cuh @@ -48,7 +48,7 @@ namespace experimental { * @param adj_matrix_col_value_output_first Iterator pointing to the adjacency matrix column output * property variables for the first (inclusive) column (assigned to this process in multi-GPU). * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first - * + @p graph_view.get_number_of_adj_matrix_local_cols(). + * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). */ template on(handle.get_stream()), vertex_value_input_first, vertex_value_input_first + graph_view.get_number_of_local_vertices(), @@ -96,7 +96,7 @@ void copy_to_adj_matrix_col(raft::handle_t const& handle, * @param adj_matrix_col_value_output_first Iterator pointing to the adjacency matrix column output * property variables for the first (inclusive) column (assigned to this process in multi-GPU). * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first - * + @p graph_view.get_number_of_adj_matrix_local_cols(). + * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). */ template on(handle.get_stream()), val_first, diff --git a/cpp/include/patterns/copy_to_adj_matrix_row.cuh b/cpp/include/patterns/copy_to_adj_matrix_row.cuh index 626562d6fae..507edf44f07 100644 --- a/cpp/include/patterns/copy_to_adj_matrix_row.cuh +++ b/cpp/include/patterns/copy_to_adj_matrix_row.cuh @@ -48,7 +48,7 @@ namespace experimental { * @param adj_matrix_row_value_output_first Iterator pointing to the adjacency matrix row output * property variables for the first (inclusive) row (assigned to this process in multi-GPU). 
* `adj_matrix_row_value_output_last` (exclusive) is deduced as @p adj_matrix_row_value_output_first - * + @p graph_view.get_number_of_adj_matrix_local_rows(). + * + @p graph_view.get_number_of_local_adj_matrix_partition_rows(). */ template on(handle.get_stream()), vertex_value_input_first, vertex_value_input_first + graph_view.get_number_of_local_vertices(), @@ -96,7 +96,7 @@ void copy_to_adj_matrix_row(raft::handle_t const& handle, * @param adj_matrix_row_value_output_first Iterator pointing to the adjacency matrix row output * property variables for the first (inclusive) row (assigned to this process in multi-GPU). * `adj_matrix_row_value_output_last` (exclusive) is deduced as @p adj_matrix_row_value_output_first - * + @p graph_view.get_number_of_adj_matrix_local_rows(). + * + @p graph_view.get_number_of_local_adj_matrix_partition_rows(). */ template on(handle.get_stream()), val_first, diff --git a/cpp/include/patterns/copy_v_transform_reduce_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_nbr.cuh index 5e975dbc10a..549a1c43c10 100644 --- a/cpp/include/patterns/copy_v_transform_reduce_nbr.cuh +++ b/cpp/include/patterns/copy_v_transform_reduce_nbr.cuh @@ -200,11 +200,11 @@ __global__ void for_all_major_for_all_nbr_low_out_degree( * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input * properties for the first (inclusive) row (assigned to this process in multi-GPU). * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + - * @p graph_view.get_number_of_adj_matrix_local_rows(). + * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input * properties for the first (inclusive) column (assigned to this process in multi-GPU). 
* `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first - * + @p graph_view.get_number_of_adj_matrix_local_cols(). + * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, @@ -249,9 +249,9 @@ void copy_v_transform_reduce_in_nbr(raft::handle_t const& handle, } assert(graph_view.get_number_of_local_vertices() == - graph_view.get_number_of_adj_matrix_local_rows()); + graph_view.get_number_of_local_adj_matrix_partition_rows()); assert(graph_view.get_number_of_local_vertices() == - graph_view.get_number_of_adj_matrix_local_cols()); + graph_view.get_number_of_local_adj_matrix_partition_cols()); detail::for_all_major_for_all_nbr_low_out_degree <<>>( matrix_partition, @@ -283,11 +283,11 @@ void copy_v_transform_reduce_in_nbr(raft::handle_t const& handle, * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input * properties for the first (inclusive) row (assigned to this process in multi-GPU). * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + - * @p graph_view.get_number_of_adj_matrix_local_rows(). + * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input * properties for the first (inclusive) column (assigned to this process in multi-GPU). * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first - * + @p graph_view.get_number_of_adj_matrix_local_cols(). + * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). 
* @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, diff --git a/cpp/include/patterns/count_if_e.cuh b/cpp/include/patterns/count_if_e.cuh index adf75a1f6c8..2de96cdb04a 100644 --- a/cpp/include/patterns/count_if_e.cuh +++ b/cpp/include/patterns/count_if_e.cuh @@ -157,11 +157,11 @@ __global__ void for_all_major_for_all_nbr_low_out_degree( * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input * properties for the first (inclusive) row (assigned to this process in multi-GPU). * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + - * @p graph_view.get_number_of_adj_matrix_local_rows(). + * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input * properties for the first (inclusive) column (assigned to this process in multi-GPU). * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first - * + @p graph_view.get_number_of_adj_matrix_local_cols(). + * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). 
* @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, diff --git a/cpp/include/patterns/transform_reduce_e.cuh b/cpp/include/patterns/transform_reduce_e.cuh index 184f1fffac5..c4db3355e99 100644 --- a/cpp/include/patterns/transform_reduce_e.cuh +++ b/cpp/include/patterns/transform_reduce_e.cuh @@ -180,11 +180,11 @@ __global__ void for_all_major_for_all_nbr_low_out_degree( * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input * properties for the first (inclusive) row (assigned to this process in multi-GPU). * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + - * @p graph_view.get_number_of_adj_matrix_local_rows(). + * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input * properties for the first (inclusive) column (assigned to this process in multi-GPU). * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first - * + @p graph_view.get_number_of_adj_matrix_local_cols(). + * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). 
* @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, diff --git a/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh b/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh index e9cc476e221..26a05787221 100644 --- a/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh +++ b/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh @@ -49,7 +49,7 @@ namespace experimental { * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input * properties for the first (inclusive) row (assigned to this process in multi-GPU). * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + - * @p graph_view.get_number_of_adj_matrix_local_rows(). + * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). * @param v_op Binary operator takes *(@p vertex_value_input_first + i) and *(@p * adj_matrix_row_value_input_first + j) (where i and j are set for a vertex and the matching row) * and returns a transformed value to be reduced. 
@@ -73,7 +73,7 @@ T transform_reduce_v_with_adj_matrix_row( CUGRAPH_FAIL("unimplemented."); } else { assert(graph_view.get_number_of_local_vertices() == - graph_view.get_number_of_adj_matrix_local_rows()); + graph_view.get_number_of_local_adj_matrix_partition_rows()); auto input_first = thrust::make_zip_iterator( thrust::make_tuple(vertex_value_input_first, adj_matrix_row_value_input_first)); auto v_op_wrapper = [v_op] __device__(auto v_and_row_val) { diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh index f376ace1267..7ba21abac8b 100644 --- a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -278,11 +278,11 @@ __global__ void update_frontier_and_vertex_output_values( * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input * properties for the first (inclusive) row (assigned to this process in multi-GPU). * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + - * @p graph_view.get_number_of_adj_matrix_local_rows(). + * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input * properties for the first (inclusive) column (assigned to this process in multi-GPU). * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first - * + @p graph_view.get_number_of_adj_matrix_local_cols(). + * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). 
* @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, From b96b1a5d8429781cff8432a94956cc24fdb029eb Mon Sep 17 00:00:00 2001 From: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Date: Tue, 29 Sep 2020 19:22:45 -0400 Subject: [PATCH 64/74] [REVIEW] ENH added more Nx compatibility (#1165) * updated Katz API * added entry for edge betweennees centrality * Added SSSP, BFS, and subgraph as Nx compatible Co-authored-by: BradReesWork --- CHANGELOG.md | 1 + ci/gpu/test-notebooks.sh | 2 +- docs/source/api.rst | 7 + .../nx_cugraph_bc_benchmarking.ipynb | 202 ++++++++++++++++++ python/cugraph/__init__.py | 8 +- python/cugraph/centrality/katz_centrality.py | 11 +- .../cugraph/community/subgraph_extraction.py | 7 + python/cugraph/tests/test_bfs.py | 28 ++- python/cugraph/tests/test_sssp.py | 45 +++- .../cugraph/tests/test_subgraph_extraction.py | 30 +++ python/cugraph/traversal/__init__.py | 5 +- python/cugraph/traversal/bfs.py | 69 ++++++ python/cugraph/traversal/sssp.py | 63 +++++- 13 files changed, 466 insertions(+), 12 deletions(-) create mode 100644 notebooks/cugraph_benchmarks/nx_cugraph_bc_benchmarking.ipynb diff --git a/CHANGELOG.md b/CHANGELOG.md index a91f88bec22..e9585c0e616 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ - PR #1149 Parquet read and concat within workers - PR #1152 graph container cleanup, added arg for instantiating legacy types and switch statements to factory function - PR #1162 enhanced networkx testing +- PR #1165 updated remaining algorithms to be NetworkX compatible ## Bug Fixes - PR #1131 Show style checker errors with set +e diff --git a/ci/gpu/test-notebooks.sh b/ci/gpu/test-notebooks.sh index 247eb328d2e..389d3be0bfd 100755 --- a/ci/gpu/test-notebooks.sh +++ 
b/ci/gpu/test-notebooks.sh @@ -23,7 +23,7 @@ TOPLEVEL_NB_FOLDERS=$(find . -name *.ipynb |cut -d'/' -f2|sort -u) # Add notebooks that should be skipped here # (space-separated list of filenames without paths) -SKIPNBS="uvm.ipynb bfs_benchmark.ipynb louvain_benchmark.ipynb pagerank_benchmark.ipynb sssp_benchmark.ipynb release.ipynb" +SKIPNBS="uvm.ipynb bfs_benchmark.ipynb louvain_benchmark.ipynb pagerank_benchmark.ipynb sssp_benchmark.ipynb release.ipynb nx_cugraph_bc_benchmarking.ipynb" ## Check env env diff --git a/docs/source/api.rst b/docs/source/api.rst index b194aa0e03c..d334b488d72 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -40,6 +40,13 @@ Betweenness Centrality :members: :undoc-members: +Edge Betweenness Centrality +--------------------------- + +.. automodule:: cugraph.centrality.edge_betweenness_centrality + :members: + :undoc-members: + Katz Centrality --------------- diff --git a/notebooks/cugraph_benchmarks/nx_cugraph_bc_benchmarking.ipynb b/notebooks/cugraph_benchmarks/nx_cugraph_bc_benchmarking.ipynb new file mode 100644 index 00000000000..6f76868f9a4 --- /dev/null +++ b/notebooks/cugraph_benchmarks/nx_cugraph_bc_benchmarking.ipynb @@ -0,0 +1,202 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Benchmarking NetworkX compatibility\n", + "This notebook benchmark the use of a NetworkX Graph object as input into algorithms.

\n", + "The intention of the feature is to be able to drop cuGraph into existing NetworkX code in spot where performance is not optimal.\n", + "\n", + "\n", + "### Betweenness Centrality\n", + "Both NetworkX and cuGraph allow for estimating the betweenness centrality score by using a subset of vertices rather than all the vertices. WHile that does produce a less accurate answer, it dramatically improves performance when the sample is small. For this test, the algorithms will use only 10% of the vertices to compute the estimate \n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "__Notebook Credits__\n", + "\n", + "* Original Authors: Bradley Rees\n", + "* Last Edit: 09/27/2020\n", + "\n", + "RAPIDS Versions: 0.16\n", + "\n", + "Test Hardware\n", + "```\n", + " GV100 32G, CUDA 10,0\n", + " Intel(R) Core(TM) CPU i7-7800X @ 3.50GHz\n", + " 32GB system memory\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import networkx as nx\n", + "import cugraph as cnx\n", + "import time\n", + "import operator" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# starting number of Nodes\n", + "N = 100" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# average degree\n", + "M = 16" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def run_nx(G, k=None):\n", + " t1 = time.time()\n", + " bc = nx.betweenness_centrality(G, k)\n", + " t2 = time.time() - t1\n", + " return t2, bc" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def run_cu(G, k=None):\n", + " t1 = time.time()\n", + " bc = cnx.betweenness_centrality(G, k)\n", + " t2 = time.time() - t1\n", + " return t2, bc" + ] + }, + { + "cell_type": "code", + "execution_count": 
null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"Betweenness Cenytrality - use all nodes - limit scale to 3,200 nodes so that executing time is not days\")\n", + "print(f\"Node \\tEdges \\tSpeedup \\t\\tcreate time \\t\\tnx time \\t\\tcu time \")\n", + "\n", + "for x in range(6):\n", + " if x == 0:\n", + " n = N\n", + " else:\n", + " n = n * 2\n", + "\n", + " \n", + " t1 = time.time() \n", + " # create a random graph\n", + " G = nx.barabasi_albert_graph(n, M)\n", + " g_time = time.time() - t1\n", + " \n", + " num_edges = G.number_of_edges()\n", + " num_nodes = G.number_of_nodes()\n", + " \n", + " time_nx, bc = run_nx(G)\n", + " time_cu, bcc = run_cu(G)\n", + "\n", + " speedup = time_nx / time_cu\n", + " print(f\"{num_nodes}\\t{num_edges}\\t{speedup}\\t{g_time}\\t{time_nx}\\t{time_cu}\")\n", + " " + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "print(\"PageRank - run larger graph since algorithm is fast\")\n", + "print(f\"Node \\tEdges \\tSpeedup \\t\\tnx time \\t\\tcu time \")\n", + "\n", + "pr_speedup = []\n", + "\n", + "for x in range(15):\n", + " if x == 0:\n", + " n = N\n", + " else:\n", + " n = n * 2\n", + "\n", + " # create a random graph\n", + " G = nx.barabasi_albert_graph(n, M)\n", + " num_edges = G.number_of_edges()\n", + " num_nodes = G.number_of_nodes()\n", + " \n", + " t1 = time.time() \n", + " nx_pr = nx.pagerank(G)\n", + " time_nx = time.time() - t1\n", + " \n", + " t1 = time.time() \n", + " cp_pr = cnx.pagerank(G)\n", + " time_cu = time.time() - t1\n", + "\n", + " speedup = time_nx / time_cu\n", + " print(f\"{num_nodes}\\t{num_edges} \\t{speedup}\\t{time_nx}\\t{time_cu}\")\n", + " " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "___\n", + "Copyright (c) 2020, NVIDIA CORPORATION.\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n", + "\n", + "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.\n", + "___" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "cugraph_dev", + "language": "python", + "name": "cugraph_dev" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.8.5" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index ee055b4a12e..0f20df00df1 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -59,7 +59,13 @@ overlap_w, ) -from cugraph.traversal import bfs, sssp, filter_unreachable +from cugraph.traversal import ( + bfs, + bfs_edges, + sssp, + shortest_path, + filter_unreachable, +) from cugraph.utilities import utils diff --git a/python/cugraph/centrality/katz_centrality.py b/python/cugraph/centrality/katz_centrality.py index 118825de4d7..3e2680a196f 100644 --- a/python/cugraph/centrality/katz_centrality.py +++ b/python/cugraph/centrality/katz_centrality.py @@ -16,7 +16,8 @@ def katz_centrality( - G, alpha=None, max_iter=100, tol=1.0e-6, nstart=None, normalized=True + G, alpha=None, beta=None, max_iter=100, tol=1.0e-6, + nstart=None, normalized=True ): """ Compute the Katz centrality for the nodes of the graph G. cuGraph does not @@ -46,6 +47,8 @@ def katz_centrality( (1/degree_max). 
Therefore, setting alpha to (1/degree_max) will guarantee that it will never exceed alpha_max thus in turn fulfilling the requirement for convergence. + beta : None + A weight scalar - currently Not Supported max_iter : int The maximum number of iterations before an answer is returned. This can be used to limit the execution time and do an early exit before the @@ -91,6 +94,12 @@ def katz_centrality( >>> kc = cugraph.katz_centrality(G) """ + if beta is not None: + raise NotImplementedError( + "The beta argument is " + "currently not supported" + ) + G, isNx = cugraph.utilities.check_nx_graph(G) if nstart is not None: diff --git a/python/cugraph/community/subgraph_extraction.py b/python/cugraph/community/subgraph_extraction.py index 70b49906184..8c702c2f58f 100644 --- a/python/cugraph/community/subgraph_extraction.py +++ b/python/cugraph/community/subgraph_extraction.py @@ -13,6 +13,8 @@ from cugraph.community import subgraph_extraction_wrapper from cugraph.structure.graph import null_check +from cugraph.utilities import check_nx_graph +from cugraph.utilities import cugraph_to_nx def subgraph(G, vertices): @@ -52,6 +54,8 @@ def subgraph(G, vertices): null_check(vertices) + G, isNx = check_nx_graph(G) + if G.renumbered: vertices = G.lookup_internal_vertex_id(vertices) @@ -70,4 +74,7 @@ def subgraph(G, vertices): else: result_graph.from_cudf_edgelist(df, source="src", destination="dst") + if isNx is True: + result_graph = cugraph_to_nx(result_graph) + return result_graph diff --git a/python/cugraph/tests/test_bfs.py b/python/cugraph/tests/test_bfs.py index 8eb175ad66d..5b5f7cf3737 100644 --- a/python/cugraph/tests/test_bfs.py +++ b/python/cugraph/tests/test_bfs.py @@ -12,9 +12,10 @@ # limitations under the License. 
import gc - +import pandas import cupy import numpy as np +import cudf import pytest import cugraph from cugraph.tests import utils @@ -110,7 +111,10 @@ def compare_bfs(graph_file, directed=True, return_sp_counter=False, seed=42): def _compare_bfs(G, Gnx, source): - df = cugraph.bfs(G, source, return_sp_counter=False) + df = cugraph.bfs_edges(G, source, return_sp_counter=False) + if isinstance(df, pandas.DataFrame): + df = cudf.from_pandas(df) + # This call should only contain 3 columns: # 'vertex', 'distance', 'predecessor' # It also confirms wether or not 'sp_counter' has been created by the call @@ -265,3 +269,23 @@ def test_bfs_spc_full(graph_file, directed): compare_bfs( graph_file, directed=directed, return_sp_counter=True, seed=None ) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.parametrize("directed", DIRECTED_GRAPH_OPTIONS) +@pytest.mark.parametrize("seed", SUBSET_SEED_OPTIONS) +def test_bfs_nx(graph_file, directed, seed): + """Test BFS traversal on random source with distance and predecessors""" + prepare_test() + + M = utils.read_csv_for_nx(graph_file, read_weights_in_sp=False) + G = nx.from_pandas_edgelist( + M, source="0", target="1", + create_using=nx.Graph() + ) + + if isinstance(seed, int): + random.seed(seed) + start_vertex = random.sample(G.nodes(), 1)[0] + + _compare_bfs(G, G, start_vertex) diff --git a/python/cugraph/tests/test_sssp.py b/python/cugraph/tests/test_sssp.py index 3c3b575fdb5..8dfcc60da3c 100644 --- a/python/cugraph/tests/test_sssp.py +++ b/python/cugraph/tests/test_sssp.py @@ -16,7 +16,7 @@ import numpy as np import pytest - +import cudf import cugraph from cugraph.tests import utils @@ -213,3 +213,46 @@ def test_sssp_data_type_conversion(graph_file, source): err = err + 1 assert err == 0 + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.parametrize("source", SOURCES) +def test_sssp_nx(graph_file, source): + print("DOING test_sssp : " + graph_file + "\n\n\n") + gc.collect() + + M 
= utils.read_csv_for_nx(graph_file) + nx_paths, Gnx = networkx_call(M, source) + + df = cugraph.shortest_path(Gnx, source) + df = cudf.from_pandas(df) + + if np.issubdtype(df["distance"].dtype, np.integer): + max_val = np.iinfo(df["distance"].dtype).max + else: + max_val = np.finfo(df["distance"].dtype).max + + verts_np = df["vertex"].to_array() + dist_np = df["distance"].to_array() + pred_np = df["predecessor"].to_array() + cu_paths = dict(zip(verts_np, zip(dist_np, pred_np))) + + # Calculating mismatch + err = 0 + for vid in cu_paths: + # Validate vertices that are reachable + # NOTE : If distance type is float64 then cu_paths[vid][0] + # should be compared against np.finfo(np.float64).max) + if cu_paths[vid][0] != max_val: + if cu_paths[vid][0] != nx_paths[vid]: + err = err + 1 + # check pred dist + 1 = current dist (since unweighted) + pred = cu_paths[vid][1] + if vid != source and cu_paths[pred][0] + 1 != cu_paths[vid][0]: + err = err + 1 + else: + if vid in nx_paths.keys(): + err = err + 1 + + assert err == 0 + print("DONE test_sssp : " + graph_file + "\n\n\n") diff --git a/python/cugraph/tests/test_subgraph_extraction.py b/python/cugraph/tests/test_subgraph_extraction.py index 9192495c6b2..a4f36af994a 100644 --- a/python/cugraph/tests/test_subgraph_extraction.py +++ b/python/cugraph/tests/test_subgraph_extraction.py @@ -100,3 +100,33 @@ def test_subgraph_extraction_Graph(graph_file): cu_sg = cugraph_call(M, verts, False) nx_sg = nx_call(M, verts, False) assert compare_edges(cu_sg, nx_sg) + + +@pytest.mark.parametrize("graph_file", utils.DATASETS) +def test_subgraph_extraction_Graph_nx(graph_file): + gc.collect() + directed = False + verts = np.zeros(3, dtype=np.int32) + verts[0] = 0 + verts[1] = 1 + verts[2] = 17 + + M = utils.read_csv_for_nx(graph_file) + + if directed: + G = nx.from_pandas_edgelist( + M, source="0", target="1", create_using=nx.DiGraph() + ) + else: + G = nx.from_pandas_edgelist( + M, source="0", target="1", create_using=nx.Graph() + ) + + 
nx_sub = nx.subgraph(G, verts) + nx_df = nx.to_pandas_edgelist(nx_sub).to_dict() + + cu_verts = cudf.Series(verts) + cu_sub = cugraph.subgraph(G, cu_verts) + cu_df = nx.to_pandas_edgelist(cu_sub).to_dict() + + assert nx_df == cu_df diff --git a/python/cugraph/traversal/__init__.py b/python/cugraph/traversal/__init__.py index 288c4edd2e3..52a1b9e2cfb 100644 --- a/python/cugraph/traversal/__init__.py +++ b/python/cugraph/traversal/__init__.py @@ -12,4 +12,7 @@ # limitations under the License. from cugraph.traversal.bfs import bfs -from cugraph.traversal.sssp import sssp, filter_unreachable +from cugraph.traversal.bfs import bfs_edges +from cugraph.traversal.sssp import sssp +from cugraph.traversal.sssp import shortest_path +from cugraph.traversal.sssp import filter_unreachable diff --git a/python/cugraph/traversal/bfs.py b/python/cugraph/traversal/bfs.py index 3a977a06baf..7e03d8ab016 100644 --- a/python/cugraph/traversal/bfs.py +++ b/python/cugraph/traversal/bfs.py @@ -15,6 +15,7 @@ from cugraph.traversal import bfs_wrapper from cugraph.structure.graph import Graph +from cugraph.utilities import check_nx_graph def bfs(G, start, return_sp_counter=False): @@ -72,3 +73,71 @@ def bfs(G, start, return_sp_counter=False): df["predecessor"].fillna(-1, inplace=True) return df + + +def bfs_edges(G, source, reverse=False, depth_limit=None, sort_neighbors=None, + return_sp_counter=False): + """ + Find the distances and predecessors for a breadth first traversal of a + graph. 
+ + Parameters + ---------- + G : cugraph.graph or NetworkX.Graph + graph descriptor that contains connectivity information + source : Integer + The starting vertex index + reverse : boolean + If a directed graph, then process edges in a reverse direction + Currently not implemented + depth_limit : Int or None + Limit the depth of the search + Currently not implemented + sort_neighbors : None or Function + Currently not implemented + return_sp_counter : bool, optional, default=False + Indicates if shortest path counters should be returned + + Returns + ------- + df : cudf.DataFrame or Pandas.DataFrame + df['vertex'][i] gives the vertex id of the i'th vertex + + df['distance'][i] gives the path distance for the i'th vertex from the + starting vertex + + df['predecessor'][i] gives for the i'th vertex the vertex it was + reached from in the traversal + + df['sp_counter'][i] gives for the i'th vertex the number of shortest + path leading to it during traversal (Only if retrun_sp_counter is True) + + Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(M, source='0', destination='1') + >>> df = cugraph.bfs_edges(G, 0) + """ + + if reverse is True: + raise NotImplementedError( + "reverse processing of graph is " + "currently not supported" + ) + + if depth_limit is not None: + raise NotImplementedError( + "depth limit implementation of BFS " + "is not currently supported" + ) + + G, isNx = check_nx_graph(G) + + df = bfs(G, source, return_sp_counter) + + if isNx is True: + df = df.to_pandas() + + return df diff --git a/python/cugraph/traversal/sssp.py b/python/cugraph/traversal/sssp.py index 546407af2b6..a40755c6602 100644 --- a/python/cugraph/traversal/sssp.py +++ b/python/cugraph/traversal/sssp.py @@ -14,6 +14,7 @@ from cugraph.traversal import sssp_wrapper import numpy as np import cudf +from cugraph.utilities import check_nx_graph 
def sssp(G, source): @@ -38,11 +39,14 @@ def sssp(G, source): Returns ------- df : cudf.DataFrame - df['vertex'][i] gives the vertex id of the i'th vertex. - df['distance'][i] gives the path distance for the i'th vertex from the - starting vertex. - df['predecessor'][i] gives the vertex id of the vertex that was reached - before the i'th vertex in the traversal. + df['vertex'] + vertex id + + df['distance'] + gives the path distance from the starting vertex + + df['predecessor'] + the vertex it was reached from Examples -------- @@ -94,3 +98,52 @@ def filter_unreachable(df): return df[df.distance != max_val] else: raise TypeError("distance type unsupported") + + +def shortest_path(G, source): + """ + Compute the distance and predecessors for shortest paths from the specified + source to all the vertices in the graph. The distances column will store + the distance from the source to each vertex. The predecessors column will + store each vertex's predecessor in the shortest path. Vertices that are + unreachable will have a distance of infinity denoted by the maximum value + of the data type and the predecessor set as -1. The source vertex's + predecessor is also set to -1. Graphs with negative weight cycles are not + supported. + + Parameters + ---------- + graph : cuGraph.Graph or NetworkX.Graph + cuGraph graph descriptor with connectivity information. Edge weights, + if present, should be single or double precision floating point values. + source : int + Index of the source vertex. 
+ + Returns + ------- + df : cudf.DataFrame or pandas.DataFrame + df['vertex'] + vertex id + + df['distance'] + gives the path distance from the starting vertex + + df['predecessor'] + the vertex it was reached from + + Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(M, source='0', destination='1') + >>> distances = cugraph.shortest_path(G, 0) + """ + G, isNx = check_nx_graph(G) + + df = sssp(G, source) + + if isNx is True: + df = df.to_pandas() + + return df From fa60f093ef657d13455f4c2d9b964524d5ba7495 Mon Sep 17 00:00:00 2001 From: Alex Fender Date: Tue, 29 Sep 2020 18:23:53 -0500 Subject: [PATCH 65/74] [REVIEW] MG symmetrize and conda env updates (#1164) * saving progress * added missing dependencies for #1159 * init files and test for _df entry point * temporary debug print and alternate way of appending * should work now * removed debug print * disabled MG tests on single GPU * changelog * style * skiped tests using #1168 model * style * restoring flag as per review comment * Update symmetrize.py * Update symmetrize.py --- CHANGELOG.md | 1 + conda/recipes/cugraph/meta.yaml | 4 + python/cugraph/__init__.py | 1 + python/cugraph/structure/__init__.py | 2 +- python/cugraph/structure/graph.py | 215 ++++++++++++++---------- python/cugraph/structure/symmetrize.py | 116 ++++++++++--- python/cugraph/tests/test_symmetrize.py | 61 ++++++- 7 files changed, 286 insertions(+), 114 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e9585c0e616..d1032fccf25 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ - PR #1139 MNMG Louvain Python updates, Cython cleanup - PR #1149 Parquet read and concat within workers - PR #1152 graph container cleanup, added arg for instantiating legacy types and switch statements to factory function +- PR #1164 MG symmetrize and conda env updates - PR #1162 enhanced networkx testing - 
PR #1165 updated remaining algorithms to be NetworkX compatible diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index 1a32fd2a4b1..1376a0e30d2 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -32,6 +32,10 @@ requirements: - python x.x - libcugraph={{ version }} - cudf={{ minor_version }} + - dask-cudf {{ minor_version }} + - dask-cuda {{ minor_version }} + - dask>=2.12.0 + - distributed>=2.12.0 - nccl>=2.5 - ucx-py {{ minor_version }} diff --git a/python/cugraph/__init__.py b/python/cugraph/__init__.py index 0f20df00df1..f8984f25978 100644 --- a/python/cugraph/__init__.py +++ b/python/cugraph/__init__.py @@ -33,6 +33,7 @@ hypergraph, symmetrize, symmetrize_df, + symmetrize_ddf, ) from cugraph.centrality import ( diff --git a/python/cugraph/structure/__init__.py b/python/cugraph/structure/__init__.py index 3a3515eef67..511e6773469 100644 --- a/python/cugraph/structure/__init__.py +++ b/python/cugraph/structure/__init__.py @@ -13,7 +13,7 @@ from cugraph.structure.graph import Graph, DiGraph from cugraph.structure.number_map import NumberMap -from cugraph.structure.symmetrize import symmetrize, symmetrize_df +from cugraph.structure.symmetrize import symmetrize, symmetrize_df , symmetrize_ddf from cugraph.structure.convert_matrix import from_cudf_edgelist from cugraph.structure.hypergraph import hypergraph from cugraph.structure.shuffle import shuffle diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index d6e1689e515..ce63eb52683 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -133,8 +133,10 @@ def enable_batch(self): comms = Comms.get_comms() if client is None or comms is None: - msg = "MG Batch needs a Dask Client and the " \ + msg = ( + "MG Batch needs a Dask Client and the " "Communicator needs to be initialized." 
+ ) raise Exception(msg) self.batch_enabled = True @@ -159,9 +161,8 @@ def _replicate_edgelist(self): if client is None: return work_futures = replication.replicate_cudf_dataframe( - self.edgelist.edgelist_df, - client=client, - comms=comms) + self.edgelist.edgelist_df, client=client, comms=comms + ) self.batch_edgelists = work_futures @@ -175,22 +176,25 @@ def _replicate_adjlist(self): weights = None offsets_futures = replication.replicate_cudf_series( - self.adjlist.offsets, - client=client, - comms=comms) + self.adjlist.offsets, client=client, comms=comms + ) indices_futures = replication.replicate_cudf_series( - self.adjlist.indices, - client=client, - comms=comms) + self.adjlist.indices, client=client, comms=comms + ) if self.adjlist.weights is not None: weights = replication.replicate_cudf_series(self.adjlist.weights) else: weights = {worker: None for worker in offsets_futures} - merged_futures = {worker: [offsets_futures[worker], - indices_futures[worker], weights[worker]] - for worker in offsets_futures} + merged_futures = { + worker: [ + offsets_futures[worker], + indices_futures[worker], + weights[worker], + ] + for worker in offsets_futures + } self.batch_adjlists = merged_futures # FIXME: Not implemented yet @@ -227,23 +231,29 @@ def add_nodes_from(self, nodes, bipartite=None, multipartite=None): nodes of the partition named as multipartite argument. """ if bipartite is None and multipartite is None: - self._nodes['all_nodes'] = cudf.Series(nodes) + self._nodes["all_nodes"] = cudf.Series(nodes) else: - set_names = [i for i in self._nodes.keys() if i != 'all_nodes'] + set_names = [i for i in self._nodes.keys() if i != "all_nodes"] if multipartite is not None: if self.bipartite: - raise Exception("The Graph is already set as bipartite. " - "Use bipartite option instead.") + raise Exception( + "The Graph is already set as bipartite. " + "Use bipartite option instead." 
+ ) self.multipartite = True elif bipartite is not None: if self.multipartite: - raise Exception("The Graph is set as multipartite. " - "Use multipartite option instead.") + raise Exception( + "The Graph is set as multipartite. " + "Use multipartite option instead." + ) self.bipartite = True multipartite = bipartite if multipartite not in set_names and len(set_names) == 2: - raise Exception("The Graph is set as bipartite and " - "already has two partitions initialized.") + raise Exception( + "The Graph is set as bipartite and " + "already has two partitions initialized." + ) self._nodes[multipartite] = cudf.Series(nodes) def is_bipartite(self): @@ -273,14 +283,15 @@ def sets(self): graph is not bipartite. """ # TO DO: Call coloring algorithm - set_names = [i for i in self._nodes.keys() if i != 'all_nodes'] + set_names = [i for i in self._nodes.keys() if i != "all_nodes"] if self.bipartite: top = self._nodes[set_names[0]] if len(set_names) == 2: bottom = self._nodes[set_names[1]] else: - bottom = cudf.Series(set(self.nodes().values_host) - - set(top.values_host)) + bottom = cudf.Series( + set(self.nodes().values_host) - set(top.values_host) + ) return top, bottom else: return {k: self._nodes[k] for k in set_names} @@ -353,40 +364,49 @@ def from_cudf_edgelist( s_col = [s_col] if not isinstance(d_col, list): d_col = [d_col] - if not (set(s_col).issubset(set(input_df.columns)) and - set(d_col).issubset(set(input_df.columns))): - raise Exception('source column names and/or destination column \ -names not found in input. Recheck the source and destination parameters') + if not ( + set(s_col).issubset(set(input_df.columns)) + and set(d_col).issubset(set(input_df.columns)) + ): + raise Exception( + "source column names and/or destination column \ +names not found in input. 
Recheck the source and destination parameters" + ) # Consolidation if isinstance(input_df, cudf.DataFrame): if len(input_df[source]) > 2147483100: - raise Exception('cudf dataFrame edge list is too big \ - to fit in a single GPU') + raise Exception( + "cudf dataFrame edge list is too big \ + to fit in a single GPU" + ) elist = input_df elif isinstance(input_df, dask_cudf.DataFrame): if len(input_df[source]) > 2147483100: - raise Exception('dask_cudf dataFrame edge list is too big \ - to fit in a single GPU') + raise Exception( + "dask_cudf dataFrame edge list is too big \ + to fit in a single GPU" + ) elist = input_df.compute().reset_index(drop=True) else: - raise Exception('input should be a cudf.DataFrame or \ - a dask_cudf dataFrame') + raise Exception( + "input should be a cudf.DataFrame or \ + a dask_cudf dataFrame" + ) renumber_map = None if renumber: # FIXME: Should SG do lazy evaluation like MG? elist, renumber_map = NumberMap.renumber( - elist, source, destination, - store_transposed=False + elist, source, destination, store_transposed=False ) - source = 'src' - destination = 'dst' + source = "src" + destination = "dst" self.renumbered = True self.renumber_map = renumber_map else: if type(source) is list and type(destination) is list: - raise Exception('set renumber to True for multi column ids') + raise Exception("set renumber to True for multi column ids") source_col = elist[source] dest_col = elist[destination] @@ -410,18 +430,21 @@ def from_cudf_edgelist( else: source_col, dest_col = symmetrize(source_col, dest_col) - self.edgelist = Graph.EdgeList( - source_col, dest_col, value_col - ) + self.edgelist = Graph.EdgeList(source_col, dest_col, value_col) if self.batch_enabled: self._replicate_edgelist() self.renumber_map = renumber_map - def from_dask_cudf_edgelist(self, input_ddf, source='source', - destination='destination', - edge_attr=None, renumber=True): + def from_dask_cudf_edgelist( + self, + input_ddf, + source="source", + 
destination="destination", + edge_attr=None, + renumber=True, + ): """ Initializes the distributed graph from the dask_cudf.DataFrame edgelist. Undirected Graphs are not currently supported. @@ -450,11 +473,11 @@ def from_dask_cudf_edgelist(self, input_ddf, source='source', is number of vertices, renumber argument should be True. """ if self.edgelist is not None or self.adjlist is not None: - raise Exception('Graph already has values') + raise Exception("Graph already has values") if not isinstance(input_ddf, dask_cudf.DataFrame): - raise Exception('input should be a dask_cudf dataFrame') + raise Exception("input should be a dask_cudf dataFrame") if type(self) is Graph: - raise Exception('Undirected distributed graph not supported') + raise Exception("Undirected distributed graph not supported") s_col = source d_col = destination @@ -462,10 +485,14 @@ def from_dask_cudf_edgelist(self, input_ddf, source='source', s_col = [s_col] if not isinstance(d_col, list): d_col = [d_col] - if not (set(s_col).issubset(set(input_ddf.columns)) and - set(d_col).issubset(set(input_ddf.columns))): - raise Exception('source column names and/or destination column \ -names not found in input. Recheck the source and destination parameters') + if not ( + set(s_col).issubset(set(input_ddf.columns)) + and set(d_col).issubset(set(input_ddf.columns)) + ): + raise Exception( + "source column names and/or destination column \ +names not found in input. 
Recheck the source and destination parameters" + ) # # Keep all of the original parameters so we can lazily # evaluate this function @@ -503,10 +530,10 @@ def compute_local_data(self, by, load_balance=True): if self.distributed: data = get_local_data(self, by, load_balance) self.local_data = {} - self.local_data['data'] = data - self.local_data['by'] = by + self.local_data["data"] = data + self.local_data["by"] = by else: - raise Exception('Graph should be a distributed graph') + raise Exception("Graph should be a distributed graph") def view_edge_list(self): """ @@ -674,9 +701,10 @@ def compute_renumber_edge_list(self, transposed=False): del self.edgelist renumbered_ddf, number_map = NumberMap.renumber( - self.input_df, self.source_columns, + self.input_df, + self.source_columns, self.destination_columns, - store_transposed=transposed + store_transposed=transposed, ) self.edgelist = self.EdgeList(renumbered_ddf) self.renumber_map = number_map @@ -760,8 +788,11 @@ def view_transposed_adj_list(self): self.adjlist.weights, ) else: - off, ind, vals = \ - graph_primtypes_wrapper.view_transposed_adj_list(self) + ( + off, + ind, + vals, + ) = graph_primtypes_wrapper.view_transposed_adj_list(self) self.transposedadjlist = self.transposedAdjList(off, ind, vals) if self.batch_enabled: @@ -811,12 +842,12 @@ def number_of_vertices(self): if self.node_count is None: if self.distributed: if self.edgelist is not None: - ddf = self.edgelist.edgelist_df[['src', 'dst']] + ddf = self.edgelist.edgelist_df[["src", "dst"]] self.node_count = ddf.max().max().compute() + 1 else: raise Exception("Graph is Empty") elif self.adjlist is not None: - self.node_count = len(self.adjlist.offsets)-1 + self.node_count = len(self.adjlist.offsets) - 1 elif self.transposedadjlist is not None: self.node_count = len(self.transposedadjlist.offsets) - 1 elif self.edgelist is not None: @@ -843,7 +874,7 @@ def number_of_edges(self, directed_edges=False): if self.edgelist is not None: return 
len(self.edgelist.edgelist_df) else: - raise ValueError('Graph is Empty') + raise ValueError("Graph is Empty") if directed_edges and self.edgelist is not None: return len(self.edgelist.edgelist_df) if self.edge_count is None: @@ -1028,8 +1059,11 @@ def degrees(self, vertex_subset=None): """ if self.distributed: raise Exception("Not supported for distributed graph") - vertex_col, in_degree_col, out_degree_col = \ - graph_primtypes_wrapper._degrees(self) + ( + vertex_col, + in_degree_col, + out_degree_col, + ) = graph_primtypes_wrapper._degrees(self) df = cudf.DataFrame() df["vertex"] = vertex_col @@ -1111,8 +1145,7 @@ def to_undirected(self): >>> G = DiG.to_undirected() """ - if self.distributed: - raise Exception("Not supported for distributed graph") + if type(self) is Graph: return self if type(self) is DiGraph: @@ -1120,6 +1153,7 @@ def to_undirected(self): df = self.edgelist.edgelist_df G.renumbered = self.renumbered G.renumber_map = self.renumber_map + G.multi = self.multi if self.edgelist.weights: source_col, dest_col, value_col = symmetrize( df["src"], df["dst"], df["weights"] @@ -1127,9 +1161,7 @@ def to_undirected(self): else: source_col, dest_col = symmetrize(df["src"], df["dst"]) value_col = None - G.edgelist = Graph.EdgeList( - source_col, dest_col, value_col - ) + G.edgelist = Graph.EdgeList(source_col, dest_col, value_col) return G @@ -1146,7 +1178,7 @@ def has_node(self, n): if self.edgelist is None: raise Exception("Graph has no Edgelist.") if self.distributed: - ddf = self.edgelist.edgelist_df[['src', 'dst']] + ddf = self.edgelist.edgelist_df[["src", "dst"]] return (ddf == n).any().any().compute() if self.renumbered: tmp = self.renumber_map.to_internal_vertex_id(cudf.Series([n])) @@ -1162,19 +1194,19 @@ def has_edge(self, u, v): if self.edgelist is None: raise Exception("Graph has no Edgelist.") if self.renumbered: - tmp = cudf.DataFrame({'src': [u, v]}) - tmp = tmp.astype({'src': 'int'}) + tmp = cudf.DataFrame({"src": [u, v]}) + tmp = 
tmp.astype({"src": "int"}) tmp = self.add_internal_vertex_id( - tmp, 'id', 'src', preserve_order=True + tmp, "id", "src", preserve_order=True ) - u = tmp['id'][0] - v = tmp['id'][1] + u = tmp["id"][0] + v = tmp["id"][1] df = self.edgelist.edgelist_df if self.distributed: - return ((df['src'] == u) & (df['dst'] == v)).any().compute() - return ((df['src'] == u) & (df['dst'] == v)).any() + return ((df["src"] == u) & (df["dst"] == v)).any().compute() + return ((df["src"] == u) & (df["dst"] == v)).any() def edges(self): """ @@ -1201,11 +1233,11 @@ def nodes(self): return self.renumber_map.implementation.df["0"] else: return cudf.concat([df["src"], df["dst"]]).unique() - if 'all_nodes' in self._nodes.keys(): - return self._nodes['all_nodes'] + if "all_nodes" in self._nodes.keys(): + return self._nodes["all_nodes"] else: - n = cudf.Series(dtype='int') - set_names = [i for i in self._nodes.keys() if i != 'all_nodes'] + n = cudf.Series(dtype="int") + set_names = [i for i in self._nodes.keys() if i != "all_nodes"] for k in set_names: n = n.append(self._nodes[k]) return n @@ -1215,7 +1247,7 @@ def neighbors(self, n): raise Exception("Graph has no Edgelist.") if self.distributed: ddf = self.edgelist.edgelist_df - return ddf[ddf['src'] == n]['dst'].reset_index(drop=True) + return ddf[ddf["src"] == n]["dst"].reset_index(drop=True) if self.renumbered: node = self.renumber_map.to_internal_vertex_id(cudf.Series([n])) if len(node) == 0: @@ -1291,9 +1323,14 @@ def lookup_internal_vertex_id(self, df, column_name=None): """ return self.renumber_map.to_internal_vertex_id(df, column_name) - def add_internal_vertex_id(self, df, internal_column_name, - external_column_name, - drop=True, preserve_order=False): + def add_internal_vertex_id( + self, + df, + internal_column_name, + external_column_name, + drop=True, + preserve_order=False, + ): """ Given a DataFrame containing external vertex ids in the identified columns, return a DataFrame containing the internal vertex ids as the @@ -1325,8 
+1362,12 @@ def add_internal_vertex_id(self, df, internal_column_name, id """ return self.renumber_map.add_internal_vertex_id( - df, internal_column_name, external_column_name, - drop, preserve_order) + df, + internal_column_name, + external_column_name, + drop, + preserve_order, + ) class DiGraph(Graph): diff --git a/python/cugraph/structure/symmetrize.py b/python/cugraph/structure/symmetrize.py index cf3a823ca27..6ab34f6687e 100644 --- a/python/cugraph/structure/symmetrize.py +++ b/python/cugraph/structure/symmetrize.py @@ -13,6 +13,7 @@ from cugraph.structure import graph as csg import cudf +import dask_cudf def symmetrize_df(df, src_name, dst_name): @@ -21,19 +22,16 @@ def symmetrize_df(df, src_name, dst_name): the source and destination columns and create a new data frame using the same column names that symmetrize the graph so that all edges appear in both directions. - Note that if other columns exist in the data frame (e.g. edge weights) the other columns will also be replicated. That is, if (u,v,data) represents the source value (u), destination value (v) and some set of other columns (data) in the input data, then the output data will contain both (u,v,data) and (v,u,data) with matching data. - If (u,v,data1) and (v,u,data2) exist in the input data where data1 != data2 then this code will arbitrarily pick the smaller data element to keep, if this is not desired then the caller should should correct the data prior to calling symmetrize. 
- Parameters ---------- df : cudf.DataFrame @@ -44,14 +42,17 @@ def symmetrize_df(df, src_name, dst_name): Name of the column in the data frame containing the source ids dst_name : string Name of the column in the data frame containing the destination ids - Examples -------- - >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', - >>> dtype=['int32', 'int32', 'float32'], header=None) - >>> sym_df = cugraph.symmetrize(M, '0', '1') - >>> G = cugraph.Graph() - >>> G.add_edge_list(sym_df['0]', sym_df['1'], sym_df['2']) + >>> import cugraph.dask as dcg + >>> Comms.initialize() + >>> chunksize = dcg.get_chunksize(input_data_path) + >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, + delimiter=' ', + names=['src', 'dst', 'weight'], + dtype=['int32', 'int32', 'float32']) + >>> sym_ddf = cugraph.symmetrize_ddf(ddf, "src", "dst", "weight") + >>> Comms.destroy() """ gdf = cudf.DataFrame() @@ -75,28 +76,82 @@ def symmetrize_df(df, src_name, dst_name): return gdf.groupby(by=[src_name, dst_name], as_index=False).min() +def symmetrize_ddf(df, src_name, dst_name, weight_name=None): + """ + Take a COO stored in a distributed DataFrame, and the column names of + the source and destination columns and create a new data frame + using the same column names that symmetrize the graph so that all + edges appear in both directions. + + Note that if other columns exist in the data frame (e.g. edge weights) + the other columns will also be replicated. That is, if (u,v,data) + represents the source value (u), destination value (v) and some + set of other columns (data) in the input data, then the output + data will contain both (u,v,data) and (v,u,data) with matching + data. + + If (u,v,data1) and (v,u,data2) exist in the input data where data1 + != data2 then this code will arbitrarily pick the smaller data + element to keep, if this is not desired then the caller should + should correct the data prior to calling symmetrize. 
+ + Parameters + ---------- + df : dask_cudf.DataFrame + Input data frame containing COO. Columns should contain source + ids, destination ids and any properties associated with the + edges. + src_name : string + Name of the column in the data frame containing the source ids + dst_name : string + Name of the column in the data frame containing the destination ids + + Examples + -------- + >>> M = cudf.read_csv('datasets/karate.csv', delimiter=' ', + >>> dtype=['int32', 'int32', 'float32'], header=None) + >>> sym_df = cugraph.symmetrize(M, '0', '1') + """ + if weight_name: + ddf2 = df[[dst_name, src_name, weight_name]] + ddf2.columns = [src_name, dst_name, weight_name] + else: + ddf2 = df[[dst_name, src_name]] + ddf2.columns = [src_name, dst_name] + + ddf = df.append(ddf2).reset_index(drop=True) + result = ( + ddf.groupby(by=[src_name, dst_name], as_index=False) + .min() + .reset_index() + ) + return result + + def symmetrize(source_col, dest_col, value_col=None): """ - Take a COO set of source destination pairs along with associated values and + Take a COO set of source destination pairs along with associated values + stored in a single GPU or distributed create a new COO set of source destination pairs along with values where all edges exist in both directions. - Return from this call will be a COO stored as two cudf Series - the - symmetrized source column and the symmetrized dest column, along with + Return from this call will be a COO stored as two cudf Series or + dask_cudf.Series -the symmetrized source column and the symmetrized dest + column, along with an optional cudf Series containing the associated values (only if the values are passed in). Parameters ---------- - source_col : cudf.Series + source_col : cudf.Series or dask_cudf.Series This cudf.Series wraps a gdf_column of size E (E: number of edges). The gdf column contains the source index for each edge. Source indices must be an integer type. 
- dest_col : cudf.Series + dest_col : cudf.Series or dask_cudf.Series This cudf.Series wraps a gdf_column of size E (E: number of edges). The gdf column contains the destination index for each edge. Destination indices must be an integer type. - value_col : cudf.Series (optional) + value_col : cudf.Series or dask_cudf.Series (optional) This cudf.Series wraps a gdf_column of size E (E: number of edges). The gdf column contains values associated with this edge. For this function the values can be any type, they are not @@ -110,19 +165,31 @@ def symmetrize(source_col, dest_col, value_col=None): >>> destinations = cudf.Series(M['1']) >>> values = cudf.Series(M['2']) >>> src, dst, val = cugraph.symmetrize(sources, destinations, values) - >>> G = cugraph.Graph() - >>> G.add_edge_list(src, dst, val) """ - csg.null_check(source_col) - csg.null_check(dest_col) - - input_df = cudf.DataFrame({"source": source_col, "destination": dest_col}) + input_df = None + weight_name = None + if type(source_col) is dask_cudf.Series: + # FIXME convoluted way of just wrapping dask cudf Series in a ddf + input_df = source_col.to_frame() + input_df = input_df.rename(columns={source_col.name: "source"}) + input_df["destination"] = dest_col + else: + input_df = cudf.DataFrame( + {"source": source_col, "destination": dest_col} + ) + csg.null_check(source_col) + csg.null_check(dest_col) if value_col is not None: - csg.null_check(value_col) + weight_name = "value" input_df.insert(len(input_df.columns), "value", value_col) - - output_df = symmetrize_df(input_df, "source", "destination") + output_df = None + if type(source_col) is dask_cudf.Series: + output_df = symmetrize_ddf( + input_df, "source", "destination", weight_name + ) + else: + output_df = symmetrize_df(input_df, "source", "destination") if value_col is not None: return ( @@ -130,5 +197,4 @@ def symmetrize(source_col, dest_col, value_col=None): output_df["destination"], output_df["value"], ) - return output_df["source"], 
output_df["destination"] diff --git a/python/cugraph/tests/test_symmetrize.py b/python/cugraph/tests/test_symmetrize.py index 4a49eddb70b..4a71dca5e96 100644 --- a/python/cugraph/tests/test_symmetrize.py +++ b/python/cugraph/tests/test_symmetrize.py @@ -19,6 +19,10 @@ import cudf import cugraph from cugraph.tests import utils +import cugraph.comms as Comms +from dask.distributed import Client +from dask_cuda import LocalCUDACluster +from cugraph.dask.common.mg_utils import is_single_gpu def test_version(): @@ -198,13 +202,68 @@ def test_symmetrize_weighted(graph_file): compare(cu_M["0"], cu_M["1"], cu_M["2"], sym_src, sym_dst, sym_w) +@pytest.fixture +def client_connection(): + cluster = LocalCUDACluster() + client = Client(cluster) + Comms.initialize() + + yield client + + Comms.destroy() + client.close() + cluster.close() + + +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_mg_symmetrize(graph_file, client_connection): + gc.collect() + + ddf = utils.read_dask_cudf_csv_file(graph_file) + sym_src, sym_dst = cugraph.symmetrize(ddf["src"], ddf["dst"]) + + # convert to regular cudf to facilitate comparison + df = ddf.compute() + + compare( + df["src"], df["dst"], None, sym_src.compute(), sym_dst.compute(), None + ) + + +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) +def test_mg_symmetrize_df(graph_file, client_connection): + gc.collect() + + ddf = utils.read_dask_cudf_csv_file(graph_file) + sym_ddf = cugraph.symmetrize_ddf(ddf, "src", "dst", "weight") + + # convert to regular cudf to facilitate comparison + df = ddf.compute() + sym_df = sym_ddf.compute() + + compare( + df["src"], + df["dst"], + df["weight"], + sym_df["src"], + sym_df["dst"], + sym_df["weight"], + ) + + # Test # NOTE: see https://github.com/rapidsai/cudf/issues/2636 # 
drop_duplicates doesn't work well with the pool allocator # list(product([False, True], [False, True]))) -@pytest.mark.parametrize("graph_file", utils.DATASETS) +@pytest.mark.parametrize("graph_file", utils.DATASETS_UNDIRECTED) def test_symmetrize_df(graph_file): gc.collect() From 1f58e1a01e951d703bca1a11289bfdaf697238f1 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Wed, 30 Sep 2020 19:49:07 -0400 Subject: [PATCH 66/74] FEA Multinode extension for pattern accelerator based PageRank, Katz Centrality, BFS, and SSSP implementations (C++ part) (#1151) * draft pattern accelerator API for the pagerank pattern * implement pagerank using the pattern accelerator API * implement katz centrality using the pattern accelerator API * add handle to the pattern accelerator API * fix minor issues in pagerank & katz_centrality * add a pattern to support BFS * draft implementation of BFS using pattern accelerator APIs * move non-public APIs to the detail namespace * minor tweak to bfs * initial draft of sssp using pattern accelerator * merge e_op and e_pred_op and add reduce_op for bfs & sssp patterns * tweaking patterns for BFS & SSSP for better accelerator implementation * raise abstraction level for vertex queue * direction optimized to direction optimizing in BFS * update comments, class & function names, and several additional API changes to enable optimization * add FIXME comments to remove opg as a template parameter from graph analytics functions * rename frontier to better reflect that it is a froniter on adjacency matrix rows * updated pattern accelerator API for better expressiblity and to enable more performance optimizations in accelerator API implementations * remove template parameter bool opg from graph analytics * remove unnecessary code * .cuh to .cu and explicit instantiation for templated graph analytics functions * "split patterns.hpp to three files" * "add aliases vertex_type, edge_type, weight_type to Graph 
classes (to support e.g. GraphType::vertex_type)" * "add invalid_vertex_id and invalid_edge_id" * "add traits.hpp (initially supporting is_csr and is_csc)" * "fix typos" * add bfs.cu to CMakeLists.txt * misc. fixes * fix several compile errors * add graph_device_view class * add is_opg to graph classes * add a frontier queue class for pattern accelerators * fix bfs compile errors with pattern accelrator API * initail commit of two level pattern accelrators * fix frontier queue compile errors * few tweaks * initial commit of reduce_op.cuh * improve AdjMatrixRowFrontier implementation * first full implementation of two level patterns for BFS & SSSP * first full ipmlementation of BFS using a pattern accelerator * update copyright year and add min to reduce_op * add sssp to CMakeLists.txt * spilt two_levels_patterns.cuh to one file per pattern * thrust::raw_pointer_cast to data().get() following cuDF's convention * move pattern accelerator related files to the patterns directory * add edge_utils.cuh * add transform_reduce_e pattern accelerator implmentation * add utility functions * update bfs with pattern accelerator implementation * update sssp with pattern accelerator implementation * update graph_device_view * update queue implementation * update expand_and_transform_if_e pattern implementation * placeholder * fix merge error * move implemented patterns out from one_level_patterns.cuh * fix a conceptual bug (row in graph adjacency matrix is always source, CSC is a column major representation) * temporary commit to switch a branch * minor fixes on include statements * add experimental BFS to the test suites * use the real raft handle than the temporary placeholder * add experimental BFS test * several bug fixes * run clang-format * GraphType graph => GraphType const& graph_device_view in pattern accelerator * now BFS passes C++ tests * add depth_limit to the reference BFS implementation * run clang-format * remove dead code * fix to work with new RAFT based error 
handling mechanism * minor code restructuring * apply cutoff * add SSSP test * cosmetic updates to BFS test * SSSP bug fixes * SSSP code restructuring * update template bfs & sssp functions to take pointers for distances and predecessors instead of general iterators * now SSSP passes C++ tests * add fixme comments * temporary commit to change branch * bug fix in graph_device_view.cuh * compile error fix in bfs_test.cpp * add declarations for PageRank and Katz in algorithms.hpp * bug fix in is_column_major * fix namings * implement patter accelerator APIs for PageRank and Katz Centrality * remove unused file * bug fix * modify reference BFS & SSSP to take pointers instead of iterators * compute adj_matrix_out_weights_sums if not provided (nullptr) in PageRank * rename ..._v..._e to ..._v..._nbr * add utilities for atomic_add (better move this to RAFT) * update experimental SSSP test * bug fix in copy_v_transform_reduce_in|out_nbr * reorder pattern accelerator API input parameters * tweak pattern accelerator API * add PageRank tests with the pattern accelerator API * tweak katz centrality with the pattern accelerator * minor tweak for PageRank test code * add katz centrality test * remove experimental:: from graph classes * style fix (use T{} instead of static_cast for constant values * minor style fix * count_if_adj_matrix_row to any_of_adj_matrix_row (adj_matrix_row values are replicated in p_row processes assuming 2D partitioning and p = p_row * p_col, so count_if can be confusing) * AdjMatrixRowFrontier -> VertexFrontier * break update_frontier_v_push_if_out_nbr to two functions (the second part is replaced with copy_to_adj_matrix_row) * add pure_function flag to reduce_op (if this is defined and set to true, reduction_op can be executed in any GPU in OPG) * add documentation for experimental bfs, sssp, pagerank, and katz_centrality using the pattern accelerator API * add documentation * rename opg to multi-GPU * change get_number_of_edges return type from 
vertex_type to edge_type * fix compile errors * move the pattern accelerator API out from the detail namespace (this will make migration to RAFT easier) * thrust::cuda::par.on(handle.get_stream()) to rmm::exec_policy(handle.get_stream()).on(handle.get_stream()) * fix typo in comments * escape code from exp_graph.hpp to graph_device_view.cuh * partially update tests to work with the new graph class (more updates are necessary) * temp commit to change branch * update tests to use the new graph class * update algorithm public interface to work with the new graph class * update any_of_adj_matrix_row to support MG * temporary commit for branch change * update to work with the new graph class * fix compile errors * clang-format * replace graph_device_view.cuh with vertex_partition_device.cuh & matrix_partition_device.cuh * undo changes in include/graph.hpp, this file is no longer relevant to this PR * additionally undo changes in include/graph.hpp, this file is no longer relevant to this PR * remove unnecessary comments * remove unnecessary template parameters * add copy_to_adj_matrix_col * replace for loops with thrust algorithms and few minor cosmetic fixes * break unnecessary loop carried dependendy * bug fix * bug fix (previously used plus_thrust_tuple where plus_edge_op_result should be used) * fix erreneous comments * clang-format * fixed a bug (copy_v_transform_reduce_nbr worked with only raw pointer VertexValueOutputIterator type) * update change log * clang-format * remove cuda.cuh (this is replaced by raft) * clang-format * clang-format * update raft tag * multi-gpu extension for count_if_v & count_if_e & reduce_v * transform_reduce_v * mark FIXME * multi-gpu extension for transform_reduce_v_with_adj_matrix_row * remove unecessary code * update sG interface of graph_view.hpp to mirror MG interface * replace comm_p_row_key & comm_p_col_key with key_naming_t().row_name() and col_name() * delete unused code * update comment about hyper-graph partitioning based 
matrix partitioning * add missing #include * move block_reduce_edge_op_result form transform_reduce_e.cuh to edge_op_result.cuh * fix misnomer * add warp_reduce_edge_op_result * add vectorized (warp, block) functions * update to use collective_utils.cuh * add additional functions to graph_view * fixed confusing variable names * reorder functions to be consistent with elsewhere * fix a compiler error * accomodate the change of raft allgahterv's input parameter displs type from int[] to size_t[] * update change log * update change log * update RAFT tag * temporary commit of copy_to_adj_matrix_row.cuh and collective_utils.cuh to checkout another branch * fix a bug in matrix partitioning ranges * temporary commit * fix errors in previous merge conflicts * extend copy_to_adj_matrix_row.cuh for MNMG * rename collective_utils.cuh to comm_utils.cuh * rename copy_v_transform_reduce_nbr.cuh to copy_v_transform_reduce_in_out_nbr.cuh * merge copy_to_adj_matrix_row.cuh & copy_to_adj_matrix_col.cuh * extend copy_v_transform_reduce_(in|out)_nbr for MNMG * extend Bucket for MNMG * add get_vertex_partition_size * add more explicit instantiation cases for BFS, SSSP, PageRank, KatzCentrality * extend update_frontier_v_push_if_out_nbr.cuh for MNMG * code refinement * refactor copy_v_transform_reduce_in|out_nbr * bug fix (thanks Rick) * clang-format * bug fix * bug fixes * bug fix * add comments explaining major/minor * update change log * bug fix (calling raft communication collective with host memory pointer) --- CHANGELOG.md | 1 + cpp/include/experimental/graph.hpp | 7 +- cpp/include/experimental/graph_view.hpp | 125 ++- cpp/include/partition_manager.hpp | 7 - .../patterns/any_of_adj_matrix_row.cuh | 3 +- .../patterns/copy_to_adj_matrix_col.cuh | 127 --- .../patterns/copy_to_adj_matrix_row.cuh | 127 --- .../patterns/copy_to_adj_matrix_row_col.cuh | 576 +++++++++++++ .../copy_v_transform_reduce_in_out_nbr.cuh | 643 ++++++++++++++ .../patterns/copy_v_transform_reduce_nbr.cuh | 351 
-------- cpp/include/patterns/count_if_e.cuh | 21 +- cpp/include/patterns/count_if_v.cuh | 7 +- cpp/include/patterns/edge_op_utils.cuh | 52 +- cpp/include/patterns/reduce_v.cuh | 7 +- cpp/include/patterns/transform_reduce_e.cuh | 40 +- cpp/include/patterns/transform_reduce_v.cuh | 7 +- ...transform_reduce_v_with_adj_matrix_row.cuh | 52 +- .../update_frontier_v_push_if_out_nbr.cuh | 372 ++++++--- cpp/include/patterns/vertex_frontier.cuh | 5 +- cpp/include/utilities/comm_utils.cuh | 788 ++++++++++++++++++ cpp/include/utilities/thrust_tuple_utils.cuh | 50 +- cpp/src/experimental/bfs.cu | 92 +- cpp/src/experimental/graph.cu | 12 +- cpp/src/experimental/katz_centrality.cu | 126 ++- cpp/src/experimental/pagerank.cu | 136 ++- cpp/src/experimental/sssp.cu | 84 +- 26 files changed, 2959 insertions(+), 859 deletions(-) delete mode 100644 cpp/include/patterns/copy_to_adj_matrix_col.cuh delete mode 100644 cpp/include/patterns/copy_to_adj_matrix_row.cuh create mode 100644 cpp/include/patterns/copy_to_adj_matrix_row_col.cuh create mode 100644 cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh delete mode 100644 cpp/include/patterns/copy_v_transform_reduce_nbr.cuh create mode 100644 cpp/include/utilities/comm_utils.cuh diff --git a/CHANGELOG.md b/CHANGELOG.md index d1032fccf25..eafd31a5933 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ - PR #838 Add pattern accelerator API functions and pattern accelerator API based implementations of PageRank, Katz Centrality, BFS, and SSSP - PR #1147 Added support for NetworkX graphs as input type - PR #1157 Louvain API update to use graph_container_t +- PR #1151 MNMG extension for pattern accelerator based PageRank, Katz Centrality, BFS, and SSSP implementations (C++ part) ## Improvements - PR 1081 MNMG Renumbering - sort partitions by degree diff --git a/cpp/include/experimental/graph.hpp b/cpp/include/experimental/graph.hpp index a6e22ce7009..88c84414cd0 100644 --- a/cpp/include/experimental/graph.hpp +++ 
b/cpp/include/experimental/graph.hpp @@ -70,11 +70,6 @@ class graph_t view() { std::vector offsets(adj_matrix_partition_offsets_.size(), nullptr); @@ -186,4 +181,4 @@ struct invalid_edge_id : invalid_idx { }; } // namespace experimental -} // namespace cugraph \ No newline at end of file +} // namespace cugraph diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp index 0e0bf40a01d..c655b1451ca 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/experimental/graph_view.hpp @@ -34,21 +34,28 @@ namespace experimental { /** * @brief store vertex partitioning map * - * Say P = P_row * P_col GPUs. We need to partition 1D vertex arrays (storing per vertex values) and - * the 2D graph adjacency matrix (or transposed 2D graph adjacency matrix) of G. An 1D vertex array - * of size V is divided to P linear partitions; each partition has the size close to V / P. We - * consider two different strategies to partition the 2D matrix: the default strategy and the - * hypergraph partitioning based strategy (the latter is for future extension). + * Say P = P_row * P_col GPUs. For communication, we need P_row row communicators of size P_col and + * P_col column communicators of size P_row. row_comm_size = P_col and col_comm_size = P_row. + * row_comm_rank & col_comm_rank are ranks within the row & column communicators, respectively. + * + * We need to partition 1D vertex arrays (storing per vertex values) and the 2D graph adjacency + * matrix (or transposed 2D graph adjacency matrix) of G. An 1D vertex array of size V is divided to + * P linear partitions; each partition has the size close to V / P. We consider two different + * strategies to partition the 2D matrix: the default strategy and the hypergraph partitioning based + * strategy (the latter is for future extension). 
+ * FIXME: in the future we may use the latter for both as this leads to simpler communication + * patterns and better control over parallelism vs memory footprint trade-off. * * In the default case, one GPU will be responsible for 1 rectangular partition. The matrix will be * horizontally partitioned first to P_row slabs. Each slab will be further vertically partitioned * to P_col rectangles. Each rectangular partition will have the size close to V / P_row by V / * P_col. * - * To be more specific, a GPU with (row_rank, col_rank) will be responsible for one rectangular - * partition [a,b) by [c,d) where a = vertex_partition_offsets[P_col * row_rank], b = - * vertex_partition_offsets[p_col * (row_rank + 1)], c = vertex_partition_offsets[P_row * col_rank], - * and d = vertex_partition_offsets[p_row * (col_rank + 1)] + * To be more specific, a GPU with (col_comm_rank, row_comm_rank) will be responsible for one + * rectangular partition [a,b) by [c,d) where a = vertex_partition_offsets[row_comm_size * + * col_comm_rank], b = vertex_partition_offsets[row_comm_size * (col_comm_rank + 1)], c = + * vertex_partition_offsets[col_comm_size * row_comm_rank], and d = + * vertex_partition_offsets[col_comm_size * (row_comm_rank + 1)]. * * In the future, we may apply hyper-graph partitioning to divide V vertices to P groups minimizing * edge cuts across groups while balancing the number of vertices in each group. We will also @@ -56,13 +63,16 @@ namespace experimental { * will be more non-zeros in the diagonal partitions of the 2D graph adjacency matrix (or the * transposed 2D graph adjacency matrix) than the off-diagonal partitions. The default strategy does * not balance the number of nonzeros if hyper-graph partitioning is applied. To solve this problem, - * the matrix is first horizontally partitioned to P (instead of P_row) slabs, then each slab will - * be further vertically partitioned to P_col rectangles. 
One GPU will be responsible P_col - * rectangular partitions in this case. + * the matrix is first horizontally partitioned to P slabs, then each slab will be further + * vertically partitioned to P_row (instead of P_col in the default case) rectangles. One GPU will + * be responsible col_comm_size rectangular partitions in this case. * - * To be more specific, a GPU with (row_rank, col_rank) will be responsible for P_col rectangular - * partitions [a_i,b_i) by [c,d) where a_i = vertex_partition_offsets[P_row * i + row_rank] and b_i - * = vertex_partition_offsets[P_row * i + row_rank + 1]. c and d are same to 1) and i = [0, P_col). + * To be more specific, a GPU with (col_comm_rank, row_comm_rank) will be responsible for + * col_comm_size rectangular partitions [a_i,b_i) by [c,d) where a_i = + * vertex_partition_offsets[row_comm_size * i + row_comm_rank] and b_i = + * vertex_partition_offsets[row_comm_size * i + row_comm_rank + 1]. c is + * vertex_partition_offsets[row_comm_size * col_comm_rank] and d = + * vertex_partition_offsests[row_comm_size * (col_comm_rank + 1)]. * * See E. G. Boman et. al., “Scalable matrix computations on large scale-free graphs using 2D graph * partitioning”, 2013 for additional detail. 
@@ -87,7 +97,7 @@ class partition_t { col_comm_rank_(col_comm_rank) { CUGRAPH_EXPECTS( - vertex_partition_offsets.size() == static_cast(row_comm_size * col_comm_size), + vertex_partition_offsets.size() == static_cast(row_comm_size * col_comm_size + 1), "Invalid API parameter: erroneous vertex_partition_offsets.size()."); CUGRAPH_EXPECTS( @@ -104,15 +114,15 @@ class partition_t { } } - std::tuple get_vertex_partition_range() const + std::tuple get_local_vertex_range() const { return std::make_tuple(vertex_partition_offsets_[comm_rank_], vertex_partition_offsets_[comm_rank_ + 1]); } - vertex_t get_vertex_partition_first() const { return vertex_partition_offsets_[comm_rank_]; } + vertex_t get_local_vertex_first() const { return vertex_partition_offsets_[comm_rank_]; } - vertex_t get_vertex_partition_last() const { return vertex_partition_offsets_[comm_rank_ + 1]; } + vertex_t get_local_vertex_last() const { return vertex_partition_offsets_[comm_rank_ + 1]; } std::tuple get_vertex_partition_range(size_t vertex_partition_idx) const { @@ -130,22 +140,23 @@ class partition_t { return vertex_partition_offsets_[vertex_partition_idx + 1]; } + vertex_t get_vertex_partition_size(size_t vertex_partition_idx) const + { + return get_vertex_partition_last(vertex_partition_idx) - + get_vertex_partition_first(vertex_partition_idx); + } + size_t get_number_of_matrix_partitions() const { return hypergraph_partitioned_ ? col_comm_size_ : 1; } + // major: row of the graph adjacency matrix (if the graph adjacency matrix is stored as is) or + // column of the graph adjacency matrix (if the transposed graph adjacency matrix is stored). std::tuple get_matrix_partition_major_range(size_t partition_idx) const { - auto major_first = - hypergraph_partitioned_ - ? vertex_partition_offsets_[row_comm_size_ * partition_idx + row_comm_rank_] - : vertex_partition_offsets_[row_comm_rank_ * col_comm_size_]; - auto major_last = - hypergraph_partitioned_ - ? 
vertex_partition_offsets_[row_comm_size_ * partition_idx + row_comm_rank_ + 1] - : vertex_partition_offsets_[(row_comm_rank_ + 1) * col_comm_size_]; - + auto major_first = get_matrix_partition_major_first(partition_idx); + auto major_last = get_matrix_partition_major_last(partition_idx); return std::make_tuple(major_first, major_last); } @@ -153,14 +164,14 @@ class partition_t { { return hypergraph_partitioned_ ? vertex_partition_offsets_[row_comm_size_ * partition_idx + row_comm_rank_] - : vertex_partition_offsets_[row_comm_rank_ * col_comm_size_]; + : vertex_partition_offsets_[col_comm_rank_ * row_comm_size_]; } vertex_t get_matrix_partition_major_last(size_t partition_idx) const { return hypergraph_partitioned_ ? vertex_partition_offsets_[row_comm_size_ * partition_idx + row_comm_rank_ + 1] - : vertex_partition_offsets_[(row_comm_rank_ + 1) * col_comm_size_]; + : vertex_partition_offsets_[(col_comm_rank_ + 1) * row_comm_size_]; } vertex_t get_matrix_partition_major_value_start_offset(size_t partition_idx) const @@ -168,24 +179,31 @@ class partition_t { return matrix_partition_major_value_start_offsets_[partition_idx]; } + // minor: column of the graph adjacency matrix (if the graph adjacency matrix is stored as is) or + // row of the graph adjacency matrix (if the transposed graph adjacency matrix is stored). std::tuple get_matrix_partition_minor_range() const { - auto minor_first = vertex_partition_offsets_[col_comm_rank_ * row_comm_size_]; - auto minor_last = vertex_partition_offsets_[(col_comm_rank_ + 1) * row_comm_size_]; + auto minor_first = get_matrix_partition_minor_first(); + auto minor_last = get_matrix_partition_minor_last(); return std::make_tuple(minor_first, minor_last); } vertex_t get_matrix_partition_minor_first() const { - return vertex_partition_offsets_[col_comm_rank_ * row_comm_size_]; + return hypergraph_partitioned_ ? 
vertex_partition_offsets_[col_comm_rank_ * row_comm_size_] + : vertex_partition_offsets_[row_comm_rank_ * col_comm_size_]; } vertex_t get_matrix_partition_minor_last() const { - return vertex_partition_offsets_[(col_comm_rank_ + 1) * row_comm_size_]; + return hypergraph_partitioned_ + ? vertex_partition_offsets_[(col_comm_rank_ + 1) * row_comm_size_] + : vertex_partition_offsets_[(row_comm_rank_ + 1) * col_comm_size_]; } + // FIXME: this function may be removed if we use the same partitioning strategy whether hypergraph + // partitioning is applied or not bool is_hypergraph_partitioned() const { return hypergraph_partitioned_; } private: @@ -305,12 +323,28 @@ class graph_view_tget_number_of_vertices(); } + vertex_t get_vertex_partition_first(size_t vertex_partition_idx) const { return vertex_t{0}; } + + vertex_t get_vertex_partition_last(size_t vertex_partition_idx) const + { + return this->get_number_of_vertices(); + } + + vertex_t get_vertex_partition_size(size_t vertex_partition_idx) const + { + return get_vertex_partition_last(vertex_partition_idx) - + get_vertex_partition_first(vertex_partition_idx); + } + constexpr bool is_local_vertex_nocheck(vertex_t v) const { return true; } constexpr size_t get_number_of_local_adj_matrix_partitions() const { return size_t(1); } @@ -525,6 +574,8 @@ class graph_view_t -std::string to_string(from_t const& value) -{ - std::stringstream ss; - ss << value; - return ss.str(); -} // default key-naming mechanism: // diff --git a/cpp/include/patterns/any_of_adj_matrix_row.cuh b/cpp/include/patterns/any_of_adj_matrix_row.cuh index e75273272e5..199e7c230ef 100644 --- a/cpp/include/patterns/any_of_adj_matrix_row.cuh +++ b/cpp/include/patterns/any_of_adj_matrix_row.cuh @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -63,7 +64,7 @@ bool any_of_adj_matrix_row(raft::handle_t const& handle, adj_matrix_row_value_input_first + graph_view.get_number_of_local_adj_matrix_partition_rows(), row_op); if 
(GraphViewType::is_multi_gpu) { - handle.get_comms().allreduce(&count, &count, 1, raft::comms::op_t::SUM, handle.get_stream()); + count = host_scalar_allreduce(handle.get_comms(), count, handle.get_stream()); } return (count > 0); } diff --git a/cpp/include/patterns/copy_to_adj_matrix_col.cuh b/cpp/include/patterns/copy_to_adj_matrix_col.cuh deleted file mode 100644 index 35f757ccd60..00000000000 --- a/cpp/include/patterns/copy_to_adj_matrix_col.cuh +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include - -#include -#include - -#include -#include -#include - -namespace cugraph { -namespace experimental { - -/** - * @brief Copy vertex property values to the corresponding graph adjacency matrix column property - * variables. - * - * This version fills the entire set of graph adjacency matrix column property values. This function - * is inspired by thrust::copy(). - * - * @tparam GraphViewType Type of the passed non-owning graph object. - * @tparam VertexValueInputIterator Type of the iterator for vertex properties. - * @tparam AdjMatrixColValueOutputIterator Type of the iterator for graph adjacency matrix column - * output property variables. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. 
- * @param graph_view Non-owning graph object. - * @param vertex_value_input_first Iterator pointing to the vertex properties for the first - * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive) - * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices(). - * @param adj_matrix_col_value_output_first Iterator pointing to the adjacency matrix column output - * property variables for the first (inclusive) column (assigned to this process in multi-GPU). - * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first - * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). - */ -template -void copy_to_adj_matrix_col(raft::handle_t const& handle, - GraphViewType const& graph_view, - VertexValueInputIterator vertex_value_input_first, - AdjMatrixColValueOutputIterator adj_matrix_col_value_output_first) -{ - if (GraphViewType::is_multi_gpu) { - CUGRAPH_FAIL("unimplemented."); - } else { - assert(graph_view.get_number_of_local_vertices() == - graph_view.get_number_of_local_adj_matrix_partition_cols()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_value_input_first, - vertex_value_input_first + graph_view.get_number_of_local_vertices(), - adj_matrix_col_value_output_first); - } -} - -/** - * @brief Copy vertex property values to the corresponding graph adjacency matrix column property - * variables. - * - * This version fills only a subset of graph adjacency matrix column property values. [@p - * vertex_first, @p vertex_last) specifies the vertices with new values to be copied to graph - * adjacency matrix column property variables. This function is inspired by thrust::copy(). - * - * @tparam GraphViewType Type of the passed non-owning graph object. - * @tparam VertexIterator Type of the iterator for vertex identifiers. - * @tparam VertexValueInputIterator Type of the iterator for vertex properties. 
- * @tparam AdjMatrixColValueOutputIterator Type of the iterator for graph adjacency matrix column - * output property variables. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Non-owning graph object. - * @param vertex_first Iterator pointing to the first (inclusive) vertex with new values to be - * copied. v in [vertex_first, vertex_last) should be distinct (and should belong to this process in - * multi-GPU), otherwise undefined behavior - * @param vertex_last Iterator pointing to the last (exclusive) vertex with new values to be copied. - * @param vertex_value_input_first Iterator pointing to the vertex properties for the first - * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive) - * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices(). - * @param adj_matrix_col_value_output_first Iterator pointing to the adjacency matrix column output - * property variables for the first (inclusive) column (assigned to this process in multi-GPU). - * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first - * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). 
- */ -template -void copy_to_adj_matrix_col(raft::handle_t const& handle, - GraphViewType const& graph_view, - VertexIterator vertex_first, - VertexIterator vertex_last, - VertexValueInputIterator vertex_value_input_first, - AdjMatrixColValueOutputIterator adj_matrix_col_value_output_first) -{ - if (GraphViewType::is_multi_gpu) { - CUGRAPH_FAIL("unimplemented."); - } else { - assert(graph_view.get_number_of_local_vertices() == - graph_view.get_number_of_local_adj_matrix_partition_cols()); - auto val_first = thrust::make_permutation_iterator(vertex_value_input_first, vertex_first); - thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - val_first, - val_first + thrust::distance(vertex_first, vertex_last), - vertex_first, - adj_matrix_col_value_output_first); - } -} - -} // namespace experimental -} // namespace cugraph diff --git a/cpp/include/patterns/copy_to_adj_matrix_row.cuh b/cpp/include/patterns/copy_to_adj_matrix_row.cuh deleted file mode 100644 index 507edf44f07..00000000000 --- a/cpp/include/patterns/copy_to_adj_matrix_row.cuh +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include -#include - -#include -#include - -#include -#include -#include - -namespace cugraph { -namespace experimental { - -/** - * @brief Copy vertex property values to the corresponding graph adjacency matrix row property - * variables. 
- * - * This version fills the entire set of graph adjacency matrix row property values. This function is - * inspired by thrust::copy(). - * - * @tparam GraphViewType Type of the passed non-owning graph object. - * @tparam VertexValueInputIterator Type of the iterator for vertex properties. - * @tparam AdjMatrixRowValueOutputIterator Type of the iterator for graph adjacency matrix row - * output property variables. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Non-owning graph object. - * @param vertex_value_input_first Iterator pointing to the vertex properties for the first - * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive) - * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices(). - * @param adj_matrix_row_value_output_first Iterator pointing to the adjacency matrix row output - * property variables for the first (inclusive) row (assigned to this process in multi-GPU). - * `adj_matrix_row_value_output_last` (exclusive) is deduced as @p adj_matrix_row_value_output_first - * + @p graph_view.get_number_of_local_adj_matrix_partition_rows(). 
- */ -template -void copy_to_adj_matrix_row(raft::handle_t const& handle, - GraphViewType const& graph_view, - VertexValueInputIterator vertex_value_input_first, - AdjMatrixRowValueOutputIterator adj_matrix_row_value_output_first) -{ - if (GraphViewType::is_multi_gpu) { - CUGRAPH_FAIL("unimplemented."); - } else { - assert(graph_view.get_number_of_local_vertices() == - graph_view.get_number_of_local_adj_matrix_partition_rows()); - thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_value_input_first, - vertex_value_input_first + graph_view.get_number_of_local_vertices(), - adj_matrix_row_value_output_first); - } -} - -/** - * @brief Copy vertex property values to the corresponding graph adjacency matrix row property - * variables. - * - * This version fills only a subset of graph adjacency matrix row property values. [@p vertex_first, - * @p vertex_last) specifies the vertices with new values to be copied to graph adjacency matrix row - * property variables. This function is inspired by thrust::copy(). - * - * @tparam GraphViewType Type of the passed non-owning graph object. - * @tparam VertexIterator Type of the iterator for vertex identifiers. - * @tparam VertexValueInputIterator Type of the iterator for vertex properties. - * @tparam AdjMatrixRowValueOutputIterator Type of the iterator for graph adjacency matrix row - * output property variables. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Non-owning graph object. - * @param vertex_first Iterator pointing to the first (inclusive) vertex with new values to be - * copied. v in [vertex_first, vertex_last) should be distinct (and should belong to this process in - * multi-GPU), otherwise undefined behavior - * @param vertex_last Iterator pointing to the last (exclusive) vertex with new values to be copied. 
- * @param vertex_value_input_first Iterator pointing to the vertex properties for the first - * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive) - * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices(). - * @param adj_matrix_row_value_output_first Iterator pointing to the adjacency matrix row output - * property variables for the first (inclusive) row (assigned to this process in multi-GPU). - * `adj_matrix_row_value_output_last` (exclusive) is deduced as @p adj_matrix_row_value_output_first - * + @p graph_view.get_number_of_local_adj_matrix_partition_rows(). - */ -template -void copy_to_adj_matrix_row(raft::handle_t const& handle, - GraphViewType const& graph_view, - VertexIterator vertex_first, - VertexIterator vertex_last, - VertexValueInputIterator vertex_value_input_first, - AdjMatrixRowValueOutputIterator adj_matrix_row_value_output_first) -{ - if (GraphViewType::is_multi_gpu) { - CUGRAPH_FAIL("unimplemented."); - } else { - assert(graph_view.get_number_of_local_vertices() == - graph_view.get_number_of_local_adj_matrix_partition_rows()); - auto val_first = thrust::make_permutation_iterator(vertex_value_input_first, vertex_first); - thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - val_first, - val_first + thrust::distance(vertex_first, vertex_last), - vertex_first, - adj_matrix_row_value_output_first); - } -} - -} // namespace experimental -} // namespace cugraph diff --git a/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh b/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh new file mode 100644 index 00000000000..e8e11b85913 --- /dev/null +++ b/cpp/include/patterns/copy_to_adj_matrix_row_col.cuh @@ -0,0 +1,576 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +#include +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +template +void copy_to_matrix_major(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexValueInputIterator vertex_value_input_first, + MatrixMajorValueOutputIterator matrix_major_value_output_first) +{ + if (GraphViewType::is_multi_gpu) { + if (graph_view.is_hypergraph_partitioned()) { + CUGRAPH_FAIL("unimplemented."); + } else { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); + + std::vector rx_counts(row_comm_size, size_t{0}); + std::vector displacements(row_comm_size, size_t{0}); + for (int i = 0; i < row_comm_size; ++i) { + rx_counts[i] = graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i); + displacements[i] = (i == 0) ? 
0 : displacements[i - 1] + rx_counts[i - 1]; + } + device_allgatherv(row_comm, + vertex_value_input_first, + matrix_major_value_output_first, + rx_counts, + displacements, + handle.get_stream()); + } + } else { + assert(graph_view.get_number_of_local_vertices() == GraphViewType::is_adj_matrix_transposed + ? graph_view.get_number_of_adj_matrix_local_cols() + : graph_view.get_number_of_adj_matrix_local_rows()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_value_input_first, + vertex_value_input_first + graph_view.get_number_of_local_vertices(), + matrix_major_value_output_first); + } +} + +template +void copy_to_matrix_major(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexIterator vertex_first, + VertexIterator vertex_last, + VertexValueInputIterator vertex_value_input_first, + MatrixMajorValueOutputIterator matrix_major_value_output_first) +{ + using vertex_t = typename GraphViewType::vertex_type; + + if (GraphViewType::is_multi_gpu) { + if (graph_view.is_hypergraph_partitioned()) { + CUGRAPH_FAIL("unimplemented."); + } else { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); + + auto rx_counts = + host_scalar_allgather(row_comm, + static_cast(thrust::distance(vertex_first, vertex_last)), + handle.get_stream()); + std::vector displacements(row_comm_size, size_t{0}); + std::partial_sum(rx_counts.begin(), rx_counts.end() - 1, displacements.begin() + 1); + + matrix_partition_device_t matrix_partition(graph_view, 0); + for (int i = 0; i < row_comm_size; ++i) { + rmm::device_uvector 
rx_vertices(rx_counts[i], handle.get_stream()); + auto rx_tmp_buffer = + allocate_comm_buffer::value_type>( + rx_counts[i], handle.get_stream()); + auto rx_value_first = get_comm_buffer_begin< + typename std::iterator_traits::value_type>(rx_tmp_buffer); + + if (i == row_comm_rank) { + // FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a + // permutation iterator (and directly gathers to the internal buffer) + thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_first, + vertex_last, + vertex_value_input_first, + rx_value_first); + } + + // FIXME: these broadcast operations can be placed between ncclGroupStart() and + // ncclGroupEnd() + device_bcast( + row_comm, vertex_first, rx_vertices.begin(), rx_counts[i], i, handle.get_stream()); + device_bcast( + row_comm, rx_value_first, rx_value_first, rx_counts[i], i, handle.get_stream()); + + auto map_first = thrust::make_transform_iterator( + rx_vertices.begin(), [matrix_partition] __device__(auto v) { + return matrix_partition.get_major_offset_from_major_nocheck(v); + }); + // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and + // directly scatters from the internal buffer) + thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_value_first, + rx_value_first + rx_counts[i], + map_first, + matrix_major_value_output_first); + } + } + } else { + assert(graph_view.get_number_of_local_vertices() == GraphViewType::is_adj_matrix_transposed + ? 
graph_view.get_number_of_adj_matrix_local_cols() + : graph_view.get_number_of_adj_matrix_local_rows()); + auto val_first = thrust::make_permutation_iterator(vertex_value_input_first, vertex_first); + thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + val_first, + val_first + thrust::distance(vertex_first, vertex_last), + vertex_first, + matrix_major_value_output_first); + } +} + +template +void copy_to_matrix_minor(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexValueInputIterator vertex_value_input_first, + MatrixMinorValueOutputIterator matrix_minor_value_output_first) +{ + if (GraphViewType::is_multi_gpu) { + if (graph_view.is_hypergraph_partitioned()) { + CUGRAPH_FAIL("unimplemented."); + } else { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); + + // FIXME: this P2P is unnecessary if we apply the partitioning scheme used with hypergraph + // partitioning + auto comm_src_rank = row_comm_rank * col_comm_size + col_comm_rank; + auto comm_dst_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; + auto constexpr tuple_size = thrust_tuple_size_or_one< + typename std::iterator_traits::value_type>::value; + std::vector requests(2 * tuple_size); + device_isend( + comm, + vertex_value_input_first, + static_cast(graph_view.get_number_of_local_vertices()), + comm_dst_rank, + int{0} /* base_tag */, + requests.data()); + device_irecv( + comm, + matrix_minor_value_output_first + + (graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size + col_comm_rank) - + 
graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size)), + static_cast(graph_view.get_vertex_partition_size(comm_src_rank)), + comm_src_rank, + int{0} /* base_tag */, + requests.data() + tuple_size); + // FIXME: this waitall can fail if MatrixMinorValueOutputIterator is a discard iterator or a + // zip iterator having one or more discard iterator + comm.waitall(requests.size(), requests.data()); + + // FIXME: these broadcast operations can be placed between ncclGroupStart() and + // ncclGroupEnd() + for (int i = 0; i < col_comm_size; ++i) { + auto offset = graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size + i) - + graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size); + auto count = graph_view.get_vertex_partition_size(row_comm_rank * col_comm_size + i); + device_bcast(col_comm, + matrix_minor_value_output_first + offset, + matrix_minor_value_output_first + offset, + count, + i, + handle.get_stream()); + } + } + } else { + assert(graph_view.get_number_of_local_vertices() == GraphViewType::is_adj_matrix_transposed + ? 
 graph_view.get_number_of_adj_matrix_local_rows() + : graph_view.get_number_of_adj_matrix_local_cols()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_value_input_first, + vertex_value_input_first + graph_view.get_number_of_local_vertices(), + matrix_minor_value_output_first); + } +} + +template +void copy_to_matrix_minor(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexIterator vertex_first, + VertexIterator vertex_last, + VertexValueInputIterator vertex_value_input_first, + MatrixMinorValueOutputIterator matrix_minor_value_output_first) +{ + using vertex_t = typename GraphViewType::vertex_type; + + if (GraphViewType::is_multi_gpu) { + if (graph_view.is_hypergraph_partitioned()) { + CUGRAPH_FAIL("unimplemented."); + } else { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); + + // FIXME: this P2P is unnecessary if we apply the same partitioning scheme regardless of + // whether hypergraph partitioning is applied or not + auto comm_src_rank = row_comm_rank * col_comm_size + col_comm_rank; + auto comm_dst_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; + auto constexpr tuple_size = thrust_tuple_size_or_one< + typename std::iterator_traits::value_type>::value; + + std::vector count_requests(2); + auto tx_count = thrust::distance(vertex_first, vertex_last); + auto rx_count = tx_count; + comm.isend(&tx_count, 1, comm_dst_rank, 0 /* tag */, count_requests.data()); + comm.irecv(&rx_count, 1, comm_src_rank, 0 /* tag */, count_requests.data() + 1); + comm.waitall(count_requests.size(),
count_requests.data()); + + auto src_tmp_buffer = + allocate_comm_buffer::value_type>( + tx_count, handle.get_stream()); + auto src_value_first = + get_comm_buffer_begin::value_type>( + src_tmp_buffer); + + rmm::device_uvector dst_vertices(rx_count, handle.get_stream()); + auto dst_tmp_buffer = + allocate_comm_buffer::value_type>( + rx_count, handle.get_stream()); + auto dst_value_first = + get_comm_buffer_begin::value_type>( + dst_tmp_buffer); + + thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_first, + vertex_last, + vertex_value_input_first, + src_value_first); + + std::vector value_requests(2 * (1 + tuple_size)); + device_isend( + comm, vertex_first, tx_count, comm_dst_rank, int{0} /* base_tag */, value_requests.data()); + device_isend(comm, + src_value_first, + tx_count, + comm_dst_rank, + int{1} /* base_tag */, + value_requests.data() + 1); + device_irecv( + comm, + dst_vertices.begin(), + rx_count, + comm_src_rank, + int{0} /* base_tag */, + value_requests.data() + (1 + tuple_size)); + device_irecv( + comm, + dst_value_first, + rx_count, + comm_src_rank, + int{0} /* base_tag */, + value_requests.data() + ((1 + tuple_size) + 1)); + // FIXME: this waitall can fail if MatrixMinorValueOutputIterator is a discard iterator or a + // zip iterator having one or more discard iterator + comm.waitall(value_requests.size(), value_requests.data()); + + // FIXME: now we can clear tx_tmp_buffer + + auto rx_counts = host_scalar_allgather(col_comm, rx_count, handle.get_stream()); + std::vector displacements(col_comm_size, size_t{0}); + std::partial_sum(rx_counts.begin(), rx_counts.end() - 1, displacements.begin() + 1); + + matrix_partition_device_t matrix_partition(graph_view, 0); + for (int i = 0; i < col_comm_size; ++i) { + rmm::device_uvector rx_vertices(rx_counts[i], handle.get_stream()); + auto rx_tmp_buffer = + allocate_comm_buffer::value_type>( + rx_counts[i], handle.get_stream()); + auto rx_value_first = get_comm_buffer_begin< 
+ typename std::iterator_traits::value_type>(rx_tmp_buffer); + + // FIXME: these broadcast operations can be placed between ncclGroupStart() and + // ncclGroupEnd() + device_bcast(col_comm, + dst_vertices.begin(), + rx_vertices.begin(), + rx_counts[i], + i, + handle.get_stream()); + device_bcast( + col_comm, dst_value_first, rx_value_first, rx_counts[i], i, handle.get_stream()); + + auto map_first = thrust::make_transform_iterator( + rx_vertices.begin(), [matrix_partition] __device__(auto v) { + return matrix_partition.get_minor_offset_from_minor_nocheck(v); + }); + + thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_value_first, + rx_value_first + rx_counts[i], + map_first, + matrix_minor_value_output_first); + } + } + } else { + assert(graph_view.get_number_of_local_vertices() == + graph_view.get_number_of_adj_matrix_local_rows()); + auto val_first = thrust::make_permutation_iterator(vertex_value_input_first, vertex_first); + thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + val_first, + val_first + thrust::distance(vertex_first, vertex_last), + vertex_first, + matrix_minor_value_output_first); + } +} + +} // namespace detail + +/** + * @brief Copy vertex property values to the corresponding graph adjacency matrix row property + * variables. + * + * This version fills the entire set of graph adjacency matrix row property values. This function is + * inspired by thrust::copy(). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam VertexValueInputIterator Type of the iterator for vertex properties. + * @tparam AdjMatrixRowValueOutputIterator Type of the iterator for graph adjacency matrix row + * output property variables. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. 
+ * @param vertex_value_input_first Iterator pointing to the vertex properties for the first + * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive) + * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices(). + * @param adj_matrix_row_value_output_first Iterator pointing to the adjacency matrix row output + * property variables for the first (inclusive) row (assigned to this process in multi-GPU). + * `adj_matrix_row_value_output_last` (exclusive) is deduced as @p adj_matrix_row_value_output_first + * + @p graph_view.get_number_of_adj_matrix_local_rows(). + */ +template +void copy_to_adj_matrix_row(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexValueInputIterator vertex_value_input_first, + AdjMatrixRowValueOutputIterator adj_matrix_row_value_output_first) +{ + if (GraphViewType::is_adj_matrix_transposed) { + copy_to_matrix_minor( + handle, graph_view, vertex_value_input_first, adj_matrix_row_value_output_first); + } else { + copy_to_matrix_major( + handle, graph_view, vertex_value_input_first, adj_matrix_row_value_output_first); + } +} + +/** + * @brief Copy vertex property values to the corresponding graph adjacency matrix row property + * variables. + * + * This version fills only a subset of graph adjacency matrix row property values. [@p vertex_first, + * @p vertex_last) specifies the vertices with new values to be copied to graph adjacency matrix row + * property variables. This function is inspired by thrust::copy(). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam VertexIterator Type of the iterator for vertex identifiers. + * @tparam VertexValueInputIterator Type of the iterator for vertex properties. + * @tparam AdjMatrixRowValueOutputIterator Type of the iterator for graph adjacency matrix row + * output property variables. + * @param handle RAFT handle object to encapsulate resources (e.g. 
CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param vertex_first Iterator pointing to the first (inclusive) vertex with new values to be + * copied. v in [vertex_first, vertex_last) should be distinct (and should belong to this process in + * multi-GPU), otherwise undefined behavior + * @param vertex_last Iterator pointing to the last (exclusive) vertex with new values to be copied. + * @param vertex_value_input_first Iterator pointing to the vertex properties for the first + * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive) + * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices(). + * @param adj_matrix_row_value_output_first Iterator pointing to the adjacency matrix row output + * property variables for the first (inclusive) row (assigned to this process in multi-GPU). + * `adj_matrix_row_value_output_last` (exclusive) is deduced as @p adj_matrix_row_value_output_first + * + @p graph_view.get_number_of_adj_matrix_local_rows(). + */ +template +void copy_to_adj_matrix_row(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexIterator vertex_first, + VertexIterator vertex_last, + VertexValueInputIterator vertex_value_input_first, + AdjMatrixRowValueOutputIterator adj_matrix_row_value_output_first) +{ + if (GraphViewType::is_adj_matrix_transposed) { + copy_to_matrix_minor(handle, + graph_view, + vertex_first, + vertex_last, + vertex_value_input_first, + adj_matrix_row_value_output_first); + } else { + copy_to_matrix_major(handle, + graph_view, + vertex_first, + vertex_last, + vertex_value_input_first, + adj_matrix_row_value_output_first); + } +} + +/** + * @brief Copy vertex property values to the corresponding graph adjacency matrix column property + * variables. + * + * This version fills the entire set of graph adjacency matrix column property values. 
This function + * is inspired by thrust::copy(). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam VertexValueInputIterator Type of the iterator for vertex properties. + * @tparam AdjMatrixColValueOutputIterator Type of the iterator for graph adjacency matrix column + * output property variables. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param vertex_value_input_first Iterator pointing to the vertex properties for the first + * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive) + * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices(). + * @param adj_matrix_col_value_output_first Iterator pointing to the adjacency matrix column output + * property variables for the first (inclusive) column (assigned to this process in multi-GPU). + * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first + * + @p graph_view.get_number_of_adj_matrix_local_cols(). + */ +template +void copy_to_adj_matrix_col(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexValueInputIterator vertex_value_input_first, + AdjMatrixColValueOutputIterator adj_matrix_col_value_output_first) +{ + if (GraphViewType::is_adj_matrix_transposed) { + copy_to_matrix_major( + handle, graph_view, vertex_value_input_first, adj_matrix_col_value_output_first); + } else { + copy_to_matrix_minor( + handle, graph_view, vertex_value_input_first, adj_matrix_col_value_output_first); + } +} + +/** + * @brief Copy vertex property values to the corresponding graph adjacency matrix column property + * variables. + * + * This version fills only a subset of graph adjacency matrix column property values. 
[@p + * vertex_first, @p vertex_last) specifies the vertices with new values to be copied to graph + * adjacency matrix column property variables. This function is inspired by thrust::copy(). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam VertexIterator Type of the iterator for vertex identifiers. + * @tparam VertexValueInputIterator Type of the iterator for vertex properties. + * @tparam AdjMatrixColValueOutputIterator Type of the iterator for graph adjacency matrix column + * output property variables. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param vertex_first Iterator pointing to the first (inclusive) vertex with new values to be + * copied. v in [vertex_first, vertex_last) should be distinct (and should belong to this process in + * multi-GPU), otherwise undefined behavior + * @param vertex_last Iterator pointing to the last (exclusive) vertex with new values to be copied. + * @param vertex_value_input_first Iterator pointing to the vertex properties for the first + * (inclusive) vertex (assigned to this process in multi-GPU). `vertex_value_input_last` (exclusive) + * is deduced as @p vertex_value_input_first + @p graph_view.get_number_of_local_vertices(). + * @param adj_matrix_col_value_output_first Iterator pointing to the adjacency matrix column output + * property variables for the first (inclusive) column (assigned to this process in multi-GPU). + * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first + * + @p graph_view.get_number_of_adj_matrix_local_cols(). 
+ */ +template +void copy_to_adj_matrix_col(raft::handle_t const& handle, + GraphViewType const& graph_view, + VertexIterator vertex_first, + VertexIterator vertex_last, + VertexValueInputIterator vertex_value_input_first, + AdjMatrixColValueOutputIterator adj_matrix_col_value_output_first) +{ + if (GraphViewType::is_adj_matrix_transposed) { + copy_to_matrix_major(handle, + graph_view, + vertex_first, + vertex_last, + vertex_value_input_first, + adj_matrix_col_value_output_first); + } else { + copy_to_matrix_minor(handle, + graph_view, + vertex_first, + vertex_last, + vertex_value_input_first, + adj_matrix_col_value_output_first); + } +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh new file mode 100644 index 00000000000..7737a6b875c --- /dev/null +++ b/cpp/include/patterns/copy_v_transform_reduce_in_out_nbr.cuh @@ -0,0 +1,643 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
 + */ +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +// FIXME: block size requires tuning +int32_t constexpr copy_v_transform_reduce_nbr_for_all_block_size = 128; + +#if 0 +// FIXME: delete this once we verify that the thrust replace in for_all_major_for_all_nbr_low_degree is no slower than the original for loop based implementation +template +__device__ std::enable_if_t accumulate_edge_op_result(T& lhs, T const& rhs) +{ + lhs = plus_edge_op_result(lhs, rhs); +} + +template +__device__ std::enable_if_t accumulate_edge_op_result(T& lhs, T const& rhs) +{ + atomic_add(&lhs, rhs); +} +#endif + +template +__global__ void for_all_major_for_all_nbr_low_degree( + matrix_partition_device_t matrix_partition, + typename GraphViewType::vertex_type row_first, + typename GraphViewType::vertex_type row_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + ResultValueOutputIterator result_value_output_first, + EdgeOp e_op, + T init /* relevant only if update_major == true */) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using e_op_result_t = T; + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto idx = + static_cast(row_first - matrix_partition.get_major_first()) + static_cast(tid); + + while (idx < static_cast(row_last - matrix_partition.get_major_first())) { + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_degree{}; + thrust::tie(indices, weights, local_degree) = + matrix_partition.get_local_edges(static_cast(idx)); +#if 1 + auto transform_op = [&matrix_partition, + &adj_matrix_row_value_input_first, +
 &adj_matrix_col_value_input_first, + &e_op, + idx, + indices, + weights] __device__(auto i) { + auto minor = indices[i]; + auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + auto row = GraphViewType::is_adj_matrix_transposed + ? minor + : matrix_partition.get_major_from_major_offset_nocheck(idx); + auto col = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_from_major_offset_nocheck(idx) + : minor; + auto row_offset = + GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); + auto col_offset = + GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; + return evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + }; + + if (update_major) { + *(result_value_output_first + idx) = thrust::transform_reduce( + thrust::seq, + thrust::make_counting_iterator(edge_t{0}), + thrust::make_counting_iterator(local_degree), + transform_op, + init, + [] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); }); + } else { + thrust::for_each( + thrust::seq, + thrust::make_counting_iterator(edge_t{0}), + thrust::make_counting_iterator(local_degree), + [&matrix_partition, indices, &result_value_output_first, &transform_op] __device__(auto i) { + auto e_op_result = transform_op(i); + auto minor = indices[i]; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + atomic_accumulate_edge_op_result(result_value_output_first + minor_offset, e_op_result); + }); + } +#else + // FIXME: delete this once we verify that the code above is not slower than this. + e_op_result_t e_op_result_sum{init}; // relevant only if update_major == true + for (edge_t i = 0; i < local_degree; ++i) { + auto minor = indices[i]; + auto weight = weights != nullptr ?
weights[i] : weight_t{1.0}; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + auto row = GraphViewType::is_adj_matrix_transposed + ? minor + : matrix_partition.get_major_from_major_offset_nocheck(idx); + auto col = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_from_major_offset_nocheck(idx) + : minor; + auto row_offset = + GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); + auto col_offset = + GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; + auto e_op_result = evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + if (update_major) { + accumulate_edge_op_result(e_op_result_sum, e_op_result); + } else { + accumulate_edge_op_result(*(result_value_output_first + minor_offset), + e_op_result); + } + } + if (update_major) { *(result_value_output_first + idx) = e_op_result_sum; } +#endif + idx += gridDim.x * blockDim.x; + } +} + +template +__global__ void for_all_major_for_all_nbr_mid_degree( + matrix_partition_device_t matrix_partition, + typename GraphViewType::vertex_type row_first, + typename GraphViewType::vertex_type row_last, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + ResultValueOutputIterator result_value_output_first, + EdgeOp e_op, + T init /* relevent only if update_major == true */) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using e_op_result_t = T; + + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + static_assert(copy_v_transform_reduce_nbr_for_all_block_size % raft::warp_size() == 0); + auto const lane_id = tid % raft::warp_size(); + auto idx = static_cast(row_first - matrix_partition.get_major_first()) + + 
static_cast(tid / raft::warp_size()); + + while (idx < static_cast(row_last - matrix_partition.get_major_first())) { + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_degree{}; + thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(idx); + auto e_op_result_sum = + lane_id == 0 ? init : e_op_result_t{}; // relevent only if update_major == true + for (edge_t i = lane_id; i < local_degree; i += raft::warp_size) { + auto minor = indices[i]; + auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + auto row = GraphViewType::is_adj_matrix_transposed + ? minor + : matrix_partition.get_major_from_major_offset_nocheck(idx); + auto col = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_from_major_offset_nocheck(idx) + : minor; + auto row_offset = + GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); + auto col_offset = + GraphViewType::is_adj_matrix_transposed ? 
static_cast(idx) : minor_offset; + auto e_op_result = evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + if (update_major) { + e_op_result_sum = plus_edge_op_result(e_op_result_sum, e_op_result); + } else { + atomic_accumulate_edge_op_result(result_value_output_first + minor_offset, e_op_result); + } + } + if (update_major) { + e_op_result_sum = warp_reduce_edge_op_result().compute(e_op_result_sum); + if (lane_id == 0) { *(result_value_output_first + idx) = e_op_result_sum; } + } + + idx += gridDim.x * (blockDim.x / raft::warp_size()); + } +} + +template +__global__ void for_all_major_for_all_nbr_high_degree( + matrix_partition_device_t matrix_partition, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + ResultValueOutputIterator result_value_output_first, + EdgeOp e_op, + T init /* relevent only if update_major == true */) +{ + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; + using weight_t = typename GraphViewType::weight_type; + using e_op_result_t = T; + + auto idx = static_cast(row_first - matrix_partition.get_major_first()) + + static_cast(blockIdx.x); + + while (idx < static_cast(row_last - matrix_partition.get_major_first())) { + vertex_t const* indices{nullptr}; + weight_t const* weights{nullptr}; + edge_t local_degree{}; + thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(idx); + auto e_op_result_sum = + threadIdx.x == 0 ? init : e_op_result_t{}; // relevent only if update_major == true + for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { + auto minor = indices[i]; + auto weight = weights != nullptr ? 
weights[i] : weight_t{1.0}; + auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); + auto row = GraphViewType::is_adj_matrix_transposed + ? minor + : matrix_partition.get_major_from_major_offset_nocheck(idx); + auto col = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_from_major_offset_nocheck(idx) + : minor; + auto row_offset = + GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); + auto col_offset = + GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; + auto e_op_result = evaluate_edge_op() + .compute(row, + col, + weight, + *(adj_matrix_row_value_input_first + row_offset), + *(adj_matrix_col_value_input_first + col_offset), + e_op); + if (update_major) { + e_op_result_sum = plus_edge_op_result(e_op_result_sum, e_op_result); + } else { + atomic_accumulate_edge_op_result(result_value_output_first + minor_offset, e_op_result); + } + } + if (update_major) { + e_op_result_sum = + block_reduce_edge_op_result() + .compute(e_op_result_sum); + if (threadIdx.x == 0) { *(result_value_output_first + idx) = e_op_result_sum; } + } + + idx += gridDim.x; + } +} + +template +void copy_v_transform_reduce_nbr(raft::handle_t const& handle, + GraphViewType const& graph_view, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + EdgeOp e_op, + T init, + VertexValueOutputIterator vertex_value_output_first) +{ + using vertex_t = typename GraphViewType::vertex_type; + + static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); + + auto loop_count = size_t{1}; + if (GraphViewType::is_multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + loop_count = graph_view.is_hypergraph_partitioned() + ? 
graph_view.get_number_of_local_adj_matrix_partitions() + : static_cast(row_comm_size); + } + + for (size_t i = 0; i < loop_count; ++i) { + matrix_partition_device_t matrix_partition( + graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 0 : i); + + auto tmp_buffer_size = vertex_t{0}; + if (GraphViewType::is_multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + + tmp_buffer_size = + in ? GraphViewType::is_adj_matrix_transposed + ? graph_view.is_hypergraph_partitioned() + ? matrix_partition.get_major_size() + : graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i) + : matrix_partition.get_minor_size() + : GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_minor_size() + : graph_view.is_hypergraph_partitioned() + ? matrix_partition.get_major_size() + : graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i); + } + auto tmp_buffer = allocate_comm_buffer(tmp_buffer_size, handle.get_stream()); + auto buffer_first = get_comm_buffer_begin(tmp_buffer); + + auto local_init = init; + if (GraphViewType::is_multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + if (in == GraphViewType::is_adj_matrix_transposed) { + local_init = graph_view.is_hypergraph_partitioned() ? (col_comm_rank == 0) ? init : T{} + : (row_comm_rank == 0) ? init : T{}; + } else { + local_init = graph_view.is_hypergraph_partitioned() ? (row_comm_rank == 0) ? init : T{} + : (col_comm_rank == 0) ? 
init : T{}; + } + } + + if (in != GraphViewType::is_adj_matrix_transposed) { + if (GraphViewType::is_multi_gpu) { + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + buffer_first, + buffer_first + tmp_buffer_size, + local_init); + } else { + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_value_output_first, + vertex_value_output_first + graph_view.get_number_of_local_vertices(), + local_init); + } + } + + int comm_root_rank = 0; + if (GraphViewType::is_multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + comm_root_rank = graph_view.is_hypergraph_partitioned() ? i * row_comm_size + row_comm_rank + : col_comm_rank * row_comm_size + i; + } + + raft::grid_1d_thread_t update_grid(graph_view.get_vertex_partition_size(comm_root_rank), + detail::copy_v_transform_reduce_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + if (GraphViewType::is_multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + + vertex_t row_value_input_offset = + GraphViewType::is_adj_matrix_transposed + ? 0 + : graph_view.is_hypergraph_partitioned() + ? matrix_partition.get_major_value_start_offset() + : graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size + i) - + graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size); + vertex_t col_value_input_offset = + GraphViewType::is_adj_matrix_transposed + ? graph_view.is_hypergraph_partitioned() + ? 
matrix_partition.get_major_value_start_offset() + : graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size + i) - + graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size) + : 0; + + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + graph_view.get_vertex_partition_first(comm_root_rank), + graph_view.get_vertex_partition_last(comm_root_rank), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + buffer_first, + e_op, + local_init); + } else { + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + graph_view.get_vertex_partition_first(comm_root_rank), + graph_view.get_vertex_partition_last(comm_root_rank), + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + vertex_value_output_first, + e_op, + local_init); + } + + if (GraphViewType::is_multi_gpu) { + if (in == GraphViewType::is_adj_matrix_transposed) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); + + if (graph_view.is_hypergraph_partitioned()) { + device_reduce( + col_comm, + buffer_first, + vertex_value_output_first, + static_cast(graph_view.get_vertex_partition_size(i * row_comm_size + i)), + raft::comms::op_t::SUM, + i, + handle.get_stream()); + } else { + for (int j = 0; j < row_comm_size; ++j) { + auto comm_root_rank = col_comm_rank * row_comm_size + j; + device_reduce( + row_comm, + buffer_first + (graph_view.get_vertex_partition_first(comm_root_rank) - + graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size)), + vertex_value_output_first, + static_cast(graph_view.get_vertex_partition_size(comm_root_rank)), + 
raft::comms::op_t::SUM, + j, + handle.get_stream()); + } + } + } else { + CUGRAPH_FAIL("unimplemented."); + } + } + } +} + +} // namespace detail + +/** + * @brief Iterate over the incoming edges to update vertex properties. + * + * This function is inspired by thrust::transfrom_reduce() (iteration over the incoming edges part) + * and thrust::copy() (update vertex properties part, take transform_reduce output as copy input). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row + * input properties. + * @tparam AdjMatrixColValueInputIterator Type of the iterator for graph adjacency matrix column + * input properties. + * @tparam EdgeOp Type of the quaternraft::grid_1d_thread_t + update_grid(matrix_partition.get_major_size(), + detail::copy_v_transform_reduce_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]);ary (or + quinary) edge operator. + * @tparam T Type of the initial value for reduction over the incoming edges. + * @tparam VertexValueOutputIterator Type of the iterator for vertex output property variables. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input + * properties for the first (inclusive) row (assigned to this process in multi-GPU). + * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + + * @p graph_view.get_number_of_adj_matrix_local_rows(). + * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input + * properties for the first (inclusive) column (assigned to this process in multi-GPU). 
+ * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first + * + @p graph_view.get_number_of_adj_matrix_local_cols(). + * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge + * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + + * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, + * get_number_of_local_adj_matrix_partition_cols())) and returns a value to be reduced. + * @param init Initial value to be added to the reduced @e_op return values for each vertex. + * @param vertex_value_output_first Iterator pointing to the vertex property variables for the first + * (inclusive) vertex (assigned to tihs process in multi-GPU). `vertex_value_output_last` + * (exclusive) is deduced as @p vertex_value_output_first + @p + * graph_view.get_number_of_local_vertices(). + */ +template +void copy_v_transform_reduce_in_nbr(raft::handle_t const& handle, + GraphViewType const& graph_view, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + EdgeOp e_op, + T init, + VertexValueOutputIterator vertex_value_output_first) +{ + detail::copy_v_transform_reduce_nbr(handle, + graph_view, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + e_op, + init, + vertex_value_output_first); +} + +/** + * @brief Iterate over the outgoing edges to update vertex properties. + * + * This function is inspired by thrust::transfrom_reduce() (iteration over the outgoing edges + * part) and thrust::copy() (update vertex properties part, take transform_reduce output as copy + * input). + * + * @tparam GraphViewType Type of the passed non-owning graph object. + * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row + * input properties. 
+ * @tparam AdjMatrixColValueInputIterator Type of the iterator for graph adjacency matrix column + * input properties. + * @tparam EdgeOp Type of the quaternary (or quinary) edge operator. + * @tparam T Type of the initial value for reduction over the outgoing edges. + * @tparam VertexValueOutputIterator Type of the iterator for vertex output property variables. + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Non-owning graph object. + * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input + * properties for the first (inclusive) row (assigned to this process in multi-GPU). + * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + * + + * @p graph_view.get_number_of_adj_matrix_local_rows(). + * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input + * properties for the first (inclusive) column (assigned to this process in multi-GPU). + * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p + * adj_matrix_col_value_output_first + * + @p graph_view.get_number_of_adj_matrix_local_cols(). + * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional + * edge weight), *(@p adj_matrix_row_value_input_first + i), and *(@p + * adj_matrix_col_value_input_first + j) (where i is in [0, + * graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, + * get_number_of_local_adj_matrix_partition_cols())) and returns a value to be reduced. + * @param init Initial value to be added to the reduced @e_op return values for each vertex. + * @param vertex_value_output_first Iterator pointing to the vertex property variables for the + * first (inclusive) vertex (assigned to tihs process in multi-GPU). 
`vertex_value_output_last` + * (exclusive) is deduced as @p vertex_value_output_first + @p + * graph_view.get_number_of_local_vertices(). + */ +template +void copy_v_transform_reduce_out_nbr( + raft::handle_t const& handle, + GraphViewType const& graph_view, + AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, + AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, + EdgeOp e_op, + T init, + VertexValueOutputIterator vertex_value_output_first) +{ + detail::copy_v_transform_reduce_nbr(handle, + graph_view, + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + e_op, + init, + vertex_value_output_first); +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/patterns/copy_v_transform_reduce_nbr.cuh b/cpp/include/patterns/copy_v_transform_reduce_nbr.cuh deleted file mode 100644 index 549a1c43c10..00000000000 --- a/cpp/include/patterns/copy_v_transform_reduce_nbr.cuh +++ /dev/null @@ -1,351 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -#pragma once - -#include -#include -#include -#include -#include - -#include -#include - -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace cugraph { -namespace experimental { - -namespace detail { - -// FIXME: block size requires tuning -int32_t constexpr copy_v_transform_reduce_nbr_for_all_low_out_degree_block_size = 128; - -#if 0 -// FIXME: delete this once we verify that the thrust replace in for_all_major_for_all_nbr_low_out_degree is no slower than the original for loop based imoplementation -template -__device__ std::enable_if_t accumulate_edge_op_result(T& lhs, T const& rhs) -{ - lhs = plus_edge_op_result(lhs, rhs); -} - -template -__device__ std::enable_if_t accumulate_edge_op_result(T& lhs, T const& rhs) -{ - atomic_add(&lhs, rhs); -} -#endif - -template -__global__ void for_all_major_for_all_nbr_low_out_degree( - matrix_partition_device_t matrix_partition, - AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, - AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, - ResultValueOutputIterator result_value_output_first, - EdgeOp e_op, - T init /* relevent only if update_major == true */) -{ - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using weight_t = typename GraphViewType::weight_type; - using e_op_result_t = T; - - auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - auto idx = static_cast(tid); - - while (idx < static_cast(matrix_partition.get_major_size())) { - vertex_t const* indices{nullptr}; - weight_t const* weights{nullptr}; - edge_t local_degree{}; - thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(idx); -#if 1 - auto transform_op = [&matrix_partition, - &adj_matrix_row_value_input_first, - &adj_matrix_col_value_input_first, - &e_op, - idx, - indices, - weights] __device__(auto i) { - auto minor = indices[i]; - auto weight = weights != nullptr ? 
weights[i] : weight_t{1.0}; - auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); - auto row = GraphViewType::is_adj_matrix_transposed - ? minor - : matrix_partition.get_major_from_major_offset_nocheck(idx); - auto col = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_from_major_offset_nocheck(idx) - : minor; - auto row_offset = - GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); - auto col_offset = - GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; - return evaluate_edge_op() - .compute(row, - col, - weight, - *(adj_matrix_row_value_input_first + row_offset), - *(adj_matrix_col_value_input_first + col_offset), - e_op); - }; - - if (update_major) { - *(result_value_output_first + idx) = thrust::transform_reduce( - thrust::seq, - thrust::make_counting_iterator(edge_t{0}), - thrust::make_counting_iterator(local_degree), - transform_op, - init, - [] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); }); - } else { - thrust::for_each( - thrust::seq, - thrust::make_counting_iterator(edge_t{0}), - thrust::make_counting_iterator(local_degree), - [&matrix_partition, indices, &result_value_output_first, &transform_op] __device__(auto i) { - auto e_op_result = transform_op(i); - auto minor = indices[i]; - auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); - atomic_accumulate_edge_op_result(result_value_output_first + minor_offset, e_op_result); - }); - } -#else - // FIXME: delete this once we verify that the code above is not slower than this. - e_op_result_t e_op_result_sum{init}; // relevent only if update_major == true - for (edge_t i = 0; i < local_degree; ++i) { - auto minor = indices[i]; - auto weight = weights != nullptr ? weights[i] : weight_t{1.0}; - auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); - auto row = GraphViewType::is_adj_matrix_transposed - ? 
minor - : matrix_partition.get_major_from_major_offset_nocheck(idx); - auto col = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_from_major_offset_nocheck(idx) - : minor; - auto row_offset = - GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); - auto col_offset = - GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; - auto e_op_result = evaluate_edge_op() - .compute(row, - col, - weight, - *(adj_matrix_row_value_input_first + row_offset), - *(adj_matrix_col_value_input_first + col_offset), - e_op); - if (update_major) { - accumulate_edge_op_result(e_op_result_sum, e_op_result); - } else { - accumulate_edge_op_result(*(result_value_output_first + minor_offset), - e_op_result); - } - } - if (update_major) { *(result_value_output_first + idx) = e_op_result_sum; } -#endif - idx += gridDim.x * blockDim.x; - } -} - -} // namespace detail - -/** - * @brief Iterate over the incoming edges to update vertex properties. - * - * This function is inspired by thrust::transfrom_reduce() (iteration over the incoming edges part) - * and thrust::copy() (update vertex properties part, take transform_reduce output as copy input). - * - * @tparam GraphViewType Type of the passed non-owning graph object. - * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row - * input properties. - * @tparam AdjMatrixColValueInputIterator Type of the iterator for graph adjacency matrix column - * input properties. - * @tparam EdgeOp Type of the quaternary (or quinary) edge operator. - * @tparam T Type of the initial value for reduction over the incoming edges. - * @tparam VertexValueOutputIterator Type of the iterator for vertex output property variables. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Non-owning graph object. 
- * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input - * properties for the first (inclusive) row (assigned to this process in multi-GPU). - * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + - * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). - * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input - * properties for the first (inclusive) column (assigned to this process in multi-GPU). - * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first - * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). - * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge - * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + - * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, - * get_number_of_local_adj_matrix_partition_cols())) and returns a value to be reduced. - * @param init Initial value to be added to the reduced @e_op return values for each vertex. - * @param vertex_value_output_first Iterator pointing to the vertex property variables for the first - * (inclusive) vertex (assigned to tihs process in multi-GPU). `vertex_value_output_last` - * (exclusive) is deduced as @p vertex_value_output_first + @p - * graph_view.get_number_of_local_vertices(). 
- */ -template -void copy_v_transform_reduce_in_nbr(raft::handle_t const& handle, - GraphViewType const& graph_view, - AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, - AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, - EdgeOp e_op, - T init, - VertexValueOutputIterator vertex_value_output_first) -{ - static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); - - if (GraphViewType::is_multi_gpu) { - CUGRAPH_FAIL("unimplemented."); - } else { - matrix_partition_device_t matrix_partition(graph_view, 0); - - raft::grid_1d_thread_t update_grid( - matrix_partition.get_major_size(), - detail::copy_v_transform_reduce_nbr_for_all_low_out_degree_block_size, - handle.get_device_properties().maxGridSize[0]); - - if (!GraphViewType::is_adj_matrix_transposed) { - thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_value_output_first, - vertex_value_output_first + graph_view.get_number_of_local_vertices(), - init); - } - - assert(graph_view.get_number_of_local_vertices() == - graph_view.get_number_of_local_adj_matrix_partition_rows()); - assert(graph_view.get_number_of_local_vertices() == - graph_view.get_number_of_local_adj_matrix_partition_cols()); - detail::for_all_major_for_all_nbr_low_out_degree - <<>>( - matrix_partition, - adj_matrix_row_value_input_first, - adj_matrix_col_value_input_first, - vertex_value_output_first, - e_op, - init); - } -} - -/** - * @brief Iterate over the outgoing edges to update vertex properties. - * - * This function is inspired by thrust::transfrom_reduce() (iteration over the outgoing edges part) - * and thrust::copy() (update vertex properties part, take transform_reduce output as copy input). - * - * @tparam GraphViewType Type of the passed non-owning graph object. - * @tparam AdjMatrixRowValueInputIterator Type of the iterator for graph adjacency matrix row - * input properties. 
- * @tparam AdjMatrixColValueInputIterator Type of the iterator for graph adjacency matrix column - * input properties. - * @tparam EdgeOp Type of the quaternary (or quinary) edge operator. - * @tparam T Type of the initial value for reduction over the outgoing edges. - * @tparam VertexValueOutputIterator Type of the iterator for vertex output property variables. - * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and - * handles to various CUDA libraries) to run graph algorithms. - * @param graph_view Non-owning graph object. - * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input - * properties for the first (inclusive) row (assigned to this process in multi-GPU). - * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + - * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). - * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input - * properties for the first (inclusive) column (assigned to this process in multi-GPU). - * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first - * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). - * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge - * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + - * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, - * get_number_of_local_adj_matrix_partition_cols())) and returns a value to be reduced. - * @param init Initial value to be added to the reduced @e_op return values for each vertex. - * @param vertex_value_output_first Iterator pointing to the vertex property variables for the first - * (inclusive) vertex (assigned to tihs process in multi-GPU). 
`vertex_value_output_last` - * (exclusive) is deduced as @p vertex_value_output_first + @p - * graph_view.get_number_of_local_vertices(). - */ -template -void copy_v_transform_reduce_out_nbr( - raft::handle_t const& handle, - GraphViewType const& graph_view, - AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, - AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, - EdgeOp e_op, - T init, - VertexValueOutputIterator vertex_value_output_first) -{ - static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); - - if (GraphViewType::is_multi_gpu) { - CUGRAPH_FAIL("unimplemented."); - } else { - matrix_partition_device_t matrix_partition(graph_view, 0); - - raft::grid_1d_thread_t update_grid( - matrix_partition.get_major_size(), - detail::copy_v_transform_reduce_nbr_for_all_low_out_degree_block_size, - handle.get_device_properties().maxGridSize[0]); - - if (GraphViewType::is_adj_matrix_transposed) { - thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_value_output_first, - vertex_value_output_first + graph_view.get_number_of_local_vertices(), - init); - } - - assert(graph_view.get_number_of_local_vertices() == - graph_view.get_number_of_local_adj_matrix_partition_rows()); - assert(graph_view.get_number_of_local_vertices() == - graph_view.get_number_of_local_adj_matrix_partition_cols()); - detail::for_all_major_for_all_nbr_low_out_degree - <<>>( - matrix_partition, - adj_matrix_row_value_input_first, - adj_matrix_col_value_input_first, - vertex_value_output_first, - e_op, - init); - } -} - -} // namespace experimental -} // namespace cugraph diff --git a/cpp/include/patterns/count_if_e.cuh b/cpp/include/patterns/count_if_e.cuh index 2de96cdb04a..04f22033f91 100644 --- a/cpp/include/patterns/count_if_e.cuh +++ b/cpp/include/patterns/count_if_e.cuh @@ -18,10 +18,12 @@ #include #include #include +#include #include #include #include +#include #include #include @@ -35,14 +37,14 @@ namespace experimental { 
namespace detail { // FIXME: block size requires tuning -int32_t constexpr count_if_e_for_all_low_out_degree_block_size = 128; +int32_t constexpr count_if_e_for_all_block_size = 128; // FIXME: function names conflict if included with transform_reduce_e.cuh template -__global__ void for_all_major_for_all_nbr_low_out_degree( +__global__ void for_all_major_for_all_nbr_low_degree( matrix_partition_device_t matrix_partition, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, @@ -132,7 +134,7 @@ __global__ void for_all_major_for_all_nbr_low_out_degree( idx += gridDim.x * blockDim.x; } - using BlockReduce = cub::BlockReduce; + using BlockReduce = cub::BlockReduce; __shared__ typename BlockReduce::TempStorage temp_storage; count = BlockReduce(temp_storage).Sum(count); if (threadIdx.x == 0) { *(block_counts + blockIdx.x) = count; } @@ -192,15 +194,15 @@ typename GraphViewType::edge_type count_if_e( GraphViewType::is_adj_matrix_transposed ? 
matrix_partition.get_major_value_start_offset() : 0; raft::grid_1d_thread_t update_grid(matrix_partition.get_major_size(), - detail::count_if_e_for_all_low_out_degree_block_size, + detail::count_if_e_for_all_block_size, handle.get_device_properties().maxGridSize[0]); rmm::device_vector block_counts(update_grid.num_blocks); - detail::for_all_major_for_all_nbr_low_out_degree<<>>( + detail::for_all_major_for_all_nbr_low_degree<<>>( matrix_partition, adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first + col_value_input_offset, @@ -220,8 +222,7 @@ typename GraphViewType::edge_type count_if_e( } if (GraphViewType::is_multi_gpu) { - // need to reduce count - CUGRAPH_FAIL("unimplemented."); + count = host_scalar_allreduce(handle.get_comms(), count, handle.get_stream()); } return count; diff --git a/cpp/include/patterns/count_if_v.cuh b/cpp/include/patterns/count_if_v.cuh index 6e4ddeee16f..c90b259cdde 100644 --- a/cpp/include/patterns/count_if_v.cuh +++ b/cpp/include/patterns/count_if_v.cuh @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -59,8 +60,7 @@ typename GraphViewType::vertex_type count_if_v(raft::handle_t const& handle, vertex_value_input_first + graph_view.get_number_of_local_vertices(), v_op); if (GraphViewType::is_multi_gpu) { - // need to reduce count - CUGRAPH_FAIL("unimplemented."); + count = host_scalar_allreduce(handle.get_comms(), count, handle.get_stream()); } return count; } @@ -96,8 +96,7 @@ typename GraphViewType::vertex_type count_if_v(raft::handle_t const& handle, auto count = thrust::count_if( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), input_first, input_last, v_op); if (GraphViewType::is_multi_gpu) { - // need to reduce count - CUGRAPH_FAIL("unimplemented."); + count = host_scalar_allreduce(handle.get_comms(), count, handle.get_stream()); } return count; } diff --git a/cpp/include/patterns/edge_op_utils.cuh b/cpp/include/patterns/edge_op_utils.cuh index 
184d1f1e794..58fb31c7605 100644 --- a/cpp/include/patterns/edge_op_utils.cuh +++ b/cpp/include/patterns/edge_op_utils.cuh @@ -91,6 +91,13 @@ __host__ __device__ std::enable_if_t::value, T> plus_edge_op_ return plus_thrust_tuple()(lhs, rhs); } +template +__device__ std::enable_if_t::value, void> +atomic_accumulate_edge_op_result(Iterator iter, T const& value) +{ + // no-op +} + template __device__ std::enable_if_t::value_type, T>::value && @@ -101,15 +108,6 @@ __device__ atomicAdd(&(thrust::raw_reference_cast(*iter)), value); } -template -__device__ std::enable_if_t::value && - std::is_arithmetic::value, - void> -atomic_accumulate_edge_op_result(Iterator iter, T const& value) -{ - // no-op -} - template __device__ std::enable_if_t::value_type>::value && @@ -123,5 +121,41 @@ __device__ return; } +template +struct warp_reduce_edge_op_result { // only warp lane 0 has a valid result + template + __device__ std::enable_if_t::value, T> compute(T const& edge_op_result) + { + auto ret = edge_op_result; + for (auto offset = raft::warp_size() / 2; offset > 0; offset /= 2) { + ret += __shfl_down_sync(raft::warp_full_mask(), ret, offset); + } + return ret; + } + + template + __device__ std::enable_if_t::value, T> compute(T const& edge_op_result) + { + return warp_reduce_thrust_tuple()(edge_op_result); + } +}; + +template +struct block_reduce_edge_op_result { + template + __device__ std::enable_if_t::value, T> compute(T const& edge_op_result) + { + using BlockReduce = cub::BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + return BlockReduce(temp_storage).Sum(edge_op_result); + } + + template + __device__ std::enable_if_t::value, T> compute(T const& edge_op_result) + { + return block_reduce_thrust_tuple()(edge_op_result); + } +}; + } // namespace experimental } // namespace cugraph diff --git a/cpp/include/patterns/reduce_v.cuh b/cpp/include/patterns/reduce_v.cuh index bc12f13225d..12224dc55f4 100644 --- a/cpp/include/patterns/reduce_v.cuh +++ 
b/cpp/include/patterns/reduce_v.cuh @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -55,8 +56,7 @@ T reduce_v(raft::handle_t const& handle, vertex_value_input_first + graph_view.get_number_of_local_vertices(), init); if (GraphViewType::is_multi_gpu) { - // need to reduce ret - CUGRAPH_FAIL("unimplemented."); + ret = host_scalar_allreduce(handle.get_comms(), ret, handle.get_stream()); } return ret; } @@ -89,8 +89,7 @@ T reduce_v(raft::handle_t const& handle, auto ret = thrust::reduce( rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), input_first, input_last, init); if (GraphViewType::is_multi_gpu) { - // need to reduce ret - CUGRAPH_FAIL("unimplemented."); + ret = host_scalar_allreduce(handle.get_comms(), ret, handle.get_stream()); } return ret; } diff --git a/cpp/include/patterns/transform_reduce_e.cuh b/cpp/include/patterns/transform_reduce_e.cuh index c4db3355e99..3f334ceff00 100644 --- a/cpp/include/patterns/transform_reduce_e.cuh +++ b/cpp/include/patterns/transform_reduce_e.cuh @@ -25,7 +25,6 @@ #include #include -#include #include #include @@ -36,31 +35,14 @@ namespace experimental { namespace detail { // FIXME: block size requires tuning -int32_t constexpr transform_reduce_e_for_all_low_out_degree_block_size = 128; - -template -struct block_reduce_edge_op_result { - template - __device__ std::enable_if_t::value, T> compute(T const& edge_op_result) - { - using BlockReduce = cub::BlockReduce; - __shared__ typename BlockReduce::TempStorage temp_storage; - return BlockReduce(temp_storage).Sum(edge_op_result); - } - - template - __device__ std::enable_if_t::value, T> compute(T const& edge_op_result) - { - return block_reduce_thrust_tuple()(edge_op_result); - } -}; +int32_t constexpr transform_reduce_e_for_all_block_size = 128; template -__global__ void for_all_major_for_all_nbr_low_out_degree( +__global__ void for_all_major_for_all_nbr_low_degree( matrix_partition_device_t matrix_partition, AdjMatrixRowValueInputIterator 
adj_matrix_row_value_input_first, AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, @@ -154,9 +136,8 @@ __global__ void for_all_major_for_all_nbr_low_out_degree( } e_op_result_sum = - block_reduce_edge_op_result() - .compute(e_op_result_sum); + block_reduce_edge_op_result().compute( + e_op_result_sum); if (threadIdx.x == 0) { *(block_result_first + blockIdx.x) = e_op_result_sum; } } @@ -217,15 +198,15 @@ T transform_reduce_e(raft::handle_t const& handle, GraphViewType::is_adj_matrix_transposed ? matrix_partition.get_major_value_start_offset() : 0; raft::grid_1d_thread_t update_grid(matrix_partition.get_major_size(), - detail::transform_reduce_e_for_all_low_out_degree_block_size, + detail::transform_reduce_e_for_all_block_size, handle.get_device_properties().maxGridSize[0]); rmm::device_vector block_results(update_grid.num_blocks); - detail::for_all_major_for_all_nbr_low_out_degree<<>>( + detail::for_all_major_for_all_nbr_low_degree<<>>( matrix_partition, adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first + col_value_input_offset, @@ -248,8 +229,7 @@ T transform_reduce_e(raft::handle_t const& handle, } if (GraphViewType::is_multi_gpu) { - // need reduction - CUGRAPH_FAIL("unimplemented."); + result = host_scalar_allreduce(handle.get_comms(), result, handle.get_stream()); } return plus_edge_op_result(init, result); diff --git a/cpp/include/patterns/transform_reduce_v.cuh b/cpp/include/patterns/transform_reduce_v.cuh index 0d31df19b35..02538c36f47 100644 --- a/cpp/include/patterns/transform_reduce_v.cuh +++ b/cpp/include/patterns/transform_reduce_v.cuh @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -62,8 +63,7 @@ T transform_reduce_v(raft::handle_t const& handle, init, thrust::plus()); if (GraphViewType::is_multi_gpu) { - // need to reduce ret - CUGRAPH_FAIL("unimplemented."); + ret = host_scalar_allreduce(handle.get_comms(), ret, handle.get_stream()); } return ret; } @@ -106,8 +106,7 @@ T 
transform_reduce_v(raft::handle_t const& handle, init, thrust::plus()); if (GraphViewType::is_multi_gpu) { - // need to reduce ret - CUGRAPH_FAIL("unimplemented."); + ret = host_scalar_allreduce(handle.get_comms(), ret, handle.get_stream()); } return ret; } diff --git a/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh b/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh index 26a05787221..f5af03d647c 100644 --- a/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh +++ b/cpp/include/patterns/transform_reduce_v_with_adj_matrix_row.cuh @@ -16,6 +16,7 @@ #pragma once #include +#include #include #include @@ -69,23 +70,44 @@ T transform_reduce_v_with_adj_matrix_row( VertexOp v_op, T init) { + T ret{}; + + auto vertex_first = graph_view.get_local_vertex_first(); + auto vertex_last = graph_view.get_local_vertex_last(); + for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { + auto row_first = graph_view.get_local_adj_matrix_partition_row_first(i); + auto row_last = graph_view.get_local_adj_matrix_partition_row_last(i); + + auto range_first = std::max(vertex_first, row_first); + auto range_last = std::min(vertex_last, row_last); + + if (range_last > range_first) { + matrix_partition_device_t matrix_partition(graph_view, i); + auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? 
0 + : matrix_partition.get_major_value_start_offset(); + + auto input_first = thrust::make_zip_iterator(thrust::make_tuple( + vertex_value_input_first + (range_first - vertex_first), + adj_matrix_row_value_input_first + row_value_input_offset + (range_first - row_first))); + auto v_op_wrapper = [v_op] __device__(auto v_and_row_val) { + return v_op(thrust::get<0>(v_and_row_val), thrust::get<1>(v_and_row_val)); + }; + ret += + thrust::transform_reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + input_first, + input_first + (range_last - range_first), + v_op_wrapper, + T{}, + thrust::plus()); + } + } + if (GraphViewType::is_multi_gpu) { - CUGRAPH_FAIL("unimplemented."); - } else { - assert(graph_view.get_number_of_local_vertices() == - graph_view.get_number_of_local_adj_matrix_partition_rows()); - auto input_first = thrust::make_zip_iterator( - thrust::make_tuple(vertex_value_input_first, adj_matrix_row_value_input_first)); - auto v_op_wrapper = [v_op] __device__(auto v_and_row_val) { - return v_op(thrust::get<0>(v_and_row_val), thrust::get<1>(v_and_row_val)); - }; - return thrust::transform_reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - input_first, - input_first + graph_view.get_number_of_local_vertices(), - v_op_wrapper, - init, - thrust::plus()); + ret = host_scalar_allreduce(handle.get_comms(), ret, handle.get_stream()); } + + return init + ret; } } // namespace experimental diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh index 7ba21abac8b..a1d18e26d1c 100644 --- a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -17,14 +17,18 @@ #include #include +#include #include #include +#include #include #include #include #include +#include +#include #include #include #include @@ -33,6 +37,7 @@ #include #include +#include #include #include @@ -42,8 +47,8 @@ namespace 
experimental { namespace detail { // FIXME: block size requires tuning -int32_t constexpr update_frontier_v_push_if_out_nbr_for_all_low_out_degree_block_size = 128; -int32_t constexpr update_frontier_v_push_if_out_nbr_update_block_size = 128; +int32_t constexpr update_frontier_v_push_if_out_nbr_for_all_block_size = 128; +int32_t constexpr update_frontier_v_push_if_out_nbr_update_block_size = 128; template -__global__ void for_all_frontier_row_for_all_nbr_low_out_degree( +__global__ void for_all_frontier_row_for_all_nbr_low_degree( matrix_partition_device_t matrix_partition, RowIterator row_first, RowIterator row_last, @@ -142,6 +147,9 @@ size_t reduce_buffer_elements(raft::handle_t const& handle, // split one thrust::reduce_by_key call to multiple thrust::reduce_by_key calls if the // temporary buffer size exceeds the maximum buffer size (may be definied as percentage of the // system HBM size or a function of the maximum number of threads in the system)) + // FIXME: actually, we can find how many unique keys are here by now. + // FIXME: if GraphViewType::is_multi_gpu is true, this should be executed on the GPU holding the + // vertex unless reduce_op is a pure function. 
rmm::device_vector keys(num_buffer_elements); rmm::device_vector values(num_buffer_elements); auto it = thrust::reduce_by_key(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), @@ -162,6 +170,9 @@ size_t reduce_buffer_elements(raft::handle_t const& handle, values.begin(), values.begin() + num_reduced_buffer_elements, buffer_payload_output_first); + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // this is necessary as kyes & values will become out-of-scope once + // this function returns return num_reduced_buffer_elements; } } @@ -330,122 +341,158 @@ void update_frontier_v_push_if_out_nbr( static_assert(!GraphViewType::is_adj_matrix_transposed, "GraphViewType should support the push model."); - using vertex_t = typename GraphViewType::vertex_type; - using edge_t = typename GraphViewType::edge_type; - using reduce_op_input_t = typename ReduceOp::type; + using vertex_t = typename GraphViewType::vertex_type; + using edge_t = typename GraphViewType::edge_type; - std::vector frontier_adj_matrix_partition_offsets( - graph_view.get_number_of_local_adj_matrix_partitions() + 1, - 0); // relevant only if GraphViewType::is_multi_gpu is true - thrust::device_vector - frontier_rows{}; // relevant only if GraphViewType::is_multi_gpu is true - edge_t max_pushes{0}; + // 1. fill the buffer + + vertex_frontier.set_buffer_idx_value(0); + auto loop_count = size_t{1}; if (GraphViewType::is_multi_gpu) { - // need to merge row_frontier and update frontier_offsets; - CUGRAPH_FAIL("unimplemented."); - -#if 0 // comment out to suppress "loop is not reachable warning till the merge part is - // implemented." 
- for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { - matrix_partition_device_t matrix_partition(graph_view, i); - - max_pushes += thrust::transform_reduce( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - frontier_rows.begin() + frontier_adj_matrix_partition_offsets[i], - frontier_rows.begin() + frontier_adj_matrix_partition_offsets[i + 1], - [matrix_partition] __device__(auto row) { - auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); - return matrix_partition.get_local_degree(row_offset); - }, - edge_t{0}, - thrust::plus()); - } -#endif - } else { - matrix_partition_device_t matrix_partition(graph_view, 0); - - max_pushes = thrust::transform_reduce( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_first, - vertex_last, - [matrix_partition] __device__(auto row) { - auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); - return matrix_partition.get_local_degree(row_offset); - }, - edge_t{0}, - thrust::plus()); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + loop_count = graph_view.is_hypergraph_partitioned() + ? graph_view.get_number_of_local_adj_matrix_partitions() + : static_cast(row_comm_size); } - // FIXME: This is highly pessimistic for single GPU (and multi-GPU as well if we maintain - // additional per column data for filtering in e_op). If we can pause & resume execution if - // buffer needs to be increased (and if we reserve address space to avoid expensive - // reallocation; - // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management/), we can - // start with a smaller buffer size (especially when the frontier size is large). 
- vertex_frontier.resize_buffer(max_pushes); - vertex_frontier.set_buffer_idx_value(0); - auto buffer_first = vertex_frontier.buffer_begin(); - auto buffer_key_first = std::get<0>(buffer_first); - auto buffer_payload_first = std::get<1>(buffer_first); + for (size_t i = 0; i < loop_count; ++i) { + matrix_partition_device_t matrix_partition( + graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 0 : i); + + rmm::device_uvector frontier_rows( + 0, handle.get_stream()); // relevant only if GraphViewType::is_multi_gpu is true + + if (GraphViewType::is_multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + + auto sub_comm_rank = graph_view.is_hypergraph_partitioned() ? col_comm_rank : row_comm_rank; + auto frontier_size = (static_cast(sub_comm_rank) == i) + ? thrust::distance(vertex_first, vertex_last) + : size_t{0}; + if (graph_view.is_hypergraph_partitioned()) { + col_comm.bcast(&frontier_size, 1, i, handle.get_stream()); + } else { + row_comm.bcast(&frontier_size, 1, i, handle.get_stream()); + } + if (static_cast(sub_comm_rank) != i) { + frontier_rows.resize(frontier_size, handle.get_stream()); + } + device_bcast(graph_view.is_hypergraph_partitioned() ? col_comm : row_comm, + vertex_first, + frontier_rows.begin(), + frontier_rows.size(), + i, + handle.get_stream()); + } - if (GraphViewType::is_multi_gpu) { - for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { - matrix_partition_device_t matrix_partition(graph_view, i); - auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed - ? 
0 - : matrix_partition.get_major_value_start_offset(); - - raft::grid_1d_thread_t for_all_low_out_degree_grid( - frontier_adj_matrix_partition_offsets[i + 1] - frontier_adj_matrix_partition_offsets[i], - detail::update_frontier_v_push_if_out_nbr_for_all_low_out_degree_block_size, - handle.get_device_properties().maxGridSize[0]); - - // FIXME: This is highly inefficeint for graphs with high-degree vertices. If we renumber - // vertices to insure that rows within a partition are sorted by their out-degree in - // decreasing order, we will apply this kernel only to low out-degree vertices. - detail:: - for_all_frontier_row_for_all_nbr_low_out_degree<<>>( - matrix_partition, - frontier_rows.begin() + frontier_adj_matrix_partition_offsets[i], - frontier_rows.begin() + frontier_adj_matrix_partition_offsets[i + 1], - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first, - buffer_key_first, - buffer_payload_first, - vertex_frontier.get_buffer_idx_ptr(), - e_op); + edge_t max_pushes = + frontier_rows.size() > 0 + ? thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + frontier_rows.begin(), + frontier_rows.end(), + [matrix_partition] __device__(auto row) { + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + return matrix_partition.get_local_degree(row_offset); + }, + edge_t{0}, + thrust::plus()) + : thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_first, + vertex_last, + [matrix_partition] __device__(auto row) { + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + return matrix_partition.get_local_degree(row_offset); + }, + edge_t{0}, + thrust::plus()); + + // FIXME: This is highly pessimistic for single GPU (and multi-GPU as well if we maintain + // additional per column data for filtering in e_op). 
If we can pause & resume execution if + // buffer needs to be increased (and if we reserve address space to avoid expensive + // reallocation; + // https://devblogs.nvidia.com/introducing-low-level-gpu-virtual-memory-management/), we can + // start with a smaller buffer size (especially when the frontier size is large). + // for special cases when we can assure that there is no more than one push per destination + // (e.g. if cugraph::experimental::reduce_op::any is used), we can limit the buffer size to + // std::min(max_pushes, matrix_partition.get_minor_size()). + // For Volta+, we can limit the buffer size to std::min(max_pushes, + // matrix_partition.get_minor_size()) if the reduction operation is a pure function if we use + // locking. + // FIXME: if i != 0, this will require costly reallocation if we don't use the new CUDA feature + // to reserve address space. + vertex_frontier.resize_buffer(vertex_frontier.get_buffer_idx_value() + max_pushes); + auto buffer_first = vertex_frontier.buffer_begin(); + auto buffer_key_first = std::get<0>(buffer_first); + auto buffer_payload_first = std::get<1>(buffer_first); + + vertex_t row_value_input_offset = 0; + if (GraphViewType::is_multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + row_value_input_offset = + graph_view.is_hypergraph_partitioned() + ? 
matrix_partition.get_major_value_start_offset() + : graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size + i) - + graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size); } - } else { - matrix_partition_device_t matrix_partition(graph_view, 0); - raft::grid_1d_thread_t for_all_low_out_degree_grid( - thrust::distance(vertex_first, vertex_last), - detail::update_frontier_v_push_if_out_nbr_for_all_low_out_degree_block_size, + raft::grid_1d_thread_t for_all_low_degree_grid( + frontier_rows.size() > 0 ? frontier_rows.size() : thrust::distance(vertex_first, vertex_last), + detail::update_frontier_v_push_if_out_nbr_for_all_block_size, handle.get_device_properties().maxGridSize[0]); // FIXME: This is highly inefficeint for graphs with high-degree vertices. If we renumber - // vertices to insure that rows within a partition are sorted by their out-degree in - // decreasing order, we will apply this kernel only to low out-degree vertices. - detail:: - for_all_frontier_row_for_all_nbr_low_out_degree<<>>( + // vertices to insure that rows within a partition are sorted by their out-degree in decreasing + // order, we will apply this kernel only to low out-degree vertices. + if (frontier_rows.size() > 0) { + detail::for_all_frontier_row_for_all_nbr_low_degree<<>>( + matrix_partition, + frontier_rows.begin(), + frontier_rows.begin(), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first, + buffer_key_first, + buffer_payload_first, + vertex_frontier.get_buffer_idx_ptr(), + e_op); + } else { + detail::for_all_frontier_row_for_all_nbr_low_degree<<>>( matrix_partition, vertex_first, vertex_last, - adj_matrix_row_value_input_first, + adj_matrix_row_value_input_first + row_value_input_offset, adj_matrix_col_value_input_first, buffer_key_first, buffer_payload_first, vertex_frontier.get_buffer_idx_ptr(), e_op); + } } + // 2. 
reduce the buffer + + auto num_buffer_offset = edge_t{0}; + + auto buffer_first = vertex_frontier.buffer_begin(); + auto buffer_key_first = std::get<0>(buffer_first) + num_buffer_offset; + auto buffer_payload_first = std::get<1>(buffer_first) + num_buffer_offset; + auto num_buffer_elements = detail::reduce_buffer_elements(handle, buffer_key_first, buffer_payload_first, @@ -453,11 +500,134 @@ void update_frontier_v_push_if_out_nbr( reduce_op); if (GraphViewType::is_multi_gpu) { - // need to exchange buffer elements (and may reduce again) - CUGRAPH_FAIL("unimplemented."); + auto& comm = handle.get_comms(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); + + std::vector h_vertex_lasts(graph_view.is_hypergraph_partitioned() ? row_comm_size + : col_comm_size); + for (size_t i = 0; i < h_vertex_lasts.size(); ++i) { + h_vertex_lasts[i] = graph_view.get_vertex_partition_last( + graph_view.is_hypergraph_partitioned() ? 
col_comm_rank * row_comm_size + i + : row_comm_rank * col_comm_size + i); + } + rmm::device_uvector d_vertex_lasts(h_vertex_lasts.size(), handle.get_stream()); + raft::update_device( + d_vertex_lasts.data(), h_vertex_lasts.data(), h_vertex_lasts.size(), handle.get_stream()); + rmm::device_uvector d_tx_buffer_last_boundaries(d_vertex_lasts.size(), + handle.get_stream()); + thrust::upper_bound(d_vertex_lasts.begin(), + d_vertex_lasts.end(), + buffer_key_first, + buffer_key_first + num_buffer_elements, + d_tx_buffer_last_boundaries.begin()); + std::vector h_tx_buffer_last_boundaries(d_tx_buffer_last_boundaries.size()); + raft::update_host(h_tx_buffer_last_boundaries.data(), + d_tx_buffer_last_boundaries.data(), + d_tx_buffer_last_boundaries.size(), + handle.get_stream()); + std::vector tx_counts(h_tx_buffer_last_boundaries.size()); + std::adjacent_difference( + h_tx_buffer_last_boundaries.begin(), h_tx_buffer_last_boundaries.end(), tx_counts.begin()); + + std::vector rx_counts(graph_view.is_hypergraph_partitioned() ? row_comm_size + : col_comm_size); + std::vector count_requests(tx_counts.size() + rx_counts.size()); + for (size_t i = 0; i < tx_counts.size(); ++i) { + comm.isend(&tx_counts[i], + 1, + graph_view.is_hypergraph_partitioned() ? col_comm_rank * row_comm_size + i + : row_comm_rank * col_comm_size + i, + 0 /* tag */, + count_requests.data() + i); + } + for (size_t i = 0; i < rx_counts.size(); ++i) { + comm.irecv(&rx_counts[i], + 1, + graph_view.is_hypergraph_partitioned() ? 
col_comm_rank * row_comm_size + i + : row_comm_rank + i * row_comm_size, + 0 /* tag */, + count_requests.data() + tx_counts.size() + i); + } + comm.waitall(count_requests.size(), count_requests.data()); + + std::vector tx_offsets(tx_counts.size() + 1, edge_t{0}); + std::partial_sum(tx_counts.begin(), tx_counts.end(), tx_offsets.begin() + 1); + std::vector rx_offsets(rx_counts.size() + 1, edge_t{0}); + std::partial_sum(rx_counts.begin(), rx_counts.end(), rx_offsets.begin() + 1); + + // FIXME: this will require costly reallocation if we don't use the new CUDA feature to reserve + // address space. + vertex_frontier.resize_buffer(num_buffer_elements + rx_offsets.back()); + + auto buffer_first = vertex_frontier.buffer_begin(); + auto buffer_key_first = std::get<0>(buffer_first) + num_buffer_offset; + auto buffer_payload_first = std::get<1>(buffer_first) + num_buffer_offset; + + auto constexpr tuple_size = thrust_tuple_size_or_one< + typename std::iterator_traits::value_type>::value; + + std::vector buffer_requests((tx_counts.size() + rx_counts.size()) * + (1 + tuple_size)); + for (size_t i = 0; i < tx_counts.size(); ++i) { + auto comm_dst_rank = graph_view.is_hypergraph_partitioned() + ? col_comm_rank * row_comm_size + i + : row_comm_rank * col_comm_size + i; + comm.isend(detail::iter_to_raw_ptr(buffer_key_first + tx_offsets[i]), + static_cast(tx_counts[i]), + comm_dst_rank, + int{0} /* tag */, + buffer_requests.data() + i * (1 + tuple_size)); + device_isend( + comm, + buffer_payload_first + tx_offsets[i], + static_cast(tx_counts[i]), + comm_dst_rank, + int{1} /* base tag */, + buffer_requests.data() + (i * (1 + tuple_size) + 1)); + } + for (size_t i = 0; i < rx_counts.size(); ++i) { + auto comm_src_rank = graph_view.is_hypergraph_partitioned() + ? 
col_comm_rank * row_comm_size + i + : row_comm_rank + i * row_comm_size; + comm.irecv(detail::iter_to_raw_ptr(buffer_key_first + num_buffer_elements + rx_offsets[i]), + static_cast(rx_counts[i]), + comm_src_rank, + int{0} /* tag */, + buffer_requests.data() + ((tx_counts.size() + i) * (1 + tuple_size))); + device_irecv( + comm, + buffer_payload_first + num_buffer_elements + rx_offsets[i], + static_cast(rx_counts[i]), + comm_src_rank, + int{1} /* base tag */, + buffer_requests.data() + ((tx_counts.size() + i) * (1 + tuple_size) + 1)); + } + comm.waitall(buffer_requests.size(), buffer_requests.data()); + + // FIXME: this does not exploit the fact that each segment is sorted. Lost performance + // optimization opportunities. + // FIXME: we can use [vertex_frontier.buffer_begin(), vertex_frontier.buffer_begin() + + // num_buffer_elements) as temporary buffer inside reduce_buffer_elements(). + num_buffer_offset = num_buffer_elements; + num_buffer_elements = detail::reduce_buffer_elements(handle, + buffer_key_first + num_buffer_elements, + buffer_payload_first + num_buffer_elements, + rx_offsets.back(), + reduce_op); } + // 3. update vertex properties + if (num_buffer_elements > 0) { + auto buffer_first = vertex_frontier.buffer_begin(); + auto buffer_key_first = std::get<0>(buffer_first) + num_buffer_offset; + auto buffer_payload_first = std::get<1>(buffer_first) + num_buffer_offset; + raft::grid_1d_thread_t update_grid(num_buffer_elements, detail::update_frontier_v_push_if_out_nbr_update_block_size, handle.get_device_properties().maxGridSize[0]); @@ -491,14 +661,12 @@ void update_frontier_v_push_if_out_nbr( /* FIXME: -is_fully_functional type trait (???) for reduce_op iterating over lower triangular (or upper triangular) : triangle counting LRB might be necessary if the cost of processing an edge (i, j) is a function of degree(i) and degree(j) : triangle counting push-pull switching support (e.g. 
DOBFS), in this case, we need both CSR & CSC (trade-off execution time vs memory requirement, unless graph is symmetric) -should I take multi-GPU support as a template argument? if graph is symmetric, there will be additional optimization opportunities (e.g. in-degree == out-degree) For BFS, sending a bit vector (for the entire set of dest vertices per partitoin may work better we can use thrust::set_intersection for triangle counting think about adding thrust diff --git a/cpp/include/patterns/vertex_frontier.cuh b/cpp/include/patterns/vertex_frontier.cuh index fba6326fd8d..3b4b05ffb2f 100644 --- a/cpp/include/patterns/vertex_frontier.cuh +++ b/cpp/include/patterns/vertex_frontier.cuh @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include @@ -163,8 +164,7 @@ class Bucket { template std::enable_if_t aggregate_size() const { - CUGRAPH_FAIL("unimplemented."); - return size_; + return host_scalar_allreduce(handle_ptr_->get_comms(), size_, handle_ptr_->get_stream()); } template @@ -354,6 +354,7 @@ class VertexFrontier { size_t buffer_capacity_{0}; rmm::device_scalar buffer_idx_{}; + // FIXME: better pick between this apporach or the approach used in allocate_comm_buffer size_t compute_aggregate_buffer_size_in_bytes(size_t size) { size_t aggregate_buffer_size_in_bytes = diff --git a/cpp/include/utilities/comm_utils.cuh b/cpp/include/utilities/comm_utils.cuh new file mode 100644 index 00000000000..6cd6e62bc3a --- /dev/null +++ b/cpp/include/utilities/comm_utils.cuh @@ -0,0 +1,788 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include +#include + +#include +#include +#include + +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +template +struct update_vector_of_tuple_scalar_elements_from_tuple_impl { + void update(std::vector& tuple_scalar_elements, TupleType const& tuple) const + { + using element_t = typename thrust::tuple_element::type; + static_assert(sizeof(element_t) <= sizeof(int64_t)); + auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); + *ptr = thrust::get(tuple); + update_vector_of_tuple_scalar_elements_from_tuple_impl().update( + tuple_scalar_elements, tuple); + } +}; + +template +struct update_vector_of_tuple_scalar_elements_from_tuple_impl { + void update(std::vector& tuple_scalar_elements, TupleType const& tuple) const { return; } +}; + +template +struct update_tuple_from_vector_of_tuple_scalar_elements_impl { + void update(TupleType& tuple, std::vector const& tuple_scalar_elements) const + { + using element_t = typename thrust::tuple_element::type; + static_assert(sizeof(element_t) <= sizeof(int64_t)); + auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); + thrust::get(tuple) = *ptr; + update_tuple_from_vector_of_tuple_scalar_elements_impl().update( + tuple, tuple_scalar_elements); + } +}; + +template +struct update_tuple_from_vector_of_tuple_scalar_elements_impl { + void update(TupleType& tuple, std::vector const& tuple_scalar_elements) const { return; } +}; + +template +struct host_allreduce_tuple_scalar_element_impl { + void run(raft::comms::comms_t 
const& comm, + rmm::device_uvector& tuple_scalar_elements, + cudaStream_t stream) const + { + using element_t = typename thrust::tuple_element::type; + static_assert(sizeof(element_t) <= sizeof(int64_t)); + auto ptr = reinterpret_cast(tuple_scalar_elements.data() + I); + comm.allreduce(ptr, ptr, 1, raft::comms::op_t::SUM, stream); + host_allreduce_tuple_scalar_element_impl().run( + comm, tuple_scalar_elements, stream); + } +}; + +template +struct host_allreduce_tuple_scalar_element_impl { + void run(raft::comms::comms_t const& comm, + rmm::device_uvector& tuple_scalar_elements, + cudaStream_t stream) const + { + } +}; + +template +T* iter_to_raw_ptr(T* ptr) +{ + return ptr; +} + +template +T* iter_to_raw_ptr(thrust::device_ptr ptr) +{ + return thrust::raw_pointer_cast(ptr); +} + +template +auto iter_to_raw_ptr(thrust::detail::normal_iterator> iter) +{ + return thrust::raw_pointer_cast(iter.base()); +} + +template +std::enable_if_t::value, void> +device_isend_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + size_t count, + int dst, + int tag, + raft::comms::request_t* request) +{ + // no-op +} + +template +std::enable_if_t::value, void> device_isend_impl( + raft::comms::comms_t const& comm, + InputIterator input_first, + size_t count, + int dst, + int tag, + raft::comms::request_t* request) +{ + static_assert( + std::is_same::value_type, OutputValueType>::value); + comm.isend(iter_to_raw_ptr(input_first), count, dst, tag, request); +} + +template +struct device_isend_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + size_t count, + int dst, + int base_tag, + raft::comms::request_t* requests) const + { + using output_value_t = typename thrust:: + tuple_element::value_type>::type; + auto tuple_element_input_first = thrust::get(input_first.get_iterator_tuple()); + device_isend_impl( + comm, tuple_element_input_first, count, dst, static_cast(base_tag + I), requests + I); + 
device_isend_tuple_iterator_element_impl().run( + comm, input_first, count, dst, base_tag, requests); + } +}; + +template +struct device_isend_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + size_t count, + int dst, + int base_tag, + raft::comms::request_t* requests) const + { + } +}; + +template +std::enable_if_t::value, void> +device_irecv_impl(raft::comms::comms_t const& comm, + OutputIterator output_first, + size_t count, + int src, + int tag, + raft::comms::request_t* request) +{ + // no-op +} + +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_irecv_impl(raft::comms::comms_t const& comm, + OutputIterator output_first, + size_t count, + int src, + int tag, + raft::comms::request_t* request) +{ + static_assert( + + std::is_same::value_type>::value); + comm.irecv(iter_to_raw_ptr(output_first), count, src, tag, request); +} + +template +struct device_irecv_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + OutputIterator output_first, + size_t count, + int src, + int base_tag, + raft::comms::request_t* requests) const + { + using input_value_t = typename thrust:: + tuple_element::value_type>::type; + auto tuple_element_output_first = thrust::get(output_first.get_iterator_tuple()); + device_irecv_impl( + comm, tuple_element_output_first, count, src, static_cast(base_tag + I), requests + I); + device_irecv_tuple_iterator_element_impl().run( + comm, output_first, count, src, base_tag, requests); + } +}; + +template +struct device_irecv_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + OutputIterator output_first, + size_t count, + int src, + int base_tag, + raft::comms::request_t* requests) const + { + } +}; + +template +std::enable_if_t::value, void> +device_bcast_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + int root, + cudaStream_t stream) +{ + // 
no-op +} + +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_bcast_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + int root, + cudaStream_t stream) +{ + static_assert(std::is_same::value_type, + typename std::iterator_traits::value_type>::value); + if (comm.get_rank() == root) { + comm.bcast(iter_to_raw_ptr(input_first), count, root, stream); + } else { + comm.bcast(iter_to_raw_ptr(output_first), count, root, stream); + } +} + +template +struct device_bcast_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + int root, + cudaStream_t stream) const + { + device_bcast_impl(comm, + thrust::get(input_first.get_iterator_tuple()), + thrust::get(output_first.get_iterator_tuple()), + count, + root, + stream); + device_bcast_tuple_iterator_element_impl( + comm, input_first, output_first, count, root, stream); + } +}; + +template +struct device_bcast_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + int root, + cudaStream_t stream) const + { + } +}; + +template +std::enable_if_t::value, void> +device_reduce_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + raft::comms::op_t op, + int root, + cudaStream_t stream) +{ + // no-op +} + +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_reduce_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + raft::comms::op_t op, + int root, + cudaStream_t stream) +{ + static_assert(std::is_same::value_type, + typename std::iterator_traits::value_type>::value); + comm.reduce(iter_to_raw_ptr(input_first), iter_to_raw_ptr(output_first), count, op, root, stream); +} + +template 
+struct device_reduce_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + raft::comms::op_t op, + int root, + cudaStream_t stream) const + { + device_reduce_impl(comm, + thrust::get(input_first.get_iterator_tuple()), + thrust::get(output_first.get_iterator_tuple()), + count, + op, + root, + stream); + device_reduce_tuple_iterator_element_impl( + comm, input_first, output_first, count, op, root, stream); + } +}; + +template +struct device_reduce_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + raft::comms::op_t op, + int root, + cudaStream_t stream) const + { + } +}; + +template +std::enable_if_t::value, void> +device_allgatherv_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + std::vector const& recvcounts, + std::vector const& displacements, + cudaStream_t stream) +{ + // no-op +} + +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_allgatherv_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + std::vector const& recvcounts, + std::vector const& displacements, + cudaStream_t stream) +{ + static_assert(std::is_same::value_type, + typename std::iterator_traits::value_type>::value); + comm.allgatherv(iter_to_raw_ptr(input_first), + iter_to_raw_ptr(output_first), + recvcounts.data(), + displacements.data(), + stream); +} + +template +struct device_allgatherv_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + std::vector const& recvcounts, + std::vector const& displacements, + cudaStream_t stream) const + { + device_allgatherv_impl(comm, + thrust::get(input_first.get_iterator_tuple()), + thrust::get(output_first.get_iterator_tuple()), + recvcounts, + 
displacements, + stream); + device_allgatherv_tuple_iterator_element_impl().run( + comm, input_first, output_first, recvcounts, displacements, stream); + } +}; + +template +struct device_allgatherv_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + std::vector const& recvcounts, + std::vector const& displacements, + cudaStream_t stream) const + { + } +}; + +template +auto allocate_comm_buffer_tuple_element_impl(size_t buffer_size, cudaStream_t stream) +{ + using element_t = typename thrust::tuple_element::type; + return rmm::device_uvector(buffer_size, stream); +} + +template +auto allocate_comm_buffer_tuple_impl(std::index_sequence, + size_t buffer_size, + cudaStream_t stream) +{ + return thrust::make_tuple( + allocate_comm_buffer_tuple_element_impl(buffer_size, stream)...); +} + +template +auto get_comm_buffer_begin_tuple_element_impl(BufferType& buffer) +{ + using element_t = typename thrust::tuple_element::type; + return thrust::get(buffer).begin(); +} + +template +auto get_comm_buffer_begin_tuple_impl(std::index_sequence, BufferType& buffer) +{ + return thrust::make_tuple(get_comm_buffer_begin_tuple_element_impl(buffer)...); +} + +} // namespace detail + +template +std::enable_if_t::value, T> host_scalar_allreduce( + raft::comms::comms_t const& comm, T input, cudaStream_t stream) +{ + rmm::device_uvector d_input(1, stream); + raft::update_device(d_input.data(), &input, 1, stream); + comm.allreduce(d_input.data(), d_input.data(), 1, raft::comms::op_t::SUM, stream); + T h_input{}; + raft::update_host(&h_input, d_input.data(), 1, stream); + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + return h_input; +} + +template +std::enable_if_t::value, T> +host_scalar_allreduce(raft::comms::comms_t const& comm, T input, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + 
std::vector h_tuple_scalar_elements(tuple_size); + rmm::device_uvector d_tuple_scalar_elements(tuple_size, stream); + T ret{}; + + detail::update_vector_of_tuple_scalar_elements_from_tuple_impl().update( + h_tuple_scalar_elements, input); + raft::update_device( + d_tuple_scalar_elements.data(), h_tuple_scalar_elements.data(), tuple_size, stream); + detail::host_allreduce_tuple_scalar_element_impl().run( + comm, d_tuple_scalar_elements, stream); + raft::update_host( + h_tuple_scalar_elements.data(), d_tuple_scalar_elements.data(), tuple_size, stream); + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + detail::update_tuple_from_vector_of_tuple_scalar_elements_impl().update( + ret, h_tuple_scalar_elements); + + return ret; +} + +template +std::enable_if_t::value, std::vector> host_scalar_allgather( + raft::comms::comms_t const& comm, T input, cudaStream_t stream) +{ + std::vector rx_counts(comm.get_size(), size_t{1}); + std::vector displacements(rx_counts.size(), size_t{0}); + std::iota(displacements.begin(), displacements.end(), size_t{0}); + rmm::device_uvector d_outputs(rx_counts.size(), stream); + raft::update_device(d_outputs.data() + comm.get_rank(), &input, 1, stream); + comm.allgatherv(d_outputs.data() + comm.get_rank(), + d_outputs.data(), + rx_counts.data(), + displacements.data(), + stream); + std::vector h_outputs(rx_counts.size(), size_t{0}); + raft::update_host(h_outputs.data(), d_outputs.data(), rx_counts.size(), stream); + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + return h_outputs; +} + +template +std::enable_if_t::value, std::vector> +host_scalar_allgather(raft::comms::comms_t const& comm, T input, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + std::vector rx_counts(comm.get_size(), tuple_size); + std::vector displacements(rx_counts.size(), 
size_t{0}); + for (size_t i = 0; i < displacements.size(); ++i) { displacements[i] = i * tuple_size; } + std::vector h_tuple_scalar_elements(tuple_size); + rmm::device_uvector d_allgathered_tuple_scalar_elements(comm.get_size() * tuple_size, + stream); + + detail::update_vector_of_tuple_scalar_elements_from_tuple_impl().update( + h_tuple_scalar_elements, input); + raft::update_device(d_allgathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size, + h_tuple_scalar_elements.data(), + tuple_size, + stream); + comm.allgatherv(d_allgathered_tuple_scalar_elements.data() + comm.get_rank() * tuple_size, + d_allgathered_tuple_scalar_elements.data(), + rx_counts.data(), + displacements.data(), + stream); + std::vector h_allgathered_tuple_scalar_elements(comm.get_size() * tuple_size); + raft::update_host(h_allgathered_tuple_scalar_elements.data(), + d_allgathered_tuple_scalar_elements.data(), + comm.get_size() * tuple_size, + stream); + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + + std::vector ret(comm.get_size()); + for (size_t i = 0; i < ret.size(); ++i) { + std::vector h_tuple_scalar_elements( + h_allgathered_tuple_scalar_elements.data() + i * tuple_size, + h_allgathered_tuple_scalar_elements.data() + (i + 1) * tuple_size); + detail::update_tuple_from_vector_of_tuple_scalar_elements_impl() + .update(ret[i], h_tuple_scalar_elements); + } + + return ret; +} + +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_isend(raft::comms::comms_t const& comm, + InputIterator input_first, + size_t count, + int dst, + int base_tag /* actual tag = base tag */, + raft::comms::request_t* requests) +{ + detail::device_isend_impl::value_type>( + comm, input_first, count, dst, base_tag, requests); +} + +template +std::enable_if_t< + is_thrust_tuple_of_arithmetic::value_type>::value && + is_thrust_tuple::value_type>::value, + void> 
+device_isend(raft::comms::comms_t const& comm, + InputIterator input_first, + size_t count, + int dst, + int base_tag /* actual tag = base_tag + tuple index */, + raft::comms::request_t* requests) +{ + static_assert( + thrust::tuple_size::value_type>::value == + thrust::tuple_size::value_type>::value); + + size_t constexpr tuple_size = + thrust::tuple_size::value_type>::value; + + detail:: + device_isend_tuple_iterator_element_impl() + .run(comm, input_first, count, dst, base_tag, requests); +} + +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_irecv(raft::comms::comms_t const& comm, + OutputIterator output_first, + size_t count, + int src, + int base_tag /* actual tag = base tag */, + raft::comms::request_t* requests) +{ + detail::device_irecv_impl::value_type, + OutputIterator>(comm, output_first, count, src, base_tag, requests); +} + +template +std::enable_if_t< + is_thrust_tuple_of_arithmetic::value_type>::value && + is_thrust_tuple::value_type>::value, + void> +device_irecv(raft::comms::comms_t const& comm, + OutputIterator output_first, + size_t count, + int src, + int base_tag /* actual tag = base_tag + tuple index */, + raft::comms::request_t* requests) +{ + static_assert( + thrust::tuple_size::value_type>::value == + thrust::tuple_size::value_type>::value); + + size_t constexpr tuple_size = + thrust::tuple_size::value_type>::value; + + detail:: + device_irecv_tuple_iterator_element_impl() + .run(comm, output_first, count, src, base_tag, requests); +} + +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_bcast(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + int root, + cudaStream_t stream) +{ + detail::device_bcast_impl(comm, input_first, output_first, count, root, stream); +} + +template +std::enable_if_t< + is_thrust_tuple_of_arithmetic::value_type>::value && + is_thrust_tuple::value_type>::value, + void> 
+device_bcast(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + int root, + cudaStream_t stream) +{ + static_assert( + thrust::tuple_size::value_type>::value == + thrust::tuple_size::value_type>::value); + + size_t constexpr tuple_size = + thrust::tuple_size::value_type>::value; + + detail:: + device_bcast_tuple_iterator_element_impl( + comm, input_first, output_first, count, root, stream); +} + +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_reduce(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + raft::comms::op_t op, + int root, + cudaStream_t stream) +{ + detail::device_reduce_impl(comm, input_first, output_first, count, op, root, stream); +} + +template +std::enable_if_t< + is_thrust_tuple_of_arithmetic::value_type>::value && + is_thrust_tuple::value_type>::value, + void> +device_reduce(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + size_t count, + raft::comms::op_t op, + int root, + cudaStream_t stream) +{ + static_assert( + thrust::tuple_size::value_type>::value == + thrust::tuple_size::value_type>::value); + + size_t constexpr tuple_size = + thrust::tuple_size::value_type>::value; + + detail:: + device_reduce_tuple_iterator_element_impl( + comm, input_first, output_first, count, op, root, stream); +} + +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_allgatherv(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + std::vector const& recvcounts, + std::vector const& displacements, + cudaStream_t stream) +{ + detail::device_allgatherv_impl( + comm, input_first, output_first, recvcounts, displacements, stream); +} + +template +std::enable_if_t< + is_thrust_tuple_of_arithmetic::value_type>::value && + is_thrust_tuple::value_type>::value, + void> 
+device_allgatherv(raft::comms::comms_t const& comm, + InputIterator input_first, + OutputIterator output_first, + std::vector const& recvcounts, + std::vector const& displacements, + cudaStream_t stream) +{ + static_assert( + thrust::tuple_size::value_type>::value == + thrust::tuple_size::value_type>::value); + + size_t constexpr tuple_size = + thrust::tuple_size::value_type>::value; + + detail::device_allgatherv_tuple_iterator_element_impl() + .run(comm, input_first, output_first, recvcounts, displacements, stream); +} + +template ::value>* = nullptr> +auto allocate_comm_buffer(size_t buffer_size, cudaStream_t stream) +{ + return rmm::device_uvector(buffer_size, stream); +} + +template ::value>* = nullptr> +auto allocate_comm_buffer(size_t buffer_size, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + return detail::allocate_comm_buffer_tuple_impl( + std::make_index_sequence(), buffer_size, stream); +} + +template ::value>* = nullptr> +auto get_comm_buffer_begin(BufferType& buffer) +{ + return buffer.begin(); +} + +template ::value>* = nullptr> +auto get_comm_buffer_begin(BufferType& buffer) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + return thrust::make_zip_iterator( + detail::get_comm_buffer_begin_tuple_impl(std::make_index_sequence(), buffer)); +} + +} // namespace experimental +} // namespace cugraph \ No newline at end of file diff --git a/cpp/include/utilities/thrust_tuple_utils.cuh b/cpp/include/utilities/thrust_tuple_utils.cuh index f2b18adafce..0ad71ba5e05 100644 --- a/cpp/include/utilities/thrust_tuple_utils.cuh +++ b/cpp/include/utilities/thrust_tuple_utils.cuh @@ -81,13 +81,6 @@ struct plus_thrust_tuple_impl { __host__ __device__ constexpr void compute(TupleType& lhs, TupleType const& rhs) const {} }; -template -__device__ std::enable_if_t::value, void> atomic_accumulate_impl(T& lhs, - T const& rhs) -{ - atomicAdd(&lhs, rhs); -} - template __device__ std::enable_if_t::value, void> 
atomic_accumulate_impl( thrust::detail::any_assign& /* dereferencing thrust::discard_iterator results in this type */ lhs, @@ -96,6 +89,13 @@ __device__ std::enable_if_t::value, void> atomic_accumulat // no-op } +template +__device__ std::enable_if_t::value, void> atomic_accumulate_impl(T& lhs, + T const& rhs) +{ + atomicAdd(&lhs, rhs); +} + template struct atomic_accumulate_thrust_tuple_impl { __device__ constexpr void compute(Iterator iter, TupleType const& value) const @@ -111,6 +111,22 @@ struct atomic_accumulate_thrust_tuple_impl { __device__ constexpr void compute(Iterator iter, TupleType const& value) const {} }; +template +struct warp_reduce_thrust_tuple_impl { + __device__ void compute(TupleType& tuple) const + { + auto& val = thrust::get(tuple); + for (auto offset = raft::warp_size() / 2; offset > 0; offset /= 2) { + val += __shfl_down_sync(raft::warp_full_mask(), val, offset); + } + } +}; + +template +struct warp_reduce_thrust_tuple_impl { + __device__ void compute(TupleType& tuple) const {} +}; + template struct block_reduce_thrust_tuple_impl { __device__ void compute(TupleType& tuple) const @@ -162,6 +178,15 @@ struct is_arithmetic_or_thrust_tuple_of_arithmetic> : std::integral_constant>::value> { }; +template +struct thrust_tuple_size_or_one : std::integral_constant { +}; + +template +struct thrust_tuple_size_or_one> + : std::integral_constant>::value> { +}; + template struct compute_thrust_tuple_element_sizes { auto operator()() const @@ -209,6 +234,17 @@ struct atomic_accumulate_thrust_tuple { } }; +template +struct warp_reduce_thrust_tuple { // only warp lane 0 has a valid result + __device__ TupleType operator()(TupleType const& tuple) const + { + size_t constexpr tuple_size = thrust::tuple_size::value; + auto ret = tuple; + detail::warp_reduce_thrust_tuple_impl().compute(ret); + return ret; + } +}; + template struct block_reduce_thrust_tuple { __device__ TupleType operator()(TupleType const& tuple) const diff --git a/cpp/src/experimental/bfs.cu 
b/cpp/src/experimental/bfs.cu index adcdd65f645..d9d7cb1a245 100644 --- a/cpp/src/experimental/bfs.cu +++ b/cpp/src/experimental/bfs.cu @@ -196,6 +196,60 @@ void bfs(raft::handle_t &handle, // explicit instantiation +template void bfs(raft::handle_t &handle, + graph_view_t const &graph_view, + int32_t *distances, + int32_t *predecessors, + int32_t source_vertex, + bool direction_optimizing, + int32_t depth_limit, + bool do_expensive_check); + +template void bfs(raft::handle_t &handle, + graph_view_t const &graph_view, + int32_t *distances, + int32_t *predecessors, + int32_t source_vertex, + bool direction_optimizing, + int32_t depth_limit, + bool do_expensive_check); + +template void bfs(raft::handle_t &handle, + graph_view_t const &graph_view, + int32_t *distances, + int32_t *predecessors, + int32_t source_vertex, + bool direction_optimizing, + int32_t depth_limit, + bool do_expensive_check); + +template void bfs(raft::handle_t &handle, + graph_view_t const &graph_view, + int32_t *distances, + int32_t *predecessors, + int32_t source_vertex, + bool direction_optimizing, + int32_t depth_limit, + bool do_expensive_check); + +template void bfs(raft::handle_t &handle, + graph_view_t const &graph_view, + int64_t *distances, + int64_t *predecessors, + int64_t source_vertex, + bool direction_optimizing, + int64_t depth_limit, + bool do_expensive_check); + +template void bfs(raft::handle_t &handle, + graph_view_t const &graph_view, + int64_t *distances, + int64_t *predecessors, + int64_t source_vertex, + bool direction_optimizing, + int64_t depth_limit, + bool do_expensive_check); + template void bfs(raft::handle_t &handle, graph_view_t const &graph_view, int32_t *distances, @@ -206,7 +260,16 @@ template void bfs(raft::handle_t &handle, bool do_expensive_check); template void bfs(raft::handle_t &handle, - graph_view_t const &graph_view, + graph_view_t const &graph_view, + int32_t *distances, + int32_t *predecessors, + int32_t source_vertex, + bool direction_optimizing, + 
int32_t depth_limit, + bool do_expensive_check); + +template void bfs(raft::handle_t &handle, + graph_view_t const &graph_view, int32_t *distances, int32_t *predecessors, int32_t source_vertex, @@ -214,5 +277,32 @@ template void bfs(raft::handle_t &handle, int32_t depth_limit, bool do_expensive_check); +template void bfs(raft::handle_t &handle, + graph_view_t const &graph_view, + int32_t *distances, + int32_t *predecessors, + int32_t source_vertex, + bool direction_optimizing, + int32_t depth_limit, + bool do_expensive_check); + +template void bfs(raft::handle_t &handle, + graph_view_t const &graph_view, + int64_t *distances, + int64_t *predecessors, + int64_t source_vertex, + bool direction_optimizing, + int64_t depth_limit, + bool do_expensive_check); + +template void bfs(raft::handle_t &handle, + graph_view_t const &graph_view, + int64_t *distances, + int64_t *predecessors, + int64_t source_vertex, + bool direction_optimizing, + int64_t depth_limit, + bool do_expensive_check); + } // namespace experimental } // namespace cugraph diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index eb791206c3c..02f02ac6792 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -191,8 +192,8 @@ std::vector segment_degree_sorted_vertex_partition(raft::handle_t cons CUDA_TRY(cudaStreamSynchronize( handle.get_stream())); // this is necessary as d_segment_offsets will become out-of-scope once - // this functions and returning a host variable which can be used right - // after return. + // this function returns and this function returns a host variable which + // can be used right after return. 
return h_segment_offsets; } @@ -279,11 +280,8 @@ graph_tget_handle_ptr()->get_comms().allreduce(&number_of_local_edges_sum, - &number_of_local_edges_sum, - 1, - raft::comms::op_t::SUM, - default_stream); + number_of_local_edges_sum = + host_scalar_allreduce(comm, number_of_local_edges_sum, default_stream); CUGRAPH_EXPECTS(number_of_local_edges_sum == this->get_number_of_edges(), "Invalid API parameter: the sum of local edges doe counts not match with " "number_of_local_edges."); diff --git a/cpp/src/experimental/katz_centrality.cu b/cpp/src/experimental/katz_centrality.cu index 95bf66dabd3..86b534bc0f3 100644 --- a/cpp/src/experimental/katz_centrality.cu +++ b/cpp/src/experimental/katz_centrality.cu @@ -16,8 +16,8 @@ #include #include -#include -#include +#include +#include #include #include #include @@ -190,6 +190,78 @@ void katz_centrality(raft::handle_t &handle, // explicit instantiation +template void katz_centrality(raft::handle_t &handle, + graph_view_t const &graph_view, + float *betas, + float *katz_centralities, + float alpha, + float beta, + float epsilon, + size_t max_iterations, + bool has_initial_guess, + bool normalize, + bool do_expensive_check); + +template void katz_centrality(raft::handle_t &handle, + graph_view_t const &graph_view, + double *betas, + double *katz_centralities, + double alpha, + double beta, + double epsilon, + size_t max_iterations, + bool has_initial_guess, + bool normalize, + bool do_expensive_check); + +template void katz_centrality(raft::handle_t &handle, + graph_view_t const &graph_view, + float *betas, + float *katz_centralities, + float alpha, + float beta, + float epsilon, + size_t max_iterations, + bool has_initial_guess, + bool normalize, + bool do_expensive_check); + +template void katz_centrality(raft::handle_t &handle, + graph_view_t const &graph_view, + double *betas, + double *katz_centralities, + double alpha, + double beta, + double epsilon, + size_t max_iterations, + bool has_initial_guess, + bool normalize, + 
bool do_expensive_check); + +template void katz_centrality(raft::handle_t &handle, + graph_view_t const &graph_view, + float *betas, + float *katz_centralities, + float alpha, + float beta, + float epsilon, + size_t max_iterations, + bool has_initial_guess, + bool normalize, + bool do_expensive_check); + +template void katz_centrality(raft::handle_t &handle, + graph_view_t const &graph_view, + double *betas, + double *katz_centralities, + double alpha, + double beta, + double epsilon, + size_t max_iterations, + bool has_initial_guess, + bool normalize, + bool do_expensive_check); + template void katz_centrality(raft::handle_t &handle, graph_view_t const &graph_view, float *betas, @@ -203,7 +275,19 @@ template void katz_centrality(raft::handle_t &handle, bool do_expensive_check); template void katz_centrality(raft::handle_t &handle, - graph_view_t const &graph_view, + graph_view_t const &graph_view, + double *betas, + double *katz_centralities, + double alpha, + double beta, + double epsilon, + size_t max_iterations, + bool has_initial_guess, + bool normalize, + bool do_expensive_check); + +template void katz_centrality(raft::handle_t &handle, + graph_view_t const &graph_view, float *betas, float *katz_centralities, float alpha, @@ -214,5 +298,41 @@ template void katz_centrality(raft::handle_t &handle, bool normalize, bool do_expensive_check); +template void katz_centrality(raft::handle_t &handle, + graph_view_t const &graph_view, + double *betas, + double *katz_centralities, + double alpha, + double beta, + double epsilon, + size_t max_iterations, + bool has_initial_guess, + bool normalize, + bool do_expensive_check); + +template void katz_centrality(raft::handle_t &handle, + graph_view_t const &graph_view, + float *betas, + float *katz_centralities, + float alpha, + float beta, + float epsilon, + size_t max_iterations, + bool has_initial_guess, + bool normalize, + bool do_expensive_check); + +template void katz_centrality(raft::handle_t &handle, + graph_view_t 
const &graph_view, + double *betas, + double *katz_centralities, + double alpha, + double beta, + double epsilon, + size_t max_iterations, + bool has_initial_guess, + bool normalize, + bool do_expensive_check); + } // namespace experimental } // namespace cugraph diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index 0eb5da952f3..5948d329d64 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -17,8 +17,8 @@ #include #include #include -#include -#include +#include +#include #include #include #include @@ -319,6 +319,84 @@ void pagerank(raft::handle_t& handle, // explicit instantiation +template void pagerank(raft::handle_t& handle, + graph_view_t const& graph_view, + float* adj_matrix_row_out_weight_sums, + int32_t* personalization_vertices, + float* personalization_values, + int32_t personalization_vector_size, + float* pageranks, + float alpha, + float epsilon, + size_t max_iterations, + bool has_initial_guess, + bool do_expensive_check); + +template void pagerank(raft::handle_t& handle, + graph_view_t const& graph_view, + double* adj_matrix_row_out_weight_sums, + int32_t* personalization_vertices, + double* personalization_values, + int32_t personalization_vector_size, + double* pageranks, + double alpha, + double epsilon, + size_t max_iterations, + bool has_initial_guess, + bool do_expensive_check); + +template void pagerank(raft::handle_t& handle, + graph_view_t const& graph_view, + float* adj_matrix_row_out_weight_sums, + int32_t* personalization_vertices, + float* personalization_values, + int32_t personalization_vector_size, + float* pageranks, + float alpha, + float epsilon, + size_t max_iterations, + bool has_initial_guess, + bool do_expensive_check); + +template void pagerank(raft::handle_t& handle, + graph_view_t const& graph_view, + double* adj_matrix_row_out_weight_sums, + int32_t* personalization_vertices, + double* personalization_values, + int32_t personalization_vector_size, + 
double* pageranks, + double alpha, + double epsilon, + size_t max_iterations, + bool has_initial_guess, + bool do_expensive_check); + +template void pagerank(raft::handle_t& handle, + graph_view_t const& graph_view, + float* adj_matrix_row_out_weight_sums, + int64_t* personalization_vertices, + float* personalization_values, + int64_t personalization_vector_size, + float* pageranks, + float alpha, + float epsilon, + size_t max_iterations, + bool has_initial_guess, + bool do_expensive_check); + +template void pagerank(raft::handle_t& handle, + graph_view_t const& graph_view, + double* adj_matrix_row_out_weight_sums, + int64_t* personalization_vertices, + double* personalization_values, + int64_t personalization_vector_size, + double* pageranks, + double alpha, + double epsilon, + size_t max_iterations, + bool has_initial_guess, + bool do_expensive_check); + template void pagerank(raft::handle_t& handle, graph_view_t const& graph_view, float* adj_matrix_row_out_weight_sums, @@ -333,7 +411,20 @@ template void pagerank(raft::handle_t& handle, bool do_expensive_check); template void pagerank(raft::handle_t& handle, - graph_view_t const& graph_view, + graph_view_t const& graph_view, + double* adj_matrix_row_out_weight_sums, + int32_t* personalization_vertices, + double* personalization_values, + int32_t personalization_vector_size, + double* pageranks, + double alpha, + double epsilon, + size_t max_iterations, + bool has_initial_guess, + bool do_expensive_check); + +template void pagerank(raft::handle_t& handle, + graph_view_t const& graph_view, float* adj_matrix_row_out_weight_sums, int32_t* personalization_vertices, float* personalization_values, @@ -345,5 +436,44 @@ template void pagerank(raft::handle_t& handle, bool has_initial_guess, bool do_expensive_check); +template void pagerank(raft::handle_t& handle, + graph_view_t const& graph_view, + double* adj_matrix_row_out_weight_sums, + int32_t* personalization_vertices, + double* personalization_values, + int32_t 
personalization_vector_size, + double* pageranks, + double alpha, + double epsilon, + size_t max_iterations, + bool has_initial_guess, + bool do_expensive_check); + +template void pagerank(raft::handle_t& handle, + graph_view_t const& graph_view, + float* adj_matrix_row_out_weight_sums, + int64_t* personalization_vertices, + float* personalization_values, + int64_t personalization_vector_size, + float* pageranks, + float alpha, + float epsilon, + size_t max_iterations, + bool has_initial_guess, + bool do_expensive_check); + +template void pagerank(raft::handle_t& handle, + graph_view_t const& graph_view, + double* adj_matrix_row_out_weight_sums, + int64_t* personalization_vertices, + double* personalization_values, + int64_t personalization_vector_size, + double* pageranks, + double alpha, + double epsilon, + size_t max_iterations, + bool has_initial_guess, + bool do_expensive_check); + } // namespace experimental } // namespace cugraph diff --git a/cpp/src/experimental/sssp.cu b/cpp/src/experimental/sssp.cu index 3c3f43631ec..e0679ad0d56 100644 --- a/cpp/src/experimental/sssp.cu +++ b/cpp/src/experimental/sssp.cu @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include #include @@ -265,6 +265,54 @@ void sssp(raft::handle_t &handle, // explicit instantiation +template void sssp(raft::handle_t &handle, + graph_view_t const &graph_view, + float *distances, + int32_t *predecessors, + int32_t source_vertex, + float cutoff, + bool do_expensive_check); + +template void sssp(raft::handle_t &handle, + graph_view_t const &graph_view, + double *distances, + int32_t *predecessors, + int32_t source_vertex, + double cutoff, + bool do_expensive_check); + +template void sssp(raft::handle_t &handle, + graph_view_t const &graph_view, + float *distances, + int32_t *predecessors, + int32_t source_vertex, + float cutoff, + bool do_expensive_check); + +template void sssp(raft::handle_t &handle, + graph_view_t const &graph_view, + double *distances, + int32_t 
*predecessors, + int32_t source_vertex, + double cutoff, + bool do_expensive_check); + +template void sssp(raft::handle_t &handle, + graph_view_t const &graph_view, + float *distances, + int64_t *predecessors, + int64_t source_vertex, + float cutoff, + bool do_expensive_check); + +template void sssp(raft::handle_t &handle, + graph_view_t const &graph_view, + double *distances, + int64_t *predecessors, + int64_t source_vertex, + double cutoff, + bool do_expensive_check); + template void sssp(raft::handle_t &handle, graph_view_t const &graph_view, float *distances, @@ -274,12 +322,44 @@ template void sssp(raft::handle_t &handle, bool do_expensive_check); template void sssp(raft::handle_t &handle, - graph_view_t const &graph_view, + graph_view_t const &graph_view, + double *distances, + int32_t *predecessors, + int32_t source_vertex, + double cutoff, + bool do_expensive_check); + +template void sssp(raft::handle_t &handle, + graph_view_t const &graph_view, float *distances, int32_t *predecessors, int32_t source_vertex, float cutoff, bool do_expensive_check); +template void sssp(raft::handle_t &handle, + graph_view_t const &graph_view, + double *distances, + int32_t *predecessors, + int32_t source_vertex, + double cutoff, + bool do_expensive_check); + +template void sssp(raft::handle_t &handle, + graph_view_t const &graph_view, + float *distances, + int64_t *predecessors, + int64_t source_vertex, + float cutoff, + bool do_expensive_check); + +template void sssp(raft::handle_t &handle, + graph_view_t const &graph_view, + double *distances, + int64_t *predecessors, + int64_t source_vertex, + double cutoff, + bool do_expensive_check); + } // namespace experimental } // namespace cugraph From ae76f7da60e36ad69f8f12b36ed04f02b33e865b Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Thu, 1 Oct 2020 08:31:38 -0400 Subject: [PATCH 67/74] [REVIEW] Update ci/local/README.md (#1176) * update ci/local/README.md * Update CHANGELOG.md --- CHANGELOG.md | 1 + ci/local/README.md | 8 
++++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index eafd31a5933..467c14cb7bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ - PR #1164 MG symmetrize and conda env updates - PR #1162 enhanced networkx testing - PR #1165 updated remaining algorithms to be NetworkX compatible +- PR #1176 Update ci/local/README.md ## Bug Fixes - PR #1131 Show style checker errors with set +e diff --git a/ci/local/README.md b/ci/local/README.md index 28bbe3590ea..07e2041d0a3 100644 --- a/ci/local/README.md +++ b/ci/local/README.md @@ -18,19 +18,19 @@ Build and test your local repository using a base gpuCI Docker image where: -H Show this help text -r Path to repository (defaults to working directory) - -i Use Docker image (default is gpuci/rapidsai-base:cuda10.0-ubuntu16.04-gcc5-py3.6) + -i Use Docker image (default is gpuci/rapidsai:${NIGHTLY_VERSION}-cuda10.1-devel-ubuntu16.04-py3.7) -s Skip building and testing and start an interactive shell in a container of the Docker image ``` Example Usage: -`bash build.sh -r ~/rapids/cugraph -i gpuci/rapidsai-base:cuda10.1-ubuntu16.04-gcc5-py3.6` +`bash build.sh -r ~/rapids/cugraph -i gpuci/rapidsai:0.16-cuda10.2-devel-ubuntu16.04-py3.7` For a full list of available gpuCI docker images, visit our [DockerHub](https://hub.docker.com/r/gpuci/rapidsai/tags) page. Style Check: ```bash $ bash ci/local/build.sh -r ~/rapids/cugraph -s -$ source activate gdf #Activate gpuCI conda environment +$ source activate rapids # Activate gpuCI conda environment $ cd rapids $ flake8 python ``` @@ -42,7 +42,7 @@ There are some caveats to be aware of when using this script, especially if you ### Docker Image Build Repository -The docker image will generate build artifacts in a folder on your machine located in the `root` directory of the repository you passed to the script. For the above example, the directory is named `~/rapids/cugraph/build_rapidsai-base_cuda10.1-ubuntu16.04-gcc5-py3.6/`. 
Feel free to remove this directory after the script is finished. +The docker image will generate build artifacts in a folder on your machine located in the `root` directory of the repository you passed to the script. For the above example, the directory is named `~/rapids/cugraph/build_rapidsai_cuda10.1-ubuntu16.04-py3.7/`. Feel free to remove this directory after the script is finished. *Note*: The script *will not* override your local build repository. Your local environment stays in tact. From 891bf43a5692520738890e3808982555bb1e49f3 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Thu, 1 Oct 2020 10:42:07 -0500 Subject: [PATCH 68/74] [REVIEW] ENH Integrate 2D shuffling and latest Louvain API (#1163) * Minor update to comment to describe array sizes. * Changed graph container to use smart pointers, added arg for instantiating legacy types and switch statements for it to factory function. * Added PR 1152 to CHANGELOG.md * Removing unnecessary .get() call on unique_ptr instance * Using make_unique() instead of new * Updated to call drop() correctly after cudf API update. * Added args to support calling get_vertex_identifiers(). * Style fixes, removed commented out code meant for a future change. * Updated comment with description of new 'identifiers' arg. * Safety commit, still WIP, does not compile - updates for 2D graph support and upcoming 2D shuffle support * safety commit, does not pass tests: updated enough to be able to run the MG Louvain test. * Updated call_louvain() to use the new graph_t types. Still WIP, needs louvain updates to compile. * WIP: updates for incorporating new 2D shuffle data, still does not pass test. * Adding updates from iroy30 for calling shuffle from louvain.py * Updated to extract and pass the partition_t info and call the graph_t ctor. Now having a problem finding the right subcommunicator. 
* Updates to set up subcomms - having a problem with something needed by subcomms not being initialized: "address not mapped to object at address (nil)" * Added p2p flag to comms initialize() to enable initialization of UCX endpoints needed for MG test. * safety commit: committing with debug prints to allow other team members to debug in parallel. * safety commit: more updates to address problems instantiating graph_t (using num edges for partition instead of global for edgelist) and for debugging (print statments). * Changing how row and col rank are obtained, added debug prints for edge lists info * Fixes to partition_t get_matrix_partition_major/minor methods based on feedback. * Update shuffle.py * Integrating changes from iroy30 to produce "option 1" shuffle output by default, with an option to enable "option 2", temporarily enabled graph expensive checks for debugging. * Addressed review feedback: made var names consistent, fixed weights=None bug in cython code, added copyright to shuffle.py, changed how ranks are retrieved from the raft handle. * Removed debug prints. * Added PR 1163 to CHANGELOG.md * Removed extra newlines accidentally added to clean up diff in the PR, updated comment in cython code. * Added specific newlines back so file does not differ unnecessarily. * Disabled graph_t expensive check that was left enabled for debugging. 
Co-authored-by: Rick Ratzel Co-authored-by: Iroy30 <41401566+Iroy30@users.noreply.github.com> --- CHANGELOG.md | 1 + cpp/include/utilities/cython.hpp | 122 +++---- cpp/src/community/louvain.cu | 38 ++ cpp/src/utilities/cython.cpp | 337 ++++++++++-------- python/cugraph/community/louvain.pxd | 3 +- python/cugraph/community/louvain_wrapper.pyx | 23 +- python/cugraph/dask/common/input_utils.py | 9 + python/cugraph/dask/community/louvain.pxd | 2 + python/cugraph/dask/community/louvain.py | 43 ++- .../dask/community/louvain_wrapper.pyx | 86 ++--- python/cugraph/structure/graph_primtypes.pxd | 34 +- python/cugraph/structure/shuffle.py | 70 +++- python/cugraph/tests/dask/test_mg_louvain.py | 2 +- 13 files changed, 462 insertions(+), 308 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 467c14cb7bc..5e7466b44a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - PR #1147 Added support for NetworkX graphs as input type - PR #1157 Louvain API update to use graph_container_t - PR #1151 MNMG extension for pattern accelerator based PageRank, Katz Centrality, BFS, and SSSP implementations (C++ part) +- PR #1163 Integrated 2D shuffling and Louvain updates ## Improvements - PR 1081 MNMG Renumbering - sort partitions by degree diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index b53ef8451d7..eb2852fecdf 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include @@ -35,14 +35,14 @@ enum class graphTypeEnum : int { GraphCSCViewDouble, GraphCOOViewFloat, GraphCOOViewDouble, - graph_view_t_float, - graph_view_t_double, - graph_view_t_float_mg, - graph_view_t_double_mg, - graph_view_t_float_transposed, - graph_view_t_double_transposed, - graph_view_t_float_mg_transposed, - graph_view_t_double_mg_transposed + graph_t_float, + graph_t_double, + graph_t_float_mg, + graph_t_double_mg, + graph_t_float_transposed, + graph_t_double_transposed, 
+ graph_t_float_mg_transposed, + graph_t_double_mg_transposed }; // Enum for the high-level type of GraphC??View* class to instantiate. @@ -66,22 +66,18 @@ struct graph_container_t { std::unique_ptr> GraphCSCViewDoublePtr; std::unique_ptr> GraphCOOViewFloatPtr; std::unique_ptr> GraphCOOViewDoublePtr; - std::unique_ptr> - graph_view_t_float_ptr; - std::unique_ptr> - graph_view_t_double_ptr; - std::unique_ptr> - graph_view_t_float_mg_ptr; - std::unique_ptr> - graph_view_t_double_mg_ptr; - std::unique_ptr> - graph_view_t_float_transposed_ptr; - std::unique_ptr> - graph_view_t_double_transposed_ptr; - std::unique_ptr> - graph_view_t_float_mg_transposed_ptr; - std::unique_ptr> - graph_view_t_double_mg_transposed_ptr; + std::unique_ptr> graph_t_float_ptr; + std::unique_ptr> graph_t_double_ptr; + std::unique_ptr> graph_t_float_mg_ptr; + std::unique_ptr> graph_t_double_mg_ptr; + std::unique_ptr> + graph_t_float_transposed_ptr; + std::unique_ptr> + graph_t_double_transposed_ptr; + std::unique_ptr> + graph_t_float_mg_transposed_ptr; + std::unique_ptr> + graph_t_double_mg_transposed_ptr; }; graph_container_t() : graph_ptr_union{nullptr}, graph_ptr_type{graphTypeEnum::null} {} @@ -96,12 +92,12 @@ struct graph_container_t { graph_container_t(const graph_container_t&) = delete; graph_container_t& operator=(const graph_container_t&) = delete; - void get_vertex_identifiers(void* c_identifier); - graphPtrUnion graph_ptr_union; graphTypeEnum graph_ptr_type; }; +// FIXME: finish description for vertex_partition_offsets +// // Factory function for populating an empty graph container with a new graph // object from basic types, and sets the corresponding meta-data. Args are: // @@ -120,16 +116,20 @@ struct graph_container_t { // raft::handle_t const& handle // Raft handle to be set on the new graph instance in the container // -// void* offsets, indices, weights -// Pointer to an array of values representing offsets, indices, and weights -// respectively. 
The value types of the array are specified using -// numberTypeEnum values separately (see below). offsets should be size +// void* src_vertices, dst_vertices, weights +// Pointer to an array of values representing source and destination vertices, +// and edge weights respectively. The value types of the array are specified +// using numberTypeEnum values separately (see below). offsets should be size // num_vertices+1, indices should be size num_edges, weights should also be // size num_edges // -// numberTypeEnum offsetType, indexType, weightType -// numberTypeEnum enum value describing the data type for the offsets, -// indices, and weights arrays respectively. These enum values are used to +// void* vertex_partition_offsets +// Pointer to an array of vertexType values representing offsets into the +// individual partitions for a multi-GPU paritioned graph. The offsets are used for ... +// +// numberTypeEnum vertexType, edgeType, weightType +// numberTypeEnum enum value describing the data type for the vertices, +// offsets, and weights arrays respectively. These enum values are used to // instantiate the proper templated graph type and for casting the arrays // accordingly. // @@ -137,21 +137,6 @@ struct graph_container_t { // The number of vertices and edges respectively in the graph represented by // the above arrays. // -// int* local_vertices, local_edges -// Arrays containing the number of vertices and number of edges, -// respectively. For example, if there are a total of 7 vertices, 16 edges, -// and the algorithm is distributed over 3 GPUs, the local_vertices may contain -// [2,2,3] and local_edges may contain [5,5,6]. -// NOTE: these parameters are only needed for legacy GraphC??View* classes and -// may not be present in future versions. 
-// -// int* local_offsets -// Array containing the offsets between the local_* arrays and those for the -// global graph, allowing the array to start at position zero yet still be -// mapped to a position in the global array. -// NOTE: this parameter is only needed for legacy GraphC??View* classes and -// may not be present in future versions. -// // bool transposed // true if the resulting graph object should store a transposed adjacency // matrix @@ -162,26 +147,43 @@ struct graph_container_t { // // FIXME: Should local_* values be void* as well? void populate_graph_container(graph_container_t& graph_container, - legacyGraphTypeEnum legacyType, - raft::handle_t const& handle, - void* offsets, - void* indices, + raft::handle_t& handle, + void* src_vertices, + void* dst_vertices, void* weights, - numberTypeEnum offsetType, - numberTypeEnum indexType, + void* vertex_partition_offsets, + numberTypeEnum vertexType, + numberTypeEnum edgeType, numberTypeEnum weightType, - int num_vertices, - int num_edges, - int* local_vertices, - int* local_edges, - int* local_offsets, + int num_partition_edges, + size_t num_global_vertices, + size_t num_global_edges, + size_t row_comm_size, // pcols + size_t col_comm_size, // prows bool transposed, bool multi_gpu); +// FIXME: comment this function +void populate_graph_container_legacy(graph_container_t& graph_container, + legacyGraphTypeEnum legacyType, + raft::handle_t const& handle, + void* offsets, + void* indices, + void* weights, + numberTypeEnum offsetType, + numberTypeEnum indexType, + numberTypeEnum weightType, + size_t num_global_vertices, + size_t num_global_edges, + int* local_vertices, + int* local_edges, + int* local_offsets); + // Wrapper for calling Louvain using a graph container template std::pair call_louvain(raft::handle_t const& handle, graph_container_t const& graph_container, + void* identifiers, void* parts, size_t max_level, weight_t resolution); diff --git a/cpp/src/community/louvain.cu 
b/cpp/src/community/louvain.cu index 2360544dc29..094d950010c 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -110,4 +110,42 @@ template std::pair louvain( size_t, double); +// instantations with multi_gpu = true +template std::pair louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + int32_t *, + size_t, + float); +template std::pair louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + int32_t *, + size_t, + double); +template std::pair louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + int32_t *, + size_t, + float); +template std::pair louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + int32_t *, + size_t, + double); +template std::pair louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + int64_t *, + size_t, + float); +template std::pair louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + int64_t *, + size_t, + double); + } // namespace cugraph diff --git a/cpp/src/utilities/cython.cpp b/cpp/src/utilities/cython.cpp index 166ce7792a7..82191307cd3 100644 --- a/cpp/src/utilities/cython.cpp +++ b/cpp/src/utilities/cython.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -27,28 +28,161 @@ namespace cython { // Populates a graph_container_t with a pointer to a new graph object and sets // the meta-data accordingly. The graph container owns the pointer and it is // assumed it will delete it on destruction. -// -// FIXME: Should local_* values be void* as well? 
void populate_graph_container(graph_container_t& graph_container, - legacyGraphTypeEnum legacyType, - raft::handle_t const& handle, - void* offsets, - void* indices, + raft::handle_t& handle, + void* src_vertices, + void* dst_vertices, void* weights, - numberTypeEnum offsetType, - numberTypeEnum indexType, + void* vertex_partition_offsets, + numberTypeEnum vertexType, + numberTypeEnum edgeType, numberTypeEnum weightType, - int num_vertices, - int num_edges, - int* local_vertices, - int* local_edges, - int* local_offsets, + int num_partition_edges, + size_t num_global_vertices, + size_t num_global_edges, + size_t row_comm_size, // pcols + size_t col_comm_size, // prows bool transposed, bool multi_gpu) { CUGRAPH_EXPECTS(graph_container.graph_ptr_type == graphTypeEnum::null, "populate_graph_container() can only be called on an empty container."); + bool do_expensive_check{false}; + bool hypergraph_partitioned{false}; + + // FIXME: Consider setting up the subcomms right after initializing comms, no + // need to delay to this point. + // Setup the subcommunicators needed for this partition on the handle. + partition_2d::subcomm_factory_t subcomm_factory(handle, + row_comm_size); + + // FIXME: once the subcomms are set up earlier (outside this function), remove + // the row/col_comm_size params and retrieve them from the handle (commented + // out lines below) + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + // auto const row_comm_size = row_comm.get_size(); // pcols + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + // auto const col_comm_size = col_comm.get_size(); // prows + + // Copy the contents of the vertex_partition_offsets (host array) to a vector + // as needed by the partition_t ctor. 
+ int* vertex_partition_offsets_array = reinterpret_cast(vertex_partition_offsets); + + // FIXME: this needs to be vertex_t, not int? + std::vector vertex_partition_offsets_vect( + vertex_partition_offsets_array, + vertex_partition_offsets_array + (col_comm_size * row_comm_size) + 1); + + experimental::partition_t partition(vertex_partition_offsets_vect, + hypergraph_partitioned, + row_comm_size, + col_comm_size, + row_comm_rank, + col_comm_rank); + + experimental::graph_properties_t graph_props{.is_symmetric = false, .is_multigraph = false}; + + auto src_vertices_array = reinterpret_cast(src_vertices); + auto dst_vertices_array = reinterpret_cast(dst_vertices); + + if (multi_gpu) { + bool sorted_by_global_degree_within_vertex_partition{false}; + + if (weightType == numberTypeEnum::floatType) { + // vector of 1 representing the indivdual partition for this worker + std::vector> edge_lists; + edge_lists.push_back( + experimental::edgelist_t{src_vertices_array, + dst_vertices_array, + reinterpret_cast(weights), + num_partition_edges}); + auto g = new experimental::graph_t( + handle, + edge_lists, + partition, + num_global_vertices, + num_global_edges, + graph_props, + sorted_by_global_degree_within_vertex_partition, + do_expensive_check); + + graph_container.graph_ptr_union.graph_t_float_mg_ptr = + std::unique_ptr>(g); + graph_container.graph_ptr_type = graphTypeEnum::graph_t_float_mg; + + } else { + std::vector> edge_lists; + edge_lists.push_back( + experimental::edgelist_t{src_vertices_array, + dst_vertices_array, + reinterpret_cast(weights), + num_partition_edges}); + auto g = new experimental::graph_t( + handle, + edge_lists, + partition, + num_global_vertices, + num_global_edges, + graph_props, + sorted_by_global_degree_within_vertex_partition, + do_expensive_check); + + graph_container.graph_ptr_union.graph_t_double_mg_ptr = + std::unique_ptr>(g); + graph_container.graph_ptr_type = graphTypeEnum::graph_t_double_mg; + } + + } else { + bool 
sorted_by_degree{false}; + + if (weightType == numberTypeEnum::floatType) { + experimental::edgelist_t edge_list{src_vertices_array, + dst_vertices_array, + reinterpret_cast(weights), + num_partition_edges}; + auto g = new experimental::graph_t( + handle, edge_list, num_global_vertices, graph_props, sorted_by_degree, do_expensive_check); + + graph_container.graph_ptr_union.graph_t_float_ptr = + std::unique_ptr>(g); + graph_container.graph_ptr_type = graphTypeEnum::graph_t_float; + + } else { + experimental::edgelist_t edge_list{src_vertices_array, + dst_vertices_array, + reinterpret_cast(weights), + num_partition_edges}; + auto g = new experimental::graph_t( + handle, edge_list, num_global_vertices, graph_props, sorted_by_degree, do_expensive_check); + + graph_container.graph_ptr_union.graph_t_double_ptr = + std::unique_ptr>(g); + graph_container.graph_ptr_type = graphTypeEnum::graph_t_double; + } + } +} + +void populate_graph_container_legacy(graph_container_t& graph_container, + legacyGraphTypeEnum legacyType, + raft::handle_t const& handle, + void* offsets, + void* indices, + void* weights, + numberTypeEnum offsetType, + numberTypeEnum indexType, + numberTypeEnum weightType, + size_t num_global_vertices, + size_t num_global_edges, + int* local_vertices, + int* local_edges, + int* local_offsets) +{ + CUGRAPH_EXPECTS(graph_container.graph_ptr_type == graphTypeEnum::null, + "populate_graph_container() can only be called on an empty container."); + // FIXME: This is soon-to-be legacy code left in place until the new graph_t // class is supported everywhere else. Remove everything down to the comment // line after the return stmnt. 
@@ -60,8 +194,8 @@ void populate_graph_container(graph_container_t& graph_container, std::make_unique>(reinterpret_cast(offsets), reinterpret_cast(indices), reinterpret_cast(weights), - num_vertices, - num_edges); + num_global_vertices, + num_global_edges); graph_container.graph_ptr_type = graphTypeEnum::GraphCSRViewFloat; (graph_container.graph_ptr_union.GraphCSRViewFloatPtr) ->set_local_data(local_vertices, local_edges, local_offsets); @@ -73,8 +207,8 @@ void populate_graph_container(graph_container_t& graph_container, std::make_unique>(reinterpret_cast(offsets), reinterpret_cast(indices), reinterpret_cast(weights), - num_vertices, - num_edges); + num_global_vertices, + num_global_edges); graph_container.graph_ptr_type = graphTypeEnum::GraphCSCViewFloat; (graph_container.graph_ptr_union.GraphCSCViewFloatPtr) ->set_local_data(local_vertices, local_edges, local_offsets); @@ -86,8 +220,8 @@ void populate_graph_container(graph_container_t& graph_container, std::make_unique>(reinterpret_cast(offsets), reinterpret_cast(indices), reinterpret_cast(weights), - num_vertices, - num_edges); + num_global_vertices, + num_global_edges); graph_container.graph_ptr_type = graphTypeEnum::GraphCOOViewFloat; (graph_container.graph_ptr_union.GraphCOOViewFloatPtr) ->set_local_data(local_vertices, local_edges, local_offsets); @@ -103,8 +237,8 @@ void populate_graph_container(graph_container_t& graph_container, std::make_unique>(reinterpret_cast(offsets), reinterpret_cast(indices), reinterpret_cast(weights), - num_vertices, - num_edges); + num_global_vertices, + num_global_edges); graph_container.graph_ptr_type = graphTypeEnum::GraphCSRViewDouble; (graph_container.graph_ptr_union.GraphCSRViewDoublePtr) ->set_local_data(local_vertices, local_edges, local_offsets); @@ -116,8 +250,8 @@ void populate_graph_container(graph_container_t& graph_container, std::make_unique>(reinterpret_cast(offsets), reinterpret_cast(indices), reinterpret_cast(weights), - num_vertices, - num_edges); + 
num_global_vertices, + num_global_edges); graph_container.graph_ptr_type = graphTypeEnum::GraphCSCViewDouble; (graph_container.graph_ptr_union.GraphCSCViewDoublePtr) ->set_local_data(local_vertices, local_edges, local_offsets); @@ -129,8 +263,8 @@ void populate_graph_container(graph_container_t& graph_container, std::make_unique>(reinterpret_cast(offsets), reinterpret_cast(indices), reinterpret_cast(weights), - num_vertices, - num_edges); + num_global_vertices, + num_global_edges); graph_container.graph_ptr_type = graphTypeEnum::GraphCOOViewDouble; (graph_container.graph_ptr_union.GraphCOOViewDoublePtr) ->set_local_data(local_vertices, local_edges, local_offsets); @@ -140,155 +274,50 @@ void populate_graph_container(graph_container_t& graph_container, } } return; - //////////////////////////////////////////////////////////////////////////////////// - - bool do_expensive_check{false}; - bool sorted_by_global_degree_within_vertex_partition{false}; - experimental::graph_properties_t graph_props{.is_symmetric = false, .is_multigraph = false}; - - if (multi_gpu) { - std::vector adjmatrix_partition_offsets_vect; - std::vector adjmatrix_partition_indices_vect; - std::vector vertex_partition_segment_offsets_vect; - std::vector vertex_partition_offsets; - experimental::partition_t partition(vertex_partition_offsets, false, 0, 0, 0, 0); - - if (weightType == numberTypeEnum::floatType) { - std::vector adjmatrix_partition_weights_vect; - auto g = new experimental::graph_view_t( - handle, - adjmatrix_partition_offsets_vect, - adjmatrix_partition_indices_vect, - adjmatrix_partition_weights_vect, - vertex_partition_segment_offsets_vect, - partition, - num_vertices, - num_edges, - graph_props, - sorted_by_global_degree_within_vertex_partition, - do_expensive_check); - graph_container.graph_ptr_union.graph_view_t_float_mg_ptr = - std::unique_ptr>(g); - graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_float_mg; - - } else { - std::vector adjmatrix_partition_weights_vect; 
- auto g = new experimental::graph_view_t( - handle, - adjmatrix_partition_offsets_vect, - adjmatrix_partition_indices_vect, - adjmatrix_partition_weights_vect, - vertex_partition_segment_offsets_vect, - partition, - num_vertices, - num_edges, - graph_props, - sorted_by_global_degree_within_vertex_partition, - do_expensive_check); - graph_container.graph_ptr_union.graph_view_t_double_mg_ptr = - std::unique_ptr>(g); - graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_double_mg; - } - - } else { - auto offsets_array = reinterpret_cast(offsets); - auto indices_array = reinterpret_cast(indices); - std::vector segment_offsets; - - if (weightType == numberTypeEnum::floatType) { - auto weights_array = reinterpret_cast(weights); - auto g = new experimental::graph_view_t( - handle, - offsets_array, - indices_array, - weights_array, - segment_offsets, - num_vertices, - num_edges, - graph_props, - sorted_by_global_degree_within_vertex_partition, - do_expensive_check); - graph_container.graph_ptr_union.graph_view_t_float_ptr = - std::unique_ptr>(g); - graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_float; - - } else { - auto weights_array = reinterpret_cast(weights); - auto g = new experimental::graph_view_t( - handle, - offsets_array, - indices_array, - weights_array, - segment_offsets, - num_vertices, - num_edges, - graph_props, - sorted_by_global_degree_within_vertex_partition, - do_expensive_check); - graph_container.graph_ptr_union.graph_view_t_double_ptr = - std::unique_ptr>(g); - graph_container.graph_ptr_type = graphTypeEnum::graph_view_t_double; - } - } } -void graph_container_t::get_vertex_identifiers(void* c_identifier) -{ - CUGRAPH_EXPECTS(graph_ptr_type != graphTypeEnum::null, - "get_vertex_identifiers() cannot be called on an uninitialized container"); - - switch (graph_ptr_type) { - case graphTypeEnum::GraphCSRViewFloat: { - graph_ptr_union.GraphCSRViewFloatPtr->get_vertex_identifiers( - static_cast(c_identifier)); - } break; - case 
graphTypeEnum::GraphCSCViewFloat: { - graph_ptr_union.GraphCSCViewFloatPtr->get_vertex_identifiers( - static_cast(c_identifier)); - } break; - case graphTypeEnum::GraphCOOViewFloat: { - graph_ptr_union.GraphCOOViewFloatPtr->get_vertex_identifiers( - static_cast(c_identifier)); - } break; - case graphTypeEnum::GraphCSRViewDouble: { - graph_ptr_union.GraphCSRViewDoublePtr->get_vertex_identifiers( - static_cast(c_identifier)); - } break; - case graphTypeEnum::GraphCSCViewDouble: { - graph_ptr_union.GraphCSCViewDoublePtr->get_vertex_identifiers( - static_cast(c_identifier)); - } break; - case graphTypeEnum::GraphCOOViewDouble: { - graph_ptr_union.GraphCOOViewDoublePtr->get_vertex_identifiers( - static_cast(c_identifier)); - } break; - default: { - CUGRAPH_FAIL("unexpected weight type"); - } - } -} +//////////////////////////////////////////////////////////////////////////////// // Wrapper for calling Louvain using a graph container template std::pair call_louvain(raft::handle_t const& handle, graph_container_t const& graph_container, + void* identifiers, void* parts, size_t max_level, weight_t resolution) { - std::pair results; + std::pair results; // FIXME: the only graph types currently in the container have ints for // vertex_t and edge_t types. In the future, additional types for vertices and // edges will be available, and when that happens, additional castings will be // needed for the 'parts' arg in particular. For now, it is hardcoded to int. 
- if (graph_container.graph_ptr_type == graphTypeEnum::GraphCSRViewFloat) { + if (graph_container.graph_ptr_type == graphTypeEnum::graph_t_float_mg) { + results = louvain(handle, + graph_container.graph_ptr_union.graph_t_float_mg_ptr->view(), + reinterpret_cast(parts), + max_level, + static_cast(resolution)); + + } else if (graph_container.graph_ptr_type == graphTypeEnum::graph_t_double_mg) { + results = louvain(handle, + graph_container.graph_ptr_union.graph_t_double_mg_ptr->view(), + reinterpret_cast(parts), + max_level, + static_cast(resolution)); + } else if (graph_container.graph_ptr_type == graphTypeEnum::GraphCSRViewFloat) { + // if (graph_container.graph_ptr_type == graphTypeEnum::GraphCSRViewFloat) { + graph_container.graph_ptr_union.GraphCSCViewFloatPtr->get_vertex_identifiers( + static_cast(identifiers)); results = louvain(handle, *(graph_container.graph_ptr_union.GraphCSRViewFloatPtr), reinterpret_cast(parts), max_level, static_cast(resolution)); - } else { + } else if (graph_container.graph_ptr_type == graphTypeEnum::GraphCSRViewDouble) { + graph_container.graph_ptr_union.GraphCSCViewDoublePtr->get_vertex_identifiers( + static_cast(identifiers)); results = louvain(handle, *(graph_container.graph_ptr_union.GraphCSRViewDoublePtr), reinterpret_cast(parts), @@ -302,12 +331,14 @@ std::pair call_louvain(raft::handle_t const& handle, // Explicit instantiations template std::pair call_louvain(raft::handle_t const& handle, graph_container_t const& graph_container, + void* identifiers, void* parts, size_t max_level, float resolution); template std::pair call_louvain(raft::handle_t const& handle, graph_container_t const& graph_container, + void* identifiers, void* parts, size_t max_level, double resolution); diff --git a/python/cugraph/community/louvain.pxd b/python/cugraph/community/louvain.pxd index 7b15b87f62b..eca15ba3d20 100644 --- a/python/cugraph/community/louvain.pxd +++ b/python/cugraph/community/louvain.pxd @@ -26,6 +26,7 @@ cdef extern from 
"utilities/cython.hpp" namespace "cugraph::cython": cdef pair[size_t, weight_t] call_louvain[weight_t]( const handle_t &handle, const graph_container_t &g, - void *louvain_parts, + void *identifiers, + void *parts, size_t max_level, weight_t resolution) except + diff --git a/python/cugraph/community/louvain_wrapper.pyx b/python/cugraph/community/louvain_wrapper.pyx index 6a8c06b948d..124fd9365dc 100644 --- a/python/cugraph/community/louvain_wrapper.pyx +++ b/python/cugraph/community/louvain_wrapper.pyx @@ -79,21 +79,19 @@ def louvain(input_graph, max_level, resolution): # FIXME: The excessive casting for the enum arg is needed to make cython # understand how to pass the enum value (this is the same pattern # used by cudf). This will not be needed with Cython 3.0 - populate_graph_container(graph_container, - ((legacyGraphTypeEnum.CSR)), - handle_[0], - c_offsets, c_indices, c_weights, - ((numberTypeEnum.intType)), - ((numberTypeEnum.intType)), - ((weightTypeMap[weights.dtype])), - num_verts, num_edges, - c_local_verts, c_local_edges, c_local_offsets, - False, True) # store_transposed, multi_gpu - - graph_container.get_vertex_identifiers(c_identifier) + populate_graph_container_legacy(graph_container, + ((legacyGraphTypeEnum.CSR)), + handle_[0], + c_offsets, c_indices, c_weights, + ((numberTypeEnum.intType)), + ((numberTypeEnum.intType)), + ((weightTypeMap[weights.dtype])), + num_verts, num_edges, + c_local_verts, c_local_edges, c_local_offsets) if weights.dtype == np.float32: num_level, final_modularity_float = c_louvain.call_louvain[float](handle_[0], graph_container, + c_identifier, c_partition, max_level, resolution) @@ -101,6 +99,7 @@ def louvain(input_graph, max_level, resolution): final_modularity = final_modularity_float else: num_level, final_modularity_double = c_louvain.call_louvain[double](handle_[0], graph_container, + c_identifier, c_partition, max_level, resolution) diff --git a/python/cugraph/dask/common/input_utils.py 
b/python/cugraph/dask/common/input_utils.py index c08582c1774..0140c9f06f9 100644 --- a/python/cugraph/dask/common/input_utils.py +++ b/python/cugraph/dask/common/input_utils.py @@ -223,3 +223,12 @@ def get_local_data(input_graph, by, load_balance=True): def get_mg_batch_data(dask_cudf_data): data = DistributedDataHandler.create(data=dask_cudf_data) return data + + +def get_distributed_data(input_ddf): + ddf = input_ddf + comms = Comms.get_comms() + data = DistributedDataHandler.create(data=ddf) + if data.worker_info is None and comms is not None: + data.calculate_worker_and_rank_info(comms) + return data diff --git a/python/cugraph/dask/community/louvain.pxd b/python/cugraph/dask/community/louvain.pxd index 1090ec18660..b6b4cd23143 100644 --- a/python/cugraph/dask/community/louvain.pxd +++ b/python/cugraph/dask/community/louvain.pxd @@ -19,11 +19,13 @@ from libcpp.utility cimport pair from cugraph.structure.graph_primtypes cimport * + cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": cdef pair[size_t, weight_t] call_louvain[weight_t]( const handle_t &handle, const graph_container_t &g, + void *identifiers, void *parts, size_t max_level, weight_t resolution) except + diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py index c183d54e85a..3b4132e821b 100644 --- a/python/cugraph/dask/community/louvain.py +++ b/python/cugraph/dask/community/louvain.py @@ -14,16 +14,30 @@ from dask.distributed import wait, default_client import cugraph.comms.comms as Comms -from cugraph.dask.common.input_utils import get_local_data - +from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.structure.shuffle import shuffle from cugraph.dask.community import louvain_wrapper as c_mg_louvain -def call_louvain(sID, data, local_data, max_level, resolution): +def call_louvain(sID, + data, + num_verts, + num_edges, + partition_row_size, + partition_col_size, + vertex_partition_offsets, + max_level, + 
resolution): + wid = Comms.get_worker_id(sID) handle = Comms.get_handle(sID) + return c_mg_louvain.louvain(data[0], - local_data, + num_verts, + num_edges, + partition_row_size, + partition_col_size, + vertex_partition_offsets, wid, handle, max_level, @@ -62,23 +76,30 @@ def louvain(input_graph, max_iter=100, resolution=1.0, load_balance=True): # raise Exception("input graph must be undirected") client = default_client() - - if(input_graph.local_data is not None and - input_graph.local_data['by'] == 'src'): - data = input_graph.local_data['data'] - else: - data = get_local_data(input_graph, by='src', load_balance=load_balance) + input_graph.compute_renumber_edge_list(transposed=False) + (ddf, + num_verts, + partition_row_size, + partition_col_size, + vertex_partition_offsets) = shuffle(input_graph, transposed=False) + num_edges = len(ddf) + data = get_distributed_data(ddf) result = dict([(data.worker_info[wf[0]]["rank"], client.submit( call_louvain, Comms.get_session_id(), wf[1], - data.local_data, + num_verts, + num_edges, + partition_row_size, + partition_col_size, + vertex_partition_offsets, max_iter, resolution, workers=[wf[0]])) for idx, wf in enumerate(data.worker_to_parts.items())]) + wait(result) (parts, modularity_score) = result[0].result() diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index ec17653e62b..86dc3bed524 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -25,7 +25,16 @@ import cudf import numpy as np -def louvain(input_df, local_data, rank, handle, max_level, resolution): +def louvain(input_df, + num_global_verts, + num_global_edges, + partition_row_size, + partition_col_size, + vertex_partition_offsets, + rank, + handle, + max_level, + resolution): """ Call MG Louvain """ @@ -38,8 +47,7 @@ def louvain(input_df, local_data, rank, handle, max_level, resolution): final_modularity = None # FIXME: much of this 
code is common to other algo wrappers, consider adding - # this to a shared utility as well (extracting pointers from - # dataframes, handling local_data, etc.) + # this to a shared utility as well src = input_df['src'] dst = input_df['dst'] @@ -48,50 +56,25 @@ def louvain(input_df, local_data, rank, handle, max_level, resolution): else: weights = None - num_verts = local_data['verts'].sum() - num_edges = local_data['edges'].sum() + # FIXME: needs to be edge_t type not int + cdef int num_partition_edges = len(src) - local_offset = local_data['offsets'][rank] - dst = dst - local_offset - num_local_verts = local_data['verts'][rank] - num_local_edges = len(src) - - cdef uintptr_t c_local_verts = local_data['verts'].__array_interface__['data'][0] - cdef uintptr_t c_local_edges = local_data['edges'].__array_interface__['data'][0] - cdef uintptr_t c_local_offsets = local_data['offsets'].__array_interface__['data'][0] - - [src, dst] = graph_primtypes_wrapper.datatype_cast([src, dst], [np.int32]) - if weights is not None: - if weights.dtype in [np.float32, np.double]: - [weights] = graph_primtypes_wrapper.datatype_cast([weights], [weights.dtype]) - else: - raise TypeError(f"unsupported type {weights.dtype} for weights") - - _offsets, indices, weights = graph_primtypes_wrapper.coo2csr(dst, src, weights) - offsets = _offsets[:num_local_verts + 1] - del _offsets - - # Create the output dataframe - df = cudf.DataFrame() - df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - df['partition'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - - cdef uintptr_t c_offsets = offsets.__cuda_array_interface__['data'][0] - cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] - cdef uintptr_t c_weights = NULL + # COO + cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] + cdef uintptr_t c_edge_weights = NULL if weights is not None: - c_weights = 
weights.__cuda_array_interface__['data'][0] - cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0] - cdef uintptr_t c_partition = df['partition'].__cuda_array_interface__['data'][0] + c_edge_weights = weights.__cuda_array_interface__['data'][0] - cdef float final_modularity_float = 1.0 - cdef double final_modularity_double = 1.0 - cdef int num_level = 0 + # data is on device, move to host (.values_host) since graph_t in + # graph_container needs a host array + cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets.values_host.__array_interface__['data'][0] # FIXME: Offsets and indices are currently hardcoded to int, but this may # not be acceptable in the future. weightTypeMap = {np.dtype("float32") : numberTypeEnum.floatType, np.dtype("double") : numberTypeEnum.doubleType} + weightType = weightTypeMap[weights.dtype] if weights is not None else numberTypeEnum.floatType cdef graph_container_t graph_container @@ -99,24 +82,33 @@ def louvain(input_df, local_data, rank, handle, max_level, resolution): # understand how to pass the enum value (this is the same pattern # used by cudf). 
This will not be needed with Cython 3.0 populate_graph_container(graph_container, - ((legacyGraphTypeEnum.CSR)), handle_[0], - c_offsets, c_indices, c_weights, + c_src_vertices, c_dst_vertices, c_edge_weights, + c_vertex_partition_offsets, ((numberTypeEnum.intType)), ((numberTypeEnum.intType)), - ((weightTypeMap[weights.dtype])), - num_verts, num_local_edges, - c_local_verts, c_local_edges, c_local_offsets, + ((weightType)), + num_partition_edges, + num_global_verts, num_global_edges, + partition_row_size, partition_col_size, False, True) # store_transposed, multi_gpu - if weights.dtype == np.float32: + # Create the output dataframe + df = cudf.DataFrame() + df['vertex'] = cudf.Series(np.zeros(num_global_verts, dtype=np.int32)) + df['partition'] = cudf.Series(np.zeros(num_global_verts, dtype=np.int32)) + + cdef uintptr_t c_identifiers = df['vertex'].__cuda_array_interface__['data'][0] + cdef uintptr_t c_partition = df['partition'].__cuda_array_interface__['data'][0] + + if weightType == numberTypeEnum.floatType: num_level, final_modularity_float = c_louvain.call_louvain[float]( - handle_[0], graph_container, c_partition, max_level, resolution) + handle_[0], graph_container, c_identifiers, c_partition, max_level, resolution) final_modularity = final_modularity_float else: num_level, final_modularity_double = c_louvain.call_louvain[double]( - handle_[0], graph_container, c_partition, max_level, resolution) + handle_[0], graph_container, c_identifiers, c_partition, max_level, resolution) final_modularity = final_modularity_double return df, final_modularity diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index e051e3e0a4e..42377506d26 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -200,15 +200,33 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": floatType "cugraph::cython::numberTypeEnum::floatType" doubleType 
"cugraph::cython::numberTypeEnum::doubleType" + cdef cppclass graph_container_t: + pass + + cdef void populate_graph_container( + graph_container_t &graph_container, + handle_t &handle, + void *src_vertices, + void *dst_vertices, + void *weights, + void *vertex_partition_offsets, + numberTypeEnum vertexType, + numberTypeEnum edgeType, + numberTypeEnum weightType, + int num_partition_edges, + size_t num_global_vertices, + size_t num_global_edges, + size_t row_comm_size, + size_t col_comm_size, + bool transposed, + bool multi_gpu) except + + ctypedef enum legacyGraphTypeEnum: CSR "cugraph::cython::legacyGraphTypeEnum::CSR" CSC "cugraph::cython::legacyGraphTypeEnum::CSC" COO "cugraph::cython::legacyGraphTypeEnum::COO" - cdef cppclass graph_container_t: - void get_vertex_identifiers(void *) - - cdef void populate_graph_container( + cdef void populate_graph_container_legacy( graph_container_t &graph_container, legacyGraphTypeEnum legacyType, const handle_t &handle, @@ -218,10 +236,8 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": numberTypeEnum offsetType, numberTypeEnum indexType, numberTypeEnum weightType, - int num_vertices, - int num_edges, + size_t num_global_vertices, + size_t num_global_edges, int *local_vertices, int *local_edges, - int *local_offsets, - bool transposed, - bool multi_gpu) except + + int *local_offsets) except + diff --git a/python/cugraph/structure/shuffle.py b/python/cugraph/structure/shuffle.py index 88791dd0f71..ea3c28463d7 100644 --- a/python/cugraph/structure/shuffle.py +++ b/python/cugraph/structure/shuffle.py @@ -1,3 +1,16 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import math from dask.dataframe.shuffle import rearrange_by_column import cudf @@ -17,7 +30,7 @@ def get_2D_div(ngpus): def _set_partitions_pre(df, vertex_row_partitions, vertex_col_partitions, - prows, pcols, transposed): + prows, pcols, transposed, partition_type): if transposed: r = df['dst'] c = df['src'] @@ -26,11 +39,15 @@ def _set_partitions_pre(df, vertex_row_partitions, vertex_col_partitions, c = df['dst'] r_div = vertex_row_partitions.searchsorted(r, side='right')-1 c_div = vertex_col_partitions.searchsorted(c, side='right')-1 - partitions = r_div % prows + c_div * prows + + if partition_type == 1: + partitions = r_div * pcols + c_div + else: + partitions = r_div % prows + c_div * prows return partitions -def shuffle(dg, transposed=False, prows=None, pcols=None): +def shuffle(dg, transposed=False, prows=None, pcols=None, partition_type=1): """ Shuffles the renumbered input distributed graph edgelist into ngpu partitions. The number of processes/gpus P = prows*pcols. 
The 2D @@ -42,7 +59,10 @@ def shuffle(dg, transposed=False, prows=None, pcols=None): ddf = dg.edgelist.edgelist_df ngpus = get_n_workers() if prows is None and pcols is None: - prows, pcols = get_2D_div(ngpus) + if partition_type == 1: + pcols, prows = get_2D_div(ngpus) + else: + prows, pcols = get_2D_div(ngpus) else: if prows is not None and pcols is not None: if ngpus != prows*pcols: @@ -62,24 +82,39 @@ def shuffle(dg, transposed=False, prows=None, pcols=None): renumber_vertex_count = dg.renumber_map.implementation.\ ddf.map_partitions(len).compute() renumber_vertex_cumsum = renumber_vertex_count.cumsum() - src_dtype = ddf['src'].dtype - dst_dtype = ddf['dst'].dtype - vertex_row_partitions = cudf.Series([0], dtype=src_dtype) - vertex_row_partitions = vertex_row_partitions.append(cudf.Series( - renumber_vertex_cumsum, dtype=src_dtype)) - num_verts = vertex_row_partitions.iloc[-1] + if transposed: + row_dtype = ddf['dst'].dtype + col_dtype = ddf['src'].dtype + else: + row_dtype = ddf['src'].dtype + col_dtype = ddf['dst'].dtype + + vertex_partition_offsets = cudf.Series([0], dtype=row_dtype) + vertex_partition_offsets = vertex_partition_offsets.append(cudf.Series( + renumber_vertex_cumsum, dtype=row_dtype)) + num_verts = vertex_partition_offsets.iloc[-1] + if partition_type == 1: + vertex_row_partitions = [] + for i in range(prows + 1): + vertex_row_partitions.append( + vertex_partition_offsets.iloc[i*pcols]) + vertex_row_partitions = cudf.Series( + vertex_row_partitions, dtype=row_dtype) + else: + vertex_row_partitions = vertex_partition_offsets vertex_col_partitions = [] for i in range(pcols + 1): - vertex_col_partitions.append(vertex_row_partitions.iloc[i*prows]) - vertex_col_partitions = cudf.Series(vertex_col_partitions, dtype=dst_dtype) + vertex_col_partitions.append(vertex_partition_offsets.iloc[i*prows]) + vertex_col_partitions = cudf.Series(vertex_col_partitions, dtype=col_dtype) meta = ddf._meta._constructor_sliced([0]) partitions = ddf.map_partitions( 
_set_partitions_pre, vertex_row_partitions=vertex_row_partitions, vertex_col_partitions=vertex_col_partitions, prows=prows, - pcols=pcols, transposed=transposed, meta=meta) + pcols=pcols, transposed=transposed, partition_type=partition_type, + meta=meta) ddf2 = ddf.assign(_partitions=partitions) ddf3 = rearrange_by_column( ddf2, @@ -90,4 +125,11 @@ def shuffle(dg, transposed=False, prows=None, pcols=None): ignore_index=True, ).drop(columns=["_partitions"]) - return ddf3, num_verts, vertex_row_partitions + partition_row_size = pcols + partition_col_size = prows + + return (ddf3, + num_verts, + partition_row_size, + partition_col_size, + vertex_partition_offsets) diff --git a/python/cugraph/tests/dask/test_mg_louvain.py b/python/cugraph/tests/dask/test_mg_louvain.py index b4655b02a8c..56401e338a4 100644 --- a/python/cugraph/tests/dask/test_mg_louvain.py +++ b/python/cugraph/tests/dask/test_mg_louvain.py @@ -47,7 +47,7 @@ def client_connection(): # setup cluster = LocalCUDACluster() client = Client(cluster) - Comms.initialize() + Comms.initialize(p2p=True) yield client From 765d776eec12f4e403e86e3048d7ab598730fee0 Mon Sep 17 00:00:00 2001 From: Alex Fender Date: Thu, 1 Oct 2020 16:42:52 -0500 Subject: [PATCH 69/74] [REVIEW] BLD Added RAPIDS cpp packages to cugraph dev env (#1169) * added cpp packages to dev env * changelog --- CHANGELOG.md | 1 + conda/environments/cugraph_dev_cuda10.1.yml | 2 ++ conda/environments/cugraph_dev_cuda10.2.yml | 2 ++ conda/environments/cugraph_dev_cuda11.0.yml | 2 ++ 4 files changed, 7 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5e7466b44a1..0bfee6ee990 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -24,6 +24,7 @@ - PR #1152 graph container cleanup, added arg for instantiating legacy types and switch statements to factory function - PR #1164 MG symmetrize and conda env updates - PR #1162 enhanced networkx testing +- PR #1169 Added RAPIDS cpp packages to cugraph dev env - PR #1165 updated remaining algorithms to be 
NetworkX compatible - PR #1176 Update ci/local/README.md diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index c9d04da58f4..05113f3d7ee 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -6,7 +6,9 @@ channels: - conda-forge dependencies: - cudf=0.16.* +- libcudf=0.16.* - rmm=0.16.* +- librmm=0.16.* - dask>=2.12.0 - distributed>=2.12.0 - dask-cuda=0.16* diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index 0285d9b2b10..02537e4bf6c 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -6,7 +6,9 @@ channels: - conda-forge dependencies: - cudf=0.16.* +- libcudf=0.16.* - rmm=0.16.* +- librmm=0.16.* - dask>=2.12.0 - distributed>=2.12.0 - dask-cuda=0.16* diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index 1b6d1400897..efd4b57dcc4 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -6,7 +6,9 @@ channels: - conda-forge dependencies: - cudf=0.16.* +- libcudf=0.16.* - rmm=0.16.* +- librmm=0.16.* - dask>=2.12.0 - distributed>=2.12.0 - dask-cuda=0.16* From a4b1bb3beae65eda2e866340388bcb009f6b3036 Mon Sep 17 00:00:00 2001 From: Alex Fender Date: Fri, 2 Oct 2020 07:45:21 -0500 Subject: [PATCH 70/74] [REVEIW] BLD Adopt RAFT model for cuhornet dependency (#1180) * Update CMakeLists.txt * Update CHANGELOG.md --- CHANGELOG.md | 1 + cpp/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0bfee6ee990..61c532d8f79 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -35,6 +35,7 @@ - PR #1158 Pass size_t* & size_t* instead of size_t[] & int[] for raft allgatherv's input parameters recvcounts & displs - PR #1168 Disabled MG tests on single GPU - PR #1166 Fix misspelling of function calls in 
asserts causing debug build to fail +- PR #1180 BLD Adopt RAFT model for cuhornet dependency # cuGraph 0.15.0 (26 Aug 2020) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 41cf82dfb39..40ab12ade94 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -185,7 +185,7 @@ set(CUHORNET_INCLUDE_DIR ${CUHORNET_DIR}/src/cuhornet CACHE STRING "Path to cuho ExternalProject_Add(cuhornet GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git - GIT_TAG main + GIT_TAG 7e8be7e439c2765384c40b004806aabae2d74666 PREFIX ${CUHORNET_DIR} CONFIGURE_COMMAND "" BUILD_COMMAND "" From fdfa5849365cd7a442ecbcfa11976f4fb21601e0 Mon Sep 17 00:00:00 2001 From: Dillon Cullinan Date: Fri, 2 Oct 2020 15:33:51 -0400 Subject: [PATCH 71/74] [REVIEW] FIX Fix notebook error handling (#1181) * FIX Fix notebook error handlinig * DOC Changelog update --- CHANGELOG.md | 2 ++ ci/gpu/build.sh | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61c532d8f79..e464aa5800c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -36,6 +36,8 @@ - PR #1168 Disabled MG tests on single GPU - PR #1166 Fix misspelling of function calls in asserts causing debug build to fail - PR #1180 BLD Adopt RAFT model for cuhornet dependency +- PR #1181 Fix notebook error handling in CI + # cuGraph 0.15.0 (26 Aug 2020) diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 3cef2e56877..83f234f787b 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -53,7 +53,7 @@ logger "Check GPU usage..." nvidia-smi logger "Activate conda env..." 
-source activate gdf +source activate rapids logger "conda install required packages" conda install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaults \ @@ -98,6 +98,10 @@ fi # TEST - Run GoogleTest and py.tests for libcugraph and cuGraph ################################################################################ +set +e -Eo pipefail +EXITCODE=0 +trap "EXITCODE=1" ERR + if hasArg --skip-tests; then logger "Skipping Tests..." else @@ -122,3 +126,5 @@ else ${WORKSPACE}/ci/gpu/test-notebooks.sh 2>&1 | tee nbtest.log python ${WORKSPACE}/ci/utils/nbtestlog2junitxml.py nbtest.log fi + +return ${EXITCODE} From 60b9b85d79fd677ff7b11befc5d179c8bb050407 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Fri, 2 Oct 2020 15:14:47 -0500 Subject: [PATCH 72/74] ENH Refactored cython graph factory code to scale to additional data types (#1178) * Minor update to comment to describe array sizes. * Changed graph container to use smart pointers, added arg for instantiating legacy types and switch statements for it to factory function. * Added PR 1152 to CHANGELOG.md * Removing unnecessary .get() call on unique_ptr instance * Using make_unique() instead of new * Updated to call drop() correctly after cudf API update. * Added args to support calling get_vertex_identifiers(). * Style fixes, removed commented out code meant for a future change. * Updated comment with description of new 'identifiers' arg. * Safety commit, still WIP, does not compile - updates for 2D graph support and upcoming 2D shuffle support * safety commit, does not pass tests: updated enough to be able to run the MG Louvain test. * Updated call_louvain() to use the new graph_t types. Still WIP, needs louvain updates to compile. * WIP: updates for incorporating new 2D shuffle data, still does not pass test. * Adding updates from iroy30 for calling shuffle from louvain.py * Updated to extract and pass the partition_t info and call the graph_t ctor. 
Now having a problem finding the right subcommunicator. * Updates to set up subcomms - having a problem with something needed by subcomms not being initialized: "address not mapped to object at address (nil)" * Added p2p flag to comms initialize() to enable initialization of UCX endpoints needed for MG test. * some proposed cleanup * safety commit: committing with debug prints to allow other team members to debug in parallel. * new technique for factory * safety commit: more updates to address problems instantiating graph_t (using num edges for partition instead of global for edgelist) and for debugging (print statments). * Changing how row and col rank are obtained, added debug prints for edge lists info * Fixes to partition_t get_matrix_partition_major/minor methods based on feedback. * Update shuffle.py * Integrating changes from iroy30 to produce "option 1" shuffle output by default, with an option to enable "option 2", temporarily enabled graph expensive checks for debugging. * Addressed review feedback: made var names consistent, fixed weights=None bug in cython code, added copyright to shuffle.py, changed how ranks are retrieved from the raft handle. * Removed debug prints. * Added PR 1163 to CHANGELOG.md * Removed extra newlines accidentally added to clean up diff in the PR, updated comment in cython code. * Added specific newlines back so file does not differ unnecessarily. * Disabled graph_t expensive check that was left enabled for debugging. * Added code path in call_louvain to support legacy graph types, to be removed when migration to graph_t types is complete. * Updates based on feedback from PR 1163: code cleanup/removed unused union members, consolidated legacy enum types, updated comments, initial support added for 64-bit vertex types (untested) * plumbed bool set based on running renumbering to set sorted_by_degree flag in graph container. * Added PR 1178 to CHANGELOG.md, C++ style fixes. 
* Addressed PR review feedback: added support for proper edge_t in cython wrapper and removed unnecessary vertex_t/edge_t int64,int32 combinations. Co-authored-by: Rick Ratzel Co-authored-by: Chuck Hastings Co-authored-by: Iroy30 <41401566+Iroy30@users.noreply.github.com> --- CHANGELOG.md | 1 + cpp/CMakeLists.txt | 2 +- cpp/include/utilities/cython.hpp | 97 ++-- cpp/src/community/louvain.cu | 24 + cpp/src/experimental/graph.cu | 2 +- cpp/src/experimental/graph_view.cu | 8 + cpp/src/utilities/cython.cpp | 347 ------------ cpp/src/utilities/cython.cu | 495 ++++++++++++++++++ python/cugraph/community/louvain_wrapper.pyx | 25 +- python/cugraph/dask/community/louvain.py | 5 + .../dask/community/louvain_wrapper.pyx | 51 +- python/cugraph/structure/graph_primtypes.pxd | 16 +- 12 files changed, 644 insertions(+), 429 deletions(-) delete mode 100644 cpp/src/utilities/cython.cpp create mode 100644 cpp/src/utilities/cython.cu diff --git a/CHANGELOG.md b/CHANGELOG.md index e464aa5800c..60a27752d7b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ - PR #1157 Louvain API update to use graph_container_t - PR #1151 MNMG extension for pattern accelerator based PageRank, Katz Centrality, BFS, and SSSP implementations (C++ part) - PR #1163 Integrated 2D shuffling and Louvain updates +- PR #1178 Refactored cython graph factory code to scale to additional data types ## Improvements - PR 1081 MNMG Renumbering - sort partitions by degree diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 40ab12ade94..7cfd24fa9ef 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -277,7 +277,7 @@ add_library(cugraph SHARED src/db/db_parser_integration_test.cu src/db/db_operators.cu src/utilities/spmv_1D.cu - src/utilities/cython.cpp + src/utilities/cython.cu src/structure/graph.cu src/link_analysis/pagerank.cu src/link_analysis/pagerank_1D.cu diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index eb2852fecdf..cf7428177d6 100644 --- 
a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -22,65 +22,55 @@ namespace cugraph { namespace cython { -enum class numberTypeEnum : int { intType, floatType, doubleType }; +enum class numberTypeEnum : int { int32Type, int64Type, floatType, doubleType }; -// FIXME: The GraphC??View* types will not be used in the near future. Those are -// left in place as cython wrappers transition from the GraphC* classes to -// graph_* classes. Remove GraphC* classes once the transition is complete. enum class graphTypeEnum : int { + // represents unintiialized or NULL ptr null, + // represents some legacy Cxx type. This and other LegacyCxx values are not + // used for the unique_ptr in a graph_container_t, but instead for when this + // enum is used for determining high-level code paths to take to prevent + // needing to expose each legacy enum value to cython. + LegacyCSR, + LegacyCSC, + LegacyCOO, + // represents that a GraphCxxView* unique_ptr type is present in a + // graph_container_t. GraphCSRViewFloat, GraphCSRViewDouble, GraphCSCViewFloat, GraphCSCViewDouble, GraphCOOViewFloat, GraphCOOViewDouble, - graph_t_float, - graph_t_double, - graph_t_float_mg, - graph_t_double_mg, - graph_t_float_transposed, - graph_t_double_transposed, - graph_t_float_mg_transposed, - graph_t_double_mg_transposed + // represents values present in the graph_container_t to construct a graph_t, + // but unlike legacy classes does not mean a graph_t unique_ptr is present in + // the container. + graph_t, }; -// Enum for the high-level type of GraphC??View* class to instantiate. -enum class legacyGraphTypeEnum : int { CSR, CSC, COO }; - // "container" for a graph type instance which insulates the owner from the // specifics of the actual graph type. This is intended to be used in Cython // code that only needs to pass a graph object to another wrapped C++ API. 
This // greatly simplifies the Cython code since the Cython definition only needs to // define the container and not the various individual graph types in Cython. struct graph_container_t { - // FIXME: use std::variant (or a better alternative, ie. type erasure?) instead - // of a union if possible + // FIXME: This union is in place only to support legacy calls, remove when + // migration to graph_t types is complete, or when legacy graph objects are + // constructed in the call_< wrappers instead of the + // populate_graph_container_legacy() function. union graphPtrUnion { ~graphPtrUnion() {} void* null; - std::unique_ptr> GraphCSRViewFloatPtr; - std::unique_ptr> GraphCSRViewDoublePtr; - std::unique_ptr> GraphCSCViewFloatPtr; - std::unique_ptr> GraphCSCViewDoublePtr; - std::unique_ptr> GraphCOOViewFloatPtr; - std::unique_ptr> GraphCOOViewDoublePtr; - std::unique_ptr> graph_t_float_ptr; - std::unique_ptr> graph_t_double_ptr; - std::unique_ptr> graph_t_float_mg_ptr; - std::unique_ptr> graph_t_double_mg_ptr; - std::unique_ptr> - graph_t_float_transposed_ptr; - std::unique_ptr> - graph_t_double_transposed_ptr; - std::unique_ptr> - graph_t_float_mg_transposed_ptr; - std::unique_ptr> - graph_t_double_mg_transposed_ptr; + std::unique_ptr> GraphCSRViewFloatPtr; + std::unique_ptr> GraphCSRViewDoublePtr; + std::unique_ptr> GraphCSCViewFloatPtr; + std::unique_ptr> GraphCSCViewDoublePtr; + std::unique_ptr> GraphCOOViewFloatPtr; + std::unique_ptr> GraphCOOViewDoublePtr; }; - graph_container_t() : graph_ptr_union{nullptr}, graph_ptr_type{graphTypeEnum::null} {} + graph_container_t() : graph_ptr_union{nullptr}, graph_type{graphTypeEnum::null} {} ~graph_container_t() {} // The expected usage of a graph_container_t is for it to be created as part @@ -93,7 +83,30 @@ struct graph_container_t { graph_container_t& operator=(const graph_container_t&) = delete; graphPtrUnion graph_ptr_union; - graphTypeEnum graph_ptr_type; + graphTypeEnum graph_type; + + // primitive data used for 
constructing graph_t instances. + void* src_vertices; + void* dst_vertices; + void* weights; + void* vertex_partition_offsets; + + size_t num_partition_edges; + size_t num_global_vertices; + size_t num_global_edges; + numberTypeEnum vertexType; + numberTypeEnum edgeType; + numberTypeEnum weightType; + bool transposed; + bool is_multi_gpu; + bool sorted_by_degree; + bool do_expensive_check; + bool hypergraph_partitioned; + int row_comm_size; + int col_comm_size; + int row_comm_rank; + int col_comm_rank; + experimental::graph_properties_t graph_props; }; // FIXME: finish description for vertex_partition_offsets @@ -107,7 +120,7 @@ struct graph_container_t { // container (ie. a container that has not been previously populated by // populate_graph_container()) // -// legacyGraphTypeEnum legacyType +// graphTypeEnum legacyType // Specifies the type of graph when instantiating a legacy graph type // (GraphCSRViewFloat, etc.). // NOTE: this parameter will be removed when the transition to exclusinve use @@ -144,8 +157,6 @@ struct graph_container_t { // bool multi_gpu // true if the resulting graph object is to be used for a multi-gpu // application -// -// FIXME: Should local_* values be void* as well? void populate_graph_container(graph_container_t& graph_container, raft::handle_t& handle, void* src_vertices, @@ -155,17 +166,19 @@ void populate_graph_container(graph_container_t& graph_container, numberTypeEnum vertexType, numberTypeEnum edgeType, numberTypeEnum weightType, - int num_partition_edges, + size_t num_partition_edges, size_t num_global_vertices, size_t num_global_edges, size_t row_comm_size, // pcols size_t col_comm_size, // prows + bool sorted_by_degree, bool transposed, bool multi_gpu); // FIXME: comment this function +// FIXME: Should local_* values be void* as well? 
void populate_graph_container_legacy(graph_container_t& graph_container, - legacyGraphTypeEnum legacyType, + graphTypeEnum legacyType, raft::handle_t const& handle, void* offsets, void* indices, diff --git a/cpp/src/community/louvain.cu b/cpp/src/community/louvain.cu index 094d950010c..559bb70d098 100644 --- a/cpp/src/community/louvain.cu +++ b/cpp/src/community/louvain.cu @@ -97,6 +97,18 @@ template std::pair louvain( int32_t *, size_t, double); +template std::pair louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + int64_t *, + size_t, + float); +template std::pair louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + int64_t *, + size_t, + double); template std::pair louvain( raft::handle_t const &, experimental::graph_view_t const &, @@ -135,6 +147,18 @@ template std::pair louvain( int32_t *, size_t, double); +template std::pair louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + int64_t *, + size_t, + float); +template std::pair louvain( + raft::handle_t const &, + experimental::graph_view_t const &, + int64_t *, + size_t, + double); template std::pair louvain( raft::handle_t const &, experimental::graph_view_t const &, diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 02f02ac6792..0294716089c 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -522,7 +522,7 @@ template class graph_t; template class graph_t; template class graph_t; template class graph_t; - +// template class graph_t; template class graph_t; template class graph_t; diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index 5038f521a69..999c91df427 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -295,6 +295,10 @@ template class graph_view_t; template class graph_view_t; template class graph_view_t; template class graph_view_t; +template class graph_view_t; +template class graph_view_t; 
+template class graph_view_t; +template class graph_view_t; template class graph_view_t; template class graph_view_t; @@ -308,6 +312,10 @@ template class graph_view_t; template class graph_view_t; template class graph_view_t; template class graph_view_t; +template class graph_view_t; +template class graph_view_t; +template class graph_view_t; +template class graph_view_t; } // namespace experimental } // namespace cugraph diff --git a/cpp/src/utilities/cython.cpp b/cpp/src/utilities/cython.cpp deleted file mode 100644 index 82191307cd3..00000000000 --- a/cpp/src/utilities/cython.cpp +++ /dev/null @@ -1,347 +0,0 @@ -/* - * Copyright (c) 2020, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include -#include -#include - -namespace cugraph { -namespace cython { - -// Populates a graph_container_t with a pointer to a new graph object and sets -// the meta-data accordingly. The graph container owns the pointer and it is -// assumed it will delete it on destruction. 
-void populate_graph_container(graph_container_t& graph_container, - raft::handle_t& handle, - void* src_vertices, - void* dst_vertices, - void* weights, - void* vertex_partition_offsets, - numberTypeEnum vertexType, - numberTypeEnum edgeType, - numberTypeEnum weightType, - int num_partition_edges, - size_t num_global_vertices, - size_t num_global_edges, - size_t row_comm_size, // pcols - size_t col_comm_size, // prows - bool transposed, - bool multi_gpu) -{ - CUGRAPH_EXPECTS(graph_container.graph_ptr_type == graphTypeEnum::null, - "populate_graph_container() can only be called on an empty container."); - - bool do_expensive_check{false}; - bool hypergraph_partitioned{false}; - - // FIXME: Consider setting up the subcomms right after initializing comms, no - // need to delay to this point. - // Setup the subcommunicators needed for this partition on the handle. - partition_2d::subcomm_factory_t subcomm_factory(handle, - row_comm_size); - - // FIXME: once the subcomms are set up earlier (outside this function), remove - // the row/col_comm_size params and retrieve them from the handle (commented - // out lines below) - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); - // auto const row_comm_size = row_comm.get_size(); // pcols - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - // auto const col_comm_size = col_comm.get_size(); // prows - - // Copy the contents of the vertex_partition_offsets (host array) to a vector - // as needed by the partition_t ctor. - int* vertex_partition_offsets_array = reinterpret_cast(vertex_partition_offsets); - - // FIXME: this needs to be vertex_t, not int? 
- std::vector vertex_partition_offsets_vect( - vertex_partition_offsets_array, - vertex_partition_offsets_array + (col_comm_size * row_comm_size) + 1); - - experimental::partition_t partition(vertex_partition_offsets_vect, - hypergraph_partitioned, - row_comm_size, - col_comm_size, - row_comm_rank, - col_comm_rank); - - experimental::graph_properties_t graph_props{.is_symmetric = false, .is_multigraph = false}; - - auto src_vertices_array = reinterpret_cast(src_vertices); - auto dst_vertices_array = reinterpret_cast(dst_vertices); - - if (multi_gpu) { - bool sorted_by_global_degree_within_vertex_partition{false}; - - if (weightType == numberTypeEnum::floatType) { - // vector of 1 representing the indivdual partition for this worker - std::vector> edge_lists; - edge_lists.push_back( - experimental::edgelist_t{src_vertices_array, - dst_vertices_array, - reinterpret_cast(weights), - num_partition_edges}); - auto g = new experimental::graph_t( - handle, - edge_lists, - partition, - num_global_vertices, - num_global_edges, - graph_props, - sorted_by_global_degree_within_vertex_partition, - do_expensive_check); - - graph_container.graph_ptr_union.graph_t_float_mg_ptr = - std::unique_ptr>(g); - graph_container.graph_ptr_type = graphTypeEnum::graph_t_float_mg; - - } else { - std::vector> edge_lists; - edge_lists.push_back( - experimental::edgelist_t{src_vertices_array, - dst_vertices_array, - reinterpret_cast(weights), - num_partition_edges}); - auto g = new experimental::graph_t( - handle, - edge_lists, - partition, - num_global_vertices, - num_global_edges, - graph_props, - sorted_by_global_degree_within_vertex_partition, - do_expensive_check); - - graph_container.graph_ptr_union.graph_t_double_mg_ptr = - std::unique_ptr>(g); - graph_container.graph_ptr_type = graphTypeEnum::graph_t_double_mg; - } - - } else { - bool sorted_by_degree{false}; - - if (weightType == numberTypeEnum::floatType) { - experimental::edgelist_t edge_list{src_vertices_array, - dst_vertices_array, - 
reinterpret_cast(weights), - num_partition_edges}; - auto g = new experimental::graph_t( - handle, edge_list, num_global_vertices, graph_props, sorted_by_degree, do_expensive_check); - - graph_container.graph_ptr_union.graph_t_float_ptr = - std::unique_ptr>(g); - graph_container.graph_ptr_type = graphTypeEnum::graph_t_float; - - } else { - experimental::edgelist_t edge_list{src_vertices_array, - dst_vertices_array, - reinterpret_cast(weights), - num_partition_edges}; - auto g = new experimental::graph_t( - handle, edge_list, num_global_vertices, graph_props, sorted_by_degree, do_expensive_check); - - graph_container.graph_ptr_union.graph_t_double_ptr = - std::unique_ptr>(g); - graph_container.graph_ptr_type = graphTypeEnum::graph_t_double; - } - } -} - -void populate_graph_container_legacy(graph_container_t& graph_container, - legacyGraphTypeEnum legacyType, - raft::handle_t const& handle, - void* offsets, - void* indices, - void* weights, - numberTypeEnum offsetType, - numberTypeEnum indexType, - numberTypeEnum weightType, - size_t num_global_vertices, - size_t num_global_edges, - int* local_vertices, - int* local_edges, - int* local_offsets) -{ - CUGRAPH_EXPECTS(graph_container.graph_ptr_type == graphTypeEnum::null, - "populate_graph_container() can only be called on an empty container."); - - // FIXME: This is soon-to-be legacy code left in place until the new graph_t - // class is supported everywhere else. Remove everything down to the comment - // line after the return stmnt. - // Keep new code below return stmnt enabled to ensure it builds. 
- if (weightType == numberTypeEnum::floatType) { - switch (legacyType) { - case legacyGraphTypeEnum::CSR: { - graph_container.graph_ptr_union.GraphCSRViewFloatPtr = - std::make_unique>(reinterpret_cast(offsets), - reinterpret_cast(indices), - reinterpret_cast(weights), - num_global_vertices, - num_global_edges); - graph_container.graph_ptr_type = graphTypeEnum::GraphCSRViewFloat; - (graph_container.graph_ptr_union.GraphCSRViewFloatPtr) - ->set_local_data(local_vertices, local_edges, local_offsets); - (graph_container.graph_ptr_union.GraphCSRViewFloatPtr) - ->set_handle(const_cast(&handle)); - } break; - case legacyGraphTypeEnum::CSC: { - graph_container.graph_ptr_union.GraphCSCViewFloatPtr = - std::make_unique>(reinterpret_cast(offsets), - reinterpret_cast(indices), - reinterpret_cast(weights), - num_global_vertices, - num_global_edges); - graph_container.graph_ptr_type = graphTypeEnum::GraphCSCViewFloat; - (graph_container.graph_ptr_union.GraphCSCViewFloatPtr) - ->set_local_data(local_vertices, local_edges, local_offsets); - (graph_container.graph_ptr_union.GraphCSCViewFloatPtr) - ->set_handle(const_cast(&handle)); - } break; - case legacyGraphTypeEnum::COO: { - graph_container.graph_ptr_union.GraphCOOViewFloatPtr = - std::make_unique>(reinterpret_cast(offsets), - reinterpret_cast(indices), - reinterpret_cast(weights), - num_global_vertices, - num_global_edges); - graph_container.graph_ptr_type = graphTypeEnum::GraphCOOViewFloat; - (graph_container.graph_ptr_union.GraphCOOViewFloatPtr) - ->set_local_data(local_vertices, local_edges, local_offsets); - (graph_container.graph_ptr_union.GraphCOOViewFloatPtr) - ->set_handle(const_cast(&handle)); - } break; - } - - } else { - switch (legacyType) { - case legacyGraphTypeEnum::CSR: { - graph_container.graph_ptr_union.GraphCSRViewDoublePtr = - std::make_unique>(reinterpret_cast(offsets), - reinterpret_cast(indices), - reinterpret_cast(weights), - num_global_vertices, - num_global_edges); - graph_container.graph_ptr_type = 
graphTypeEnum::GraphCSRViewDouble; - (graph_container.graph_ptr_union.GraphCSRViewDoublePtr) - ->set_local_data(local_vertices, local_edges, local_offsets); - (graph_container.graph_ptr_union.GraphCSRViewDoublePtr) - ->set_handle(const_cast(&handle)); - } break; - case legacyGraphTypeEnum::CSC: { - graph_container.graph_ptr_union.GraphCSCViewDoublePtr = - std::make_unique>(reinterpret_cast(offsets), - reinterpret_cast(indices), - reinterpret_cast(weights), - num_global_vertices, - num_global_edges); - graph_container.graph_ptr_type = graphTypeEnum::GraphCSCViewDouble; - (graph_container.graph_ptr_union.GraphCSCViewDoublePtr) - ->set_local_data(local_vertices, local_edges, local_offsets); - (graph_container.graph_ptr_union.GraphCSCViewDoublePtr) - ->set_handle(const_cast(&handle)); - } break; - case legacyGraphTypeEnum::COO: { - graph_container.graph_ptr_union.GraphCOOViewDoublePtr = - std::make_unique>(reinterpret_cast(offsets), - reinterpret_cast(indices), - reinterpret_cast(weights), - num_global_vertices, - num_global_edges); - graph_container.graph_ptr_type = graphTypeEnum::GraphCOOViewDouble; - (graph_container.graph_ptr_union.GraphCOOViewDoublePtr) - ->set_local_data(local_vertices, local_edges, local_offsets); - (graph_container.graph_ptr_union.GraphCOOViewDoublePtr) - ->set_handle(const_cast(&handle)); - } break; - } - } - return; -} - -//////////////////////////////////////////////////////////////////////////////// - -// Wrapper for calling Louvain using a graph container -template -std::pair call_louvain(raft::handle_t const& handle, - graph_container_t const& graph_container, - void* identifiers, - void* parts, - size_t max_level, - weight_t resolution) -{ - std::pair results; - - // FIXME: the only graph types currently in the container have ints for - // vertex_t and edge_t types. 
In the future, additional types for vertices and - // edges will be available, and when that happens, additional castings will be - // needed for the 'parts' arg in particular. For now, it is hardcoded to int. - if (graph_container.graph_ptr_type == graphTypeEnum::graph_t_float_mg) { - results = louvain(handle, - graph_container.graph_ptr_union.graph_t_float_mg_ptr->view(), - reinterpret_cast(parts), - max_level, - static_cast(resolution)); - - } else if (graph_container.graph_ptr_type == graphTypeEnum::graph_t_double_mg) { - results = louvain(handle, - graph_container.graph_ptr_union.graph_t_double_mg_ptr->view(), - reinterpret_cast(parts), - max_level, - static_cast(resolution)); - } else if (graph_container.graph_ptr_type == graphTypeEnum::GraphCSRViewFloat) { - // if (graph_container.graph_ptr_type == graphTypeEnum::GraphCSRViewFloat) { - graph_container.graph_ptr_union.GraphCSCViewFloatPtr->get_vertex_identifiers( - static_cast(identifiers)); - results = louvain(handle, - *(graph_container.graph_ptr_union.GraphCSRViewFloatPtr), - reinterpret_cast(parts), - max_level, - static_cast(resolution)); - } else if (graph_container.graph_ptr_type == graphTypeEnum::GraphCSRViewDouble) { - graph_container.graph_ptr_union.GraphCSCViewDoublePtr->get_vertex_identifiers( - static_cast(identifiers)); - results = louvain(handle, - *(graph_container.graph_ptr_union.GraphCSRViewDoublePtr), - reinterpret_cast(parts), - max_level, - static_cast(resolution)); - } - - return results; -} - -// Explicit instantiations -template std::pair call_louvain(raft::handle_t const& handle, - graph_container_t const& graph_container, - void* identifiers, - void* parts, - size_t max_level, - float resolution); - -template std::pair call_louvain(raft::handle_t const& handle, - graph_container_t const& graph_container, - void* identifiers, - void* parts, - size_t max_level, - double resolution); - -} // namespace cython -} // namespace cugraph diff --git a/cpp/src/utilities/cython.cu 
b/cpp/src/utilities/cython.cu new file mode 100644 index 00000000000..f10b11fe8a4 --- /dev/null +++ b/cpp/src/utilities/cython.cu @@ -0,0 +1,495 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace cugraph { +namespace cython { + +namespace detail { + +// FIXME: Add description of this function +template * = nullptr> +std::unique_ptr> +create_graph(raft::handle_t const& handle, graph_container_t const& graph_container) +{ + std::vector> edgelist( + {{reinterpret_cast(graph_container.src_vertices), + reinterpret_cast(graph_container.dst_vertices), + reinterpret_cast(graph_container.weights), + static_cast(graph_container.num_partition_edges)}}); + + std::vector partition_offsets_vector( + reinterpret_cast(graph_container.vertex_partition_offsets), + reinterpret_cast(graph_container.vertex_partition_offsets) + + (graph_container.row_comm_size * graph_container.col_comm_size) + 1); + + experimental::partition_t partition(partition_offsets_vector, + graph_container.hypergraph_partitioned, + graph_container.row_comm_size, + graph_container.col_comm_size, + graph_container.row_comm_rank, + graph_container.col_comm_rank); + + return std::make_unique>( + handle, + edgelist, + partition, + static_cast(graph_container.num_global_vertices), + static_cast(graph_container.num_global_edges), + 
graph_container.graph_props, + graph_container.sorted_by_degree, + graph_container.do_expensive_check); +} + +template * = nullptr> +std::unique_ptr> +create_graph(raft::handle_t const& handle, graph_container_t const& graph_container) +{ + experimental::edgelist_t edgelist{ + reinterpret_cast(graph_container.src_vertices), + reinterpret_cast(graph_container.dst_vertices), + reinterpret_cast(graph_container.weights), + static_cast(graph_container.num_partition_edges)}; + + return std::make_unique>( + handle, + edgelist, + static_cast(graph_container.num_global_vertices), + graph_container.graph_props, + graph_container.sorted_by_degree, + graph_container.do_expensive_check); +} + +} // namespace detail + +// Populates a graph_container_t with a pointer to a new graph object and sets +// the meta-data accordingly. The graph container owns the pointer and it is +// assumed it will delete it on destruction. +void populate_graph_container(graph_container_t& graph_container, + raft::handle_t& handle, + void* src_vertices, + void* dst_vertices, + void* weights, + void* vertex_partition_offsets, + numberTypeEnum vertexType, + numberTypeEnum edgeType, + numberTypeEnum weightType, + size_t num_partition_edges, + size_t num_global_vertices, + size_t num_global_edges, + size_t row_comm_size, // pcols + size_t col_comm_size, // prows + bool sorted_by_degree, + bool transposed, + bool multi_gpu) +{ + CUGRAPH_EXPECTS(graph_container.graph_type == graphTypeEnum::null, + "populate_graph_container() can only be called on an empty container."); + + bool do_expensive_check{false}; + bool hypergraph_partitioned{false}; + + // FIXME: Consider setting up the subcomms right after initializing comms, no + // need to delay to this point. + // Setup the subcommunicators needed for this partition on the handle. 
+ partition_2d::subcomm_factory_t subcomm_factory(handle, + row_comm_size); + // FIXME: once the subcomms are set up earlier (outside this function), remove + // the row/col_comm_size params and retrieve them from the handle (commented + // out lines below) + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + // auto const row_comm_size = row_comm.get_size(); // pcols + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + // auto const col_comm_size = col_comm.get_size(); // prows + + graph_container.vertex_partition_offsets = vertex_partition_offsets; + graph_container.src_vertices = src_vertices; + graph_container.dst_vertices = dst_vertices; + graph_container.weights = weights; + graph_container.num_partition_edges = num_partition_edges; + graph_container.num_global_vertices = num_global_vertices; + graph_container.num_global_edges = num_global_edges; + graph_container.vertexType = vertexType; + graph_container.edgeType = edgeType; + graph_container.weightType = weightType; + graph_container.transposed = transposed; + graph_container.is_multi_gpu = multi_gpu; + graph_container.hypergraph_partitioned = hypergraph_partitioned; + graph_container.row_comm_size = row_comm_size; + graph_container.col_comm_size = col_comm_size; + graph_container.row_comm_rank = row_comm_rank; + graph_container.col_comm_rank = col_comm_rank; + graph_container.sorted_by_degree = sorted_by_degree; + graph_container.do_expensive_check = do_expensive_check; + + experimental::graph_properties_t graph_props{.is_symmetric = false, .is_multigraph = false}; + graph_container.graph_props = graph_props; + + graph_container.graph_type = graphTypeEnum::graph_t; +} + +void populate_graph_container_legacy(graph_container_t& graph_container, + graphTypeEnum legacyType, + raft::handle_t const& handle, + void* offsets, + void* 
indices, + void* weights, + numberTypeEnum offsetType, + numberTypeEnum indexType, + numberTypeEnum weightType, + size_t num_global_vertices, + size_t num_global_edges, + int* local_vertices, + int* local_edges, + int* local_offsets) +{ + CUGRAPH_EXPECTS(graph_container.graph_type == graphTypeEnum::null, + "populate_graph_container() can only be called on an empty container."); + + // FIXME: This is soon-to-be legacy code left in place until the new graph_t + // class is supported everywhere else. Remove everything down to the comment + // line after the return stmnt. + // Keep new code below return stmnt enabled to ensure it builds. + if (weightType == numberTypeEnum::floatType) { + switch (legacyType) { + case graphTypeEnum::LegacyCSR: { + graph_container.graph_ptr_union.GraphCSRViewFloatPtr = + std::make_unique>(reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_global_vertices, + num_global_edges); + graph_container.graph_type = graphTypeEnum::GraphCSRViewFloat; + (graph_container.graph_ptr_union.GraphCSRViewFloatPtr) + ->set_local_data(local_vertices, local_edges, local_offsets); + (graph_container.graph_ptr_union.GraphCSRViewFloatPtr) + ->set_handle(const_cast(&handle)); + } break; + case graphTypeEnum::LegacyCSC: { + graph_container.graph_ptr_union.GraphCSCViewFloatPtr = + std::make_unique>(reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_global_vertices, + num_global_edges); + graph_container.graph_type = graphTypeEnum::GraphCSCViewFloat; + (graph_container.graph_ptr_union.GraphCSCViewFloatPtr) + ->set_local_data(local_vertices, local_edges, local_offsets); + (graph_container.graph_ptr_union.GraphCSCViewFloatPtr) + ->set_handle(const_cast(&handle)); + } break; + case graphTypeEnum::LegacyCOO: { + graph_container.graph_ptr_union.GraphCOOViewFloatPtr = + std::make_unique>(reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_global_vertices, 
+ num_global_edges); + graph_container.graph_type = graphTypeEnum::GraphCOOViewFloat; + (graph_container.graph_ptr_union.GraphCOOViewFloatPtr) + ->set_local_data(local_vertices, local_edges, local_offsets); + (graph_container.graph_ptr_union.GraphCOOViewFloatPtr) + ->set_handle(const_cast(&handle)); + } break; + default: CUGRAPH_FAIL("unsupported graphTypeEnum value"); break; + } + + } else { + switch (legacyType) { + case graphTypeEnum::LegacyCSR: { + graph_container.graph_ptr_union.GraphCSRViewDoublePtr = + std::make_unique>(reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_global_vertices, + num_global_edges); + graph_container.graph_type = graphTypeEnum::GraphCSRViewDouble; + (graph_container.graph_ptr_union.GraphCSRViewDoublePtr) + ->set_local_data(local_vertices, local_edges, local_offsets); + (graph_container.graph_ptr_union.GraphCSRViewDoublePtr) + ->set_handle(const_cast(&handle)); + } break; + case graphTypeEnum::LegacyCSC: { + graph_container.graph_ptr_union.GraphCSCViewDoublePtr = + std::make_unique>(reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_global_vertices, + num_global_edges); + graph_container.graph_type = graphTypeEnum::GraphCSCViewDouble; + (graph_container.graph_ptr_union.GraphCSCViewDoublePtr) + ->set_local_data(local_vertices, local_edges, local_offsets); + (graph_container.graph_ptr_union.GraphCSCViewDoublePtr) + ->set_handle(const_cast(&handle)); + } break; + case graphTypeEnum::LegacyCOO: { + graph_container.graph_ptr_union.GraphCOOViewDoublePtr = + std::make_unique>(reinterpret_cast(offsets), + reinterpret_cast(indices), + reinterpret_cast(weights), + num_global_vertices, + num_global_edges); + graph_container.graph_type = graphTypeEnum::GraphCOOViewDouble; + (graph_container.graph_ptr_union.GraphCOOViewDoublePtr) + ->set_local_data(local_vertices, local_edges, local_offsets); + (graph_container.graph_ptr_union.GraphCOOViewDoublePtr) + 
->set_handle(const_cast(&handle)); + } break; + default: CUGRAPH_FAIL("unsupported graphTypeEnum value"); break; + } + } + return; +} + +//////////////////////////////////////////////////////////////////////////////// + +namespace detail { +template +std::pair call_louvain(raft::handle_t const& handle, + graph_view_t const& graph_view, + void* identifiers, + void* parts, + size_t max_level, + weight_t resolution) +{ + thrust::copy( // rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::device, + thrust::make_counting_iterator(graph_view.get_local_vertex_first()), + thrust::make_counting_iterator(graph_view.get_local_vertex_last()), + reinterpret_cast(identifiers)); + + return louvain(handle, + graph_view, + reinterpret_cast(parts), + max_level, + static_cast(resolution)); +} + +} // namespace detail + +namespace detail { + +// Final, fully-templatized call. +template +return_t call_function(raft::handle_t const& handle, + graph_container_t const& graph_container, + function_t function) +{ + auto graph = + create_graph(handle, graph_container); + + return function(handle, graph->view()); +} + +// Makes another call based on vertex_t and edge_t +template +return_t call_function(raft::handle_t const& handle, + graph_container_t const& graph_container, + function_t function) +{ + // Since only vertex/edge types (int32,int32), (int32,int64), and + // (int64,int64) are being supported, explicitely check for those types and + // ensure (int64,int32) is rejected as unsupported. 
+ if ((graph_container.vertexType == numberTypeEnum::int32Type) && + (graph_container.edgeType == numberTypeEnum::int32Type)) { + return call_function(handle, graph_container, function); + } else if ((graph_container.vertexType == numberTypeEnum::int32Type) && + (graph_container.edgeType == numberTypeEnum::int64Type)) { + return call_function(handle, graph_container, function); + } else if ((graph_container.vertexType == numberTypeEnum::int64Type) && + (graph_container.edgeType == numberTypeEnum::int64Type)) { + return call_function(handle, graph_container, function); + } else { + CUGRAPH_FAIL("vertexType/edgeType combination unsupported"); + } +} + +// Makes another call based on weight_t +template +return_t call_function(raft::handle_t const& handle, + graph_container_t const& graph_container, + function_t function) +{ + if (graph_container.weightType == numberTypeEnum::floatType) { + return call_function( + handle, graph_container, function); + } else if (graph_container.weightType == numberTypeEnum::doubleType) { + return call_function( + handle, graph_container, function); + } else { + CUGRAPH_FAIL("weightType unsupported"); + } +} + +// Makes another call based on multi_gpu +template +return_t call_function(raft::handle_t const& handle, + graph_container_t const& graph_container, + function_t function) +{ + if (graph_container.is_multi_gpu) { + return call_function(handle, graph_container, function); + } else { + return call_function( + handle, graph_container, function); + } +} + +// Initial call_function() call starts here. 
+// This makes another call based on transposed +template +return_t call_function(raft::handle_t const& handle, + graph_container_t const& graph_container, + function_t function) +{ + if (graph_container.transposed) { + return call_function(handle, graph_container, function); + } else { + return call_function(handle, graph_container, function); + } +} + +template +class louvain_functor { + public: + louvain_functor(void* identifiers, void* parts, size_t max_level, weight_t resolution) + : identifiers_(identifiers), parts_(parts), max_level_(max_level), resolution_(resolution) + { + } + + template + std::pair operator()(raft::handle_t const& handle, + graph_view_t const& graph_view) + { + return cugraph::louvain(handle, + graph_view, + reinterpret_cast(parts_), + max_level_, + resolution_); + } + + private: + void* identifiers_; // FIXME: this will be used in a future PR + void* parts_; + size_t max_level_; + weight_t resolution_; +}; + +} // namespace detail + +// Wrapper for calling Louvain using a graph container +template +std::pair call_louvain(raft::handle_t const& handle, + graph_container_t const& graph_container, + void* identifiers, + void* parts, + size_t max_level, + weight_t resolution) +{ + // LEGACY PATH - remove when migration to graph_t types complete + if (graph_container.graph_type == graphTypeEnum::GraphCSRViewFloat) { + graph_container.graph_ptr_union.GraphCSRViewFloatPtr->get_vertex_identifiers( + static_cast(identifiers)); + return louvain(handle, + *(graph_container.graph_ptr_union.GraphCSRViewFloatPtr), + reinterpret_cast(parts), + max_level, + static_cast(resolution)); + } else if (graph_container.graph_type == graphTypeEnum::GraphCSRViewDouble) { + graph_container.graph_ptr_union.GraphCSRViewDoublePtr->get_vertex_identifiers( + static_cast(identifiers)); + return louvain(handle, + *(graph_container.graph_ptr_union.GraphCSRViewDoublePtr), + reinterpret_cast(parts), + max_level, + static_cast(resolution)); + } + + // NON-LEGACY PATH + 
detail::louvain_functor functor{identifiers, parts, max_level, resolution}; + + return detail::call_function>( + handle, graph_container, functor); +} + +// Explicit instantiations +template std::pair call_louvain(raft::handle_t const& handle, + graph_container_t const& graph_container, + void* identifiers, + void* parts, + size_t max_level, + float resolution); + +template std::pair call_louvain(raft::handle_t const& handle, + graph_container_t const& graph_container, + void* identifiers, + void* parts, + size_t max_level, + double resolution); + +} // namespace cython +} // namespace cugraph diff --git a/python/cugraph/community/louvain_wrapper.pyx b/python/cugraph/community/louvain_wrapper.pyx index 124fd9365dc..6b218a0b962 100644 --- a/python/cugraph/community/louvain_wrapper.pyx +++ b/python/cugraph/community/louvain_wrapper.pyx @@ -26,6 +26,13 @@ import rmm import numpy as np +# FIXME: move this to a more reusable location +numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, + np.dtype("int64") : numberTypeEnum.int64Type, + np.dtype("float32") : numberTypeEnum.floatType, + np.dtype("double") : numberTypeEnum.doubleType} + + def louvain(input_graph, max_level, resolution): """ Call louvain @@ -50,6 +57,8 @@ def louvain(input_graph, max_level, resolution): else: weights = cudf.Series(np.full(num_edges, 1.0, dtype=np.float32)) + weight_t = weights.dtype + # Create the output dataframe df = cudf.DataFrame() df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) @@ -64,32 +73,26 @@ def louvain(input_graph, max_level, resolution): cdef uintptr_t c_local_edges = NULL; cdef uintptr_t c_local_offsets = NULL; - cdef float final_modularity_float = 1.0 cdef double final_modularity_double = 1.0 cdef int num_level = 0 - # FIXME: Offsets and indices are currently hardcoded to int, but this may - # not be acceptable in the future. 
- weightTypeMap = {np.dtype("float32") : numberTypeEnum.floatType, - np.dtype("double") : numberTypeEnum.doubleType} - cdef graph_container_t graph_container # FIXME: The excessive casting for the enum arg is needed to make cython # understand how to pass the enum value (this is the same pattern # used by cudf). This will not be needed with Cython 3.0 populate_graph_container_legacy(graph_container, - ((legacyGraphTypeEnum.CSR)), + ((graphTypeEnum.LegacyCSR)), handle_[0], c_offsets, c_indices, c_weights, - ((numberTypeEnum.intType)), - ((numberTypeEnum.intType)), - ((weightTypeMap[weights.dtype])), + ((numberTypeEnum.int32Type)), + ((numberTypeEnum.int32Type)), + ((numberTypeMap[weight_t])), num_verts, num_edges, c_local_verts, c_local_edges, c_local_offsets) - if weights.dtype == np.float32: + if weight_t == np.float32: num_level, final_modularity_float = c_louvain.call_louvain[float](handle_[0], graph_container, c_identifier, c_partition, diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py index 3b4132e821b..06f3b47b3b4 100644 --- a/python/cugraph/dask/community/louvain.py +++ b/python/cugraph/dask/community/louvain.py @@ -26,6 +26,7 @@ def call_louvain(sID, partition_row_size, partition_col_size, vertex_partition_offsets, + sorted_by_degree, max_level, resolution): @@ -40,6 +41,7 @@ def call_louvain(sID, vertex_partition_offsets, wid, handle, + sorted_by_degree, max_level, resolution) @@ -76,7 +78,9 @@ def louvain(input_graph, max_iter=100, resolution=1.0, load_balance=True): # raise Exception("input graph must be undirected") client = default_client() + # Calling renumbering results in data that is sorted by degree input_graph.compute_renumber_edge_list(transposed=False) + sorted_by_degree = True (ddf, num_verts, partition_row_size, @@ -95,6 +99,7 @@ def louvain(input_graph, max_iter=100, resolution=1.0, load_balance=True): partition_row_size, partition_col_size, vertex_partition_offsets, + sorted_by_degree, 
max_iter, resolution, workers=[wf[0]])) diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index 86dc3bed524..3d72a7c3bd6 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -25,6 +25,13 @@ import cudf import numpy as np +# FIXME: move this to a more reusable location +numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, + np.dtype("int64") : numberTypeEnum.int64Type, + np.dtype("float32") : numberTypeEnum.floatType, + np.dtype("double") : numberTypeEnum.doubleType} + + def louvain(input_df, num_global_verts, num_global_edges, @@ -33,6 +40,7 @@ def louvain(input_df, vertex_partition_offsets, rank, handle, + sorted_by_degree, max_level, resolution): """ @@ -51,31 +59,29 @@ def louvain(input_df, src = input_df['src'] dst = input_df['dst'] + num_partition_edges = len(src) + if "value" in input_df.columns: weights = input_df['value'] else: - weights = None + weights = cudf.Series(np.full(num_partition_edges, 1.0, dtype=np.float32)) - # FIXME: needs to be edge_t type not int - cdef int num_partition_edges = len(src) + vertex_t = src.dtype + if num_global_edges > (2**31 - 1): + edge_t = np.dtype("int64") + else: + edge_t = np.dtype("int32") + weight_t = weights.dtype # COO cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] - cdef uintptr_t c_edge_weights = NULL - if weights is not None: - c_edge_weights = weights.__cuda_array_interface__['data'][0] + cdef uintptr_t c_edge_weights = weights.__cuda_array_interface__['data'][0] # data is on device, move to host (.values_host) since graph_t in # graph_container needs a host array cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets.values_host.__array_interface__['data'][0] - # FIXME: Offsets and indices are currently hardcoded to int, but this may - # not be acceptable in the 
future. - weightTypeMap = {np.dtype("float32") : numberTypeEnum.floatType, - np.dtype("double") : numberTypeEnum.doubleType} - weightType = weightTypeMap[weights.dtype] if weights is not None else numberTypeEnum.floatType - cdef graph_container_t graph_container # FIXME: The excessive casting for the enum arg is needed to make cython @@ -85,30 +91,35 @@ def louvain(input_df, handle_[0], c_src_vertices, c_dst_vertices, c_edge_weights, c_vertex_partition_offsets, - ((numberTypeEnum.intType)), - ((numberTypeEnum.intType)), - ((weightType)), + ((numberTypeMap[vertex_t])), + ((numberTypeMap[edge_t])), + ((numberTypeMap[weight_t])), num_partition_edges, num_global_verts, num_global_edges, partition_row_size, partition_col_size, + sorted_by_degree, False, True) # store_transposed, multi_gpu # Create the output dataframe df = cudf.DataFrame() - df['vertex'] = cudf.Series(np.zeros(num_global_verts, dtype=np.int32)) - df['partition'] = cudf.Series(np.zeros(num_global_verts, dtype=np.int32)) + df['vertex'] = cudf.Series(np.zeros(num_global_verts, dtype=vertex_t)) + df['partition'] = cudf.Series(np.zeros(num_global_verts, dtype=vertex_t)) cdef uintptr_t c_identifiers = df['vertex'].__cuda_array_interface__['data'][0] cdef uintptr_t c_partition = df['partition'].__cuda_array_interface__['data'][0] - if weightType == numberTypeEnum.floatType: + if weight_t == np.float32: num_level, final_modularity_float = c_louvain.call_louvain[float]( - handle_[0], graph_container, c_identifiers, c_partition, max_level, resolution) + handle_[0], graph_container, + c_identifiers, c_partition, + max_level, resolution) final_modularity = final_modularity_float else: num_level, final_modularity_double = c_louvain.call_louvain[double]( - handle_[0], graph_container, c_identifiers, c_partition, max_level, resolution) + handle_[0], graph_container, + c_identifiers, c_partition, + max_level, resolution) final_modularity = final_modularity_double return df, final_modularity diff --git 
a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index 42377506d26..2879436690f 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -196,7 +196,8 @@ cdef GraphViewType get_graph_view(input_graph, bool weightless=*, GraphViewType* cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": ctypedef enum numberTypeEnum: - intType "cugraph::cython::numberTypeEnum::intType" + int32Type "cugraph::cython::numberTypeEnum::int32Type" + int64Type "cugraph::cython::numberTypeEnum::int64Type" floatType "cugraph::cython::numberTypeEnum::floatType" doubleType "cugraph::cython::numberTypeEnum::doubleType" @@ -213,22 +214,23 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": numberTypeEnum vertexType, numberTypeEnum edgeType, numberTypeEnum weightType, - int num_partition_edges, + size_t num_partition_edges, size_t num_global_vertices, size_t num_global_edges, size_t row_comm_size, size_t col_comm_size, + bool sorted_by_degree, bool transposed, bool multi_gpu) except + - ctypedef enum legacyGraphTypeEnum: - CSR "cugraph::cython::legacyGraphTypeEnum::CSR" - CSC "cugraph::cython::legacyGraphTypeEnum::CSC" - COO "cugraph::cython::legacyGraphTypeEnum::COO" + ctypedef enum graphTypeEnum: + LegacyCSR "cugraph::cython::graphTypeEnum::LegacyCSR" + LegacyCSC "cugraph::cython::graphTypeEnum::LegacyCSC" + LegacyCOO "cugraph::cython::graphTypeEnum::LegacyCOO" cdef void populate_graph_container_legacy( graph_container_t &graph_container, - legacyGraphTypeEnum legacyType, + graphTypeEnum legacyType, const handle_t &handle, void *offsets, void *indices, From 6fd59f1845bbbb94b561b362965460555d501a72 Mon Sep 17 00:00:00 2001 From: Alex Fender Date: Mon, 5 Oct 2020 17:05:46 -0500 Subject: [PATCH 73/74] [REVEIW] BLD Installing raft headers under cugraph (#1186) * instlling raft headers under cugraph --- CHANGELOG.md | 2 +- cpp/CMakeLists.txt | 2 ++ 2 files changed, 
3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 60a27752d7b..c6d934d40ac 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -38,7 +38,7 @@ - PR #1166 Fix misspelling of function calls in asserts causing debug build to fail - PR #1180 BLD Adopt RAFT model for cuhornet dependency - PR #1181 Fix notebook error handling in CI - +- PR #1186 BLD Installing raft headers under cugraph # cuGraph 0.15.0 (26 Aug 2020) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 7cfd24fa9ef..c3606646860 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -417,6 +417,8 @@ install(TARGETS cugraph LIBRARY install(DIRECTORY include/ DESTINATION include/cugraph) +install(DIRECTORY ${RAFT_DIR}/cpp/include/raft/ + DESTINATION include/cugraph/raft) ################################################################################################### # - make documentation ---------------------------------------------------------------------------- # requires doxygen and graphviz to be installed From 9a9d2c77e4b48442c04f62ff370496e70b4d6c31 Mon Sep 17 00:00:00 2001 From: Alex Fender Date: Mon, 5 Oct 2020 17:10:18 -0500 Subject: [PATCH 74/74] [REVEIW] BLD getting latest tags (#1184) * cuhornet tag * Update CHANGELOG.md --- CHANGELOG.md | 1 + cpp/CMakeLists.txt | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c6d934d40ac..cd6d6690659 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -28,6 +28,7 @@ - PR #1169 Added RAPIDS cpp packages to cugraph dev env - PR #1165 updated remaining algorithms to be NetworkX compatible - PR #1176 Update ci/local/README.md +- PR #1184 BLD getting latest tags ## Bug Fixes - PR #1131 Show style checker errors with set +e diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c3606646860..df17d7c14dd 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -185,7 +185,7 @@ set(CUHORNET_INCLUDE_DIR ${CUHORNET_DIR}/src/cuhornet CACHE STRING "Path to cuho 
ExternalProject_Add(cuhornet GIT_REPOSITORY https://github.com/rapidsai/cuhornet.git - GIT_TAG 7e8be7e439c2765384c40b004806aabae2d74666 + GIT_TAG 9cb8e8803852bd895a9c95c0fe778ad6eeefa7ad PREFIX ${CUHORNET_DIR} CONFIGURE_COMMAND "" BUILD_COMMAND ""