From 277494aa598fe2b35ad04f5c7bba2163a2a65756 Mon Sep 17 00:00:00 2001 From: Raymond Douglass Date: Fri, 2 Oct 2020 10:41:58 -0400 Subject: [PATCH 01/16] DOC v0.17 Updates --- CHANGELOG.md | 8 ++++++++ conda/environments/cugraph_dev_cuda10.1.yml | 14 +++++++------- conda/environments/cugraph_dev_cuda10.2.yml | 14 +++++++------- conda/environments/cugraph_dev_cuda11.0.yml | 14 +++++++------- cpp/CMakeLists.txt | 2 +- docs/source/conf.py | 6 +++--- 6 files changed, 33 insertions(+), 25 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 61c532d8f79..5831e0d3842 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,11 @@ +# cuGraph 0.17.0 (Date TBD) + +## New Features + +## Improvements + +## Bug Fixes + # cuGraph 0.16.0 (Date TBD) ## New Features diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index 05113f3d7ee..c6dafdf5aee 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -5,16 +5,16 @@ channels: - rapidsai-nightly - conda-forge dependencies: -- cudf=0.16.* -- libcudf=0.16.* -- rmm=0.16.* -- librmm=0.16.* +- cudf=0.17.* +- libcudf=0.17.* +- rmm=0.17.* +- librmm=0.17.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.16* -- dask-cudf=0.16* +- dask-cuda=0.17* +- dask-cudf=0.17* - nccl>=2.5 -- ucx-py=0.16* +- ucx-py=0.17* - scipy - networkx - python-louvain diff --git a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index 02537e4bf6c..e7a7b339a37 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -5,16 +5,16 @@ channels: - rapidsai-nightly - conda-forge dependencies: -- cudf=0.16.* -- libcudf=0.16.* -- rmm=0.16.* -- librmm=0.16.* +- cudf=0.17.* +- libcudf=0.17.* +- rmm=0.17.* +- librmm=0.17.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.16* -- dask-cudf=0.16* +- dask-cuda=0.17* +- dask-cudf=0.17* - nccl>=2.5 -- ucx-py=0.16* +- 
ucx-py=0.17* - scipy - networkx - python-louvain diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index efd4b57dcc4..8691acff4ca 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -5,16 +5,16 @@ channels: - rapidsai-nightly - conda-forge dependencies: -- cudf=0.16.* -- libcudf=0.16.* -- rmm=0.16.* -- librmm=0.16.* +- cudf=0.17.* +- libcudf=0.17.* +- rmm=0.17.* +- librmm=0.17.* - dask>=2.12.0 - distributed>=2.12.0 -- dask-cuda=0.16* -- dask-cudf=0.16* +- dask-cuda=0.17* +- dask-cudf=0.17* - nccl>=2.5 -- ucx-py=0.16* +- ucx-py=0.17* - scipy - networkx - python-louvain diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 40ab12ade94..444dd07ed74 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -16,7 +16,7 @@ cmake_minimum_required(VERSION 3.12 FATAL_ERROR) -project(CUGRAPH VERSION 0.16.0 LANGUAGES C CXX CUDA) +project(CUGRAPH VERSION 0.17.0 LANGUAGES C CXX CUDA) ################################################################################################### # - build type ------------------------------------------------------------------------------------ diff --git a/docs/source/conf.py b/docs/source/conf.py index 0c8a0316278..fad2f3e21bb 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -65,9 +65,9 @@ # built documents. # # The short X.Y version. -version = '0.16' +version = '0.17' # The full version, including alpha/beta/rc tags. -release = '0.16.0' +release = '0.17.0' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
@@ -195,4 +195,4 @@ def setup(app): '.md': CommonMarkParser, } -source_suffix = ['.rst', '.md'] \ No newline at end of file +source_suffix = ['.rst', '.md'] From ae5d0f2a81fa1523155cae7cf9bc836d82da7716 Mon Sep 17 00:00:00 2001 From: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Date: Wed, 7 Oct 2020 09:34:21 -0400 Subject: [PATCH 02/16] [REVIEW] BUG fix benchmark notebooks for recent cudf changes (#1192) * fix notebooks for recent cudf changes * updated docs * changelog * fix notebooks for recent cudf changes * updated docs * changelog * reset * flake8 * fixed typo in function name * fixed typo * clean. this notebook can take 12+ hours to run. making data set small for nightly testing Co-authored-by: BradReesWork --- CHANGELOG.md | 1 + .../cugraph_benchmarks/bfs_benchmark.ipynb | 46 +--------- .../louvain_benchmark.ipynb | 42 +--------- .../pagerank_benchmark.ipynb | 40 +-------- notebooks/cugraph_benchmarks/release.ipynb | 25 +++--- python/cugraph/structure/graph.py | 83 +++++++++---------- 6 files changed, 59 insertions(+), 178 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd6d6690659..4ff3af3c2df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -40,6 +40,7 @@ - PR #1180 BLD Adopt RAFT model for cuhornet dependency - PR #1181 Fix notebook error handling in CI - PR #1186 BLD Installing raft headers under cugraph +- PR #1192 Fix benchmark notes and documentation issues in graph.py # cuGraph 0.15.0 (26 Aug 2020) diff --git a/notebooks/cugraph_benchmarks/bfs_benchmark.ipynb b/notebooks/cugraph_benchmarks/bfs_benchmark.ipynb index 1c1362d0498..58eb94bf0ee 100644 --- a/notebooks/cugraph_benchmarks/bfs_benchmark.ipynb +++ b/notebooks/cugraph_benchmarks/bfs_benchmark.ipynb @@ -62,34 +62,6 @@ "See the README file in this folder for a discription of how to get the data" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## If you have more than one GPU, set the GPU to use\n", - "This is not needed on a Single GPU system or if 
the default GPU is to be used" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!nvidia-smi" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Set the GPU to use\n", - "import os\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\"" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -144,22 +116,6 @@ "import numpy as np" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Print the name of the used GPU" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "cudf._cuda.gpu.deviceGetName(0)" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -425,7 +381,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.8.5" } }, "nbformat": 4, diff --git a/notebooks/cugraph_benchmarks/louvain_benchmark.ipynb b/notebooks/cugraph_benchmarks/louvain_benchmark.ipynb index 7a234c9c159..a12b7c4bcc2 100644 --- a/notebooks/cugraph_benchmarks/louvain_benchmark.ipynb +++ b/notebooks/cugraph_benchmarks/louvain_benchmark.ipynb @@ -57,35 +57,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## If you have more than one GPU, set the GPU to use\n", - "This is not needed on a Single GPU system or if the default GPU is to be used" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!nvidia-smi" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# since this is a shared machine - let's pick a GPU that no one else is using\n", - "import os\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\"" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Now load the required libraries" + "## Load the required libraries" ] }, { @@ -149,16 +121,6 @@ "import numpy as np" 
] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Print out GPU Name\n", - "cudf._cuda.gpu.deviceGetName(0)" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -403,7 +365,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.8.5" } }, "nbformat": 4, diff --git a/notebooks/cugraph_benchmarks/pagerank_benchmark.ipynb b/notebooks/cugraph_benchmarks/pagerank_benchmark.ipynb index 52388fc1a14..c2933a10c7d 100644 --- a/notebooks/cugraph_benchmarks/pagerank_benchmark.ipynb +++ b/notebooks/cugraph_benchmarks/pagerank_benchmark.ipynb @@ -65,34 +65,6 @@ "See the README file in this folder for a discription of how to get the data" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## If you have more than one GPU, set the GPU to use\n", - "This is not needed on a Single GPU system or if the default GPU is to be used" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "!nvidia-smi" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# since this is a shared machine - let's pick a GPU that no one else is using\n", - "import os\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"]=\"0\"" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -147,16 +119,6 @@ "import numpy as np" ] }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Print out GPU Name\n", - "cudf._cuda.gpu.deviceGetName(0)" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -495,7 +457,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.8.5" } }, "nbformat": 4, diff --git a/notebooks/cugraph_benchmarks/release.ipynb b/notebooks/cugraph_benchmarks/release.ipynb index ff5ed5abf9f..d3110da3621 100644 --- 
a/notebooks/cugraph_benchmarks/release.ipynb +++ b/notebooks/cugraph_benchmarks/release.ipynb @@ -33,9 +33,9 @@ "Notebook Credits\n", "\n", " Original Authors: Bradley Rees\n", - " Last Edit: 08/17/2020\n", + " Last Edit: 10/06/2020\n", " \n", - "RAPIDS Versions: 0.15\n", + "RAPIDS Versions: 0.16\n", "\n", "Test Hardware\n", " GV100 32G, CUDA 10.2\n", @@ -124,12 +124,17 @@ "outputs": [], "source": [ "# Test File\n", + "# data = {\n", + "# 'preferentialAttachment' : './data/preferentialAttachment.mtx',\n", + "# 'dblp' : './data/dblp-2010.mtx',\n", + "# 'coPapersCiteseer' : './data/coPapersCiteseer.mtx',\n", + "# 'as-Skitter' : './data/as-Skitter.mtx'\n", + "#}\n", + "\n", + "# for quick testing\n", "data = {\n", - " 'preferentialAttachment' : './data/preferentialAttachment.mtx',\n", - " 'dblp' : './data/dblp-2010.mtx',\n", - " 'coPapersCiteseer' : './data/coPapersCiteseer.mtx',\n", - " 'as-Skitter' : './data/as-Skitter.mtx'\n", - "}" + " 'polbooks' : './data/polbooks.mtx', \n", + "}\n" ] }, { @@ -274,7 +279,7 @@ "\n", "def cu_pagerank(_df):\n", " t1 = time.time()\n", - " _G = create_cu_graph(_df)\n", + " _G = create_cu_digraph(_df)\n", " _ = cugraph.pagerank(_G)\n", " t2 = time.time() - t1\n", " return t2" @@ -302,7 +307,7 @@ "\n", "def cu_wcc(_df):\n", " t1 = time.time()\n", - " _G = create_cu_graph(_df) \n", + " _G = create_cu_digraph(_df) \n", " _ = cugraph.weakly_connected_components(_G)\n", " t2 = time.time() - t1\n", " return t2" @@ -588,7 +593,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.8" + "version": "3.8.5" } }, "nbformat": 4, diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index ce63eb52683..311ef7313ae 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -123,8 +123,10 @@ def __init__( self.renumbered = m_graph.renumbered self.renumber_map = m_graph.renumber_map else: - msg = "Graph can be initialized using MultiGraph\ - and 
DiGraph can be initialized using MultiDiGraph" + msg = ( + "Graph can be initialized using MultiGraph " + "and DiGraph can be initialized using MultiDiGraph" + ) raise Exception(msg) # self.number_of_vertices = None @@ -322,29 +324,19 @@ def from_cudf_edgelist( Parameters ---------- input_df : cudf.DataFrame or dask_cudf.DataFrame - This cudf.DataFrame wraps source, destination and weight - gdf_column of size E (E: number of edges) - The 'src' column contains the source index for each edge. - Source indices are in the range [0, V) (V: number of vertices). - The 'dst' column contains the destination index for each edge. - Destination indices are in the range [0, V) (V: number of - vertices). - If renumbering needs to be done, renumber - argument should be passed as True. - For weighted graphs, dataframe contains 'weight' column - containing the weight value for each edge. + A DataFrame that contains edge information If a dask_cudf.DataFrame is passed it will be reinterpreted as a cudf.DataFrame. For the distributed path please use from_dask_cudf_edgelist. - source : str - source argument is source column name - destination : str - destination argument is destination column name. - edge_attr : str - edge_attr argument is the weights column name. + source : str or array-like + source column name or array of column names + destination : str or array-like + destination column name or array of column names + edge_attr : str or None + the weights column name. Default is None renumber : bool - If source and destination indices are not in range 0 to V where V - is number of vertices, renumber argument should be True. + Indicate whether or not to renumber the source and destination + vertex IDs. Default is True. Examples -------- @@ -369,29 +361,31 @@ def from_cudf_edgelist( and set(d_col).issubset(set(input_df.columns)) ): raise Exception( - "source column names and/or destination column \ -names not found in input. 
Recheck the source and destination parameters" + "source column names and/or destination column " + "names not found in input. Recheck the source and " + "destination parameters" ) + # FIXME: update for smaller GPUs # Consolidation if isinstance(input_df, cudf.DataFrame): if len(input_df[source]) > 2147483100: raise Exception( - "cudf dataFrame edge list is too big \ - to fit in a single GPU" + "cudf dataFrame edge list is too big " + "to fit in a single GPU" ) elist = input_df elif isinstance(input_df, dask_cudf.DataFrame): if len(input_df[source]) > 2147483100: raise Exception( - "dask_cudf dataFrame edge list is too big \ - to fit in a single GPU" + "dask_cudf dataFrame edge list is too big " + "to fit in a single GPU" ) elist = input_df.compute().reset_index(drop=True) else: raise Exception( - "input should be a cudf.DataFrame or \ - a dask_cudf dataFrame" + "input should be a cudf.DataFrame or " + "a dask_cudf dataFrame" ) renumber_map = None @@ -462,12 +456,12 @@ def from_dask_cudf_edgelist( ---------- input_ddf : dask_cudf.DataFrame The edgelist as a dask_cudf.DataFrame - source : str - source argument is source column name + source : str or array-like + source column name or array of column names destination : str - destination argument is destination column name. + destination column name or array of column names edge_attr : str - edge_attr argument is the weights column name. + weights column name. renumber : bool If source and destination indices are not in range 0 to V where V is number of vertices, renumber argument should be True. @@ -490,8 +484,9 @@ def from_dask_cudf_edgelist( and set(d_col).issubset(set(input_ddf.columns)) ): raise Exception( - "source column names and/or destination column \ -names not found in input. Recheck the source and destination parameters" + "source column names and/or destination column " + "names not found in input. 
Recheck the source " + "and destination parameters" ) # # Keep all of the original parameters so we can lazily @@ -558,16 +553,16 @@ def view_edge_list(self): Returns ------- - edgelist_df : cudf.DataFrame + df : cudf.DataFrame This cudf.DataFrame wraps source, destination and weight - gdf_column of size E (E: number of edges) - The 'src' column contains the source index for each edge. - Source indices are in the range [0, V) (V: number of vertices). - The 'dst' column contains the destination index for each edge. - Destination indices are in the range [0, V) (V: number of - vertices). - For weighted graphs, dataframe contains 'weight' column - containing the weight value for each edge. + + df[src] : cudf.Series + contains the source index for each edge + df[dst] : cudf.Series + contains the destination index for each edge + df[weight] : cudf.Series + Column is only present for weighted Graph, + then containing the weight value for each edge """ if self.distributed: if self.edgelist is None: From b14c458ba401aeb8e8fc451f7479a6af7b679908 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Fri, 9 Oct 2020 14:21:01 -0500 Subject: [PATCH 03/16] [REVIEW] BUG Move subcomms init outside of individual algorithm functions (#1196) * WIP: initial commit, incomplete * Initial version that builds using a new cython C++ util to init subcomms. Still have an issue with error stating comms not initialized when trying to init subcomms. * WIP update to move subcomm init to user-facing comms init call. * move subcomm init to comms init * updates * Removing FIXMEs and old code now that subcomms init is moved to comms init. * Added PR 1196 to CHANGELOG.md, removed additional obsolete FIXME * Addressed FIXME by removing redundant prows and pcols args to populate_graph_container() call and using values obtained directly from handle instead. * C++ style check updates * Updated docs and comments based on review feedback.
Minor consolidation of get_n_workers() call, but still need FIXME for that addressed. * flake8 updates Co-authored-by: Rick Ratzel Co-authored-by: Ishika Roy --- CHANGELOG.md | 3 +- cpp/include/partition_manager.hpp | 4 + cpp/include/utilities/cython.hpp | 15 +- cpp/src/utilities/cython.cu | 21 ++- python/cugraph/comms/comms.pxd | 25 ++++ python/cugraph/comms/comms.py | 137 +++++++++++++++--- python/cugraph/comms/comms_wrapper.pyx | 9 ++ python/cugraph/dask/community/louvain.py | 10 -- .../dask/community/louvain_wrapper.pyx | 3 - python/cugraph/structure/graph_primtypes.pxd | 2 - python/cugraph/structure/shuffle.py | 40 +---- 11 files changed, 184 insertions(+), 85 deletions(-) create mode 100644 python/cugraph/comms/comms.pxd create mode 100644 python/cugraph/comms/comms_wrapper.pyx diff --git a/CHANGELOG.md b/CHANGELOG.md index 4ff3af3c2df..00b3fa41812 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,8 +39,9 @@ - PR #1166 Fix misspelling of function calls in asserts causing debug build to fail - PR #1180 BLD Adopt RAFT model for cuhornet dependency - PR #1181 Fix notebook error handling in CI -- PR #1186 BLD Installing raft headers under cugraph +- PR #1186 BLD Installing raft headers under cugraph - PR #1192 Fix benchmark notes and documentation issues in graph.py +- PR #1196 Move subcomms init outside of individual algorithm functions # cuGraph 0.15.0 (26 Aug 2020) diff --git a/cpp/include/partition_manager.hpp b/cpp/include/partition_manager.hpp index c15aa504084..431655e5642 100644 --- a/cpp/include/partition_manager.hpp +++ b/cpp/include/partition_manager.hpp @@ -54,6 +54,10 @@ struct key_naming_t { using pair_comms_t = std::pair, std::shared_ptr>; +// FIXME: This class is a misnomer since the python layer is currently +// responsible for creating and managing partitioning. Consider renaming it or +// refactoring it away. 
+// // class responsible for creating 2D partition sub-comms: // this is instantiated by each worker (processing element, PE) // for the row/column it belongs to; diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index cf7428177d6..8742f3bf956 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -169,8 +169,6 @@ void populate_graph_container(graph_container_t& graph_container, size_t num_partition_edges, size_t num_global_vertices, size_t num_global_edges, - size_t row_comm_size, // pcols - size_t col_comm_size, // prows bool sorted_by_degree, bool transposed, bool multi_gpu); @@ -201,5 +199,18 @@ std::pair call_louvain(raft::handle_t const& handle, size_t max_level, weight_t resolution); +// Helper for setting up subcommunicators, typically called as part of the +// user-initiated comms initialization in Python. +// +// raft::handle_t& handle +// Raft handle for which the new subcommunicators will be created. The +// subcommunicators will then be accessible from the handle passed to the +// parallel processes. +// +// size_t row_comm_size +// Number of items in a partition row (ie. pcols), needed for creating the +// appropriate number of subcommunicator instances. 
+void init_subcomms(raft::handle_t& handle, size_t row_comm_size); + } // namespace cython } // namespace cugraph diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index f10b11fe8a4..c4fc0f7285f 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -111,8 +111,6 @@ void populate_graph_container(graph_container_t& graph_container, size_t num_partition_edges, size_t num_global_vertices, size_t num_global_edges, - size_t row_comm_size, // pcols - size_t col_comm_size, // prows bool sorted_by_degree, bool transposed, bool multi_gpu) @@ -123,20 +121,12 @@ void populate_graph_container(graph_container_t& graph_container, bool do_expensive_check{false}; bool hypergraph_partitioned{false}; - // FIXME: Consider setting up the subcomms right after initializing comms, no - // need to delay to this point. - // Setup the subcommunicators needed for this partition on the handle. - partition_2d::subcomm_factory_t subcomm_factory(handle, - row_comm_size); - // FIXME: once the subcomms are set up earlier (outside this function), remove - // the row/col_comm_size params and retrieve them from the handle (commented - // out lines below) auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto const row_comm_rank = row_comm.get_rank(); - // auto const row_comm_size = row_comm.get_size(); // pcols + auto const row_comm_size = row_comm.get_size(); // pcols auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); - // auto const col_comm_size = col_comm.get_size(); // prows + auto const col_comm_size = col_comm.get_size(); // prows graph_container.vertex_partition_offsets = vertex_partition_offsets; graph_container.src_vertices = src_vertices; @@ -491,5 +481,12 @@ template std::pair call_louvain(raft::handle_t const& handle, size_t max_level, double resolution); +// Helper for setting up subcommunicators +void 
init_subcomms(raft::handle_t& handle, size_t row_comm_size) +{ + partition_2d::subcomm_factory_t subcomm_factory(handle, + row_comm_size); +} + } // namespace cython } // namespace cugraph diff --git a/python/cugraph/comms/comms.pxd b/python/cugraph/comms/comms.pxd new file mode 100644 index 00000000000..44f7ee77562 --- /dev/null +++ b/python/cugraph/comms/comms.pxd @@ -0,0 +1,25 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# cython: profile=False +# distutils: language = c++ +# cython: embedsignature = True +# cython: language_level = 3 + +from cugraph.structure.graph_primtypes cimport handle_t + + +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef void init_subcomms(handle_t &handle, + size_t row_comm_size) diff --git a/python/cugraph/comms/comms.py b/python/cugraph/comms/comms.py index d8957cf0086..b5a283b5551 100644 --- a/python/cugraph/comms/comms.py +++ b/python/cugraph/comms/comms.py @@ -14,20 +14,70 @@ from cugraph.raft.dask.common.comms import Comms as raftComms from cugraph.raft.dask.common.comms import worker_state from cugraph.raft.common.handle import Handle +from cugraph.comms.comms_wrapper import init_subcomms as c_init_subcomms +from dask.distributed import default_client +from cugraph.dask.common import read_utils +import math __instance = None __default_handle = None +__subcomm = None -# Intialize Comms. 
If explicit Comms not provided as arg, -# default Comms are initialized as per client information. -def initialize(comms=None, p2p=False): +def __get_2D_div(ngpus): + pcols = int(math.sqrt(ngpus)) + while ngpus % pcols != 0: + pcols = pcols - 1 + return int(ngpus/pcols), pcols + + +def subcomm_init(prows, pcols, partition_type): + sID = get_session_id() + ngpus = get_n_workers() + if prows is None and pcols is None: + if partition_type == 1: + pcols, prows = __get_2D_div(ngpus) + else: + prows, pcols = __get_2D_div(ngpus) + else: + if prows is not None and pcols is not None: + if ngpus != prows*pcols: + raise Exception('prows*pcols should be equal to the\ + number of processes') + elif prows is not None: + if ngpus % prows != 0: + raise Exception('prows must be a factor of the number\ + of processes') + pcols = int(ngpus/prows) + elif pcols is not None: + if ngpus % pcols != 0: + raise Exception('pcols must be a factor of the number\ + of processes') + prows = int(ngpus/pcols) + + client = default_client() + client.run(_subcomm_init, sID, pcols) + global __subcomm + __subcomm = (prows, pcols, partition_type) + + +def _subcomm_init(sID, partition_row_size): + handle = get_handle(sID) + c_init_subcomms(handle, partition_row_size) + + +def initialize(comms=None, + p2p=False, + prows=None, + pcols=None, + partition_type=1): """ - Initialize a communicator for multi-node/multi-gpu communications. - It is expected to be called right after client initialization for running - multi-GPU algorithms. It wraps raft comms that manages underlying NCCL and - UCX comms handles across the workers of a Dask cluster. + Initialize a communicator for multi-node/multi-gpu communications. It is + expected to be called right after client initialization for running + multi-GPU algorithms (this wraps raft comms that manages underlying NCCL + and UCX comms handles across the workers of a Dask cluster). 
+ It is recommended to also call `destroy()` when the comms are no longer needed so the underlying resources can be cleaned up. @@ -35,9 +85,25 @@ def initialize(comms=None, p2p=False): ---------- comms : raft Comms A pre-initialized raft communicator. If provided, this is used for mnmg - communications. + communications. If not provided, default comms are initialized as per + client information. p2p : bool - Initialize UCX endpoints + Initialize UCX endpoints if True. Default is False. + prows : int + Specifies the number of rows when performing a 2D partitioning of the + input graph. If specified, this must be a factor of the total number of + parallel processes. When specified with pcols, prows*pcols should be + equal to the total number of parallel processes. + pcols : int + Specifies the number of columns when performing a 2D partitioning of + the input graph. If specified, this must be a factor of the total + number of parallel processes. When specified with prows, prows*pcols + should be equal to the total number of parallel processes. + partition_type : int + Valid values are currently 1 or any int other than 1. A value of 1 (the + default) represents a partitioning resulting in prows*pcols + partitions. A non-1 value currently results in a partitioning of + p*pcols partitions, where p is the number of GPUs. """ global __instance @@ -45,16 +111,21 @@ def initialize(comms=None, p2p=False): global __default_handle __default_handle = None if comms is None: + # Initialize communicator __instance = raftComms(comms_p2p=p2p) __instance.init() + # Initialize subcommunicator + subcomm_init(prows, pcols, partition_type) else: __instance = comms else: raise Exception("Communicator is already initialized") -# Check is Comms was initialized. def is_initialized(): + """ + Returns True if comms was initialized, False otherwise. 
+ """ global __instance if __instance is not None: return True @@ -62,27 +133,44 @@ def is_initialized(): return False -# Get raft Comms def get_comms(): + """ + Returns raft Comms instance + """ global __instance return __instance -# Get workers in the Comms def get_workers(): + """ + Returns the workers in the Comms instance, or None if Comms is not + initialized. + """ if is_initialized(): global __instance return __instance.worker_addresses -# Get sessionId for finding sessionstate of workers. def get_session_id(): + """ + Returns the sessionId for finding sessionstate of workers, or None if Comms + is not initialized. + """ if is_initialized(): global __instance return __instance.sessionId -# Destroy Comms +def get_2D_partition(): + """ + Returns a tuple representing the 2D partition information: (prows, pcols, + partition_type) + """ + global __subcomm + if __subcomm is not None: + return __subcomm + + def destroy(): """ Shuts down initialized comms and cleans up resources. @@ -93,9 +181,10 @@ def destroy(): __instance = None -# Default handle in case Comms is not initialized. -# This does not perform nccl initialization. def get_default_handle(): + """ + Returns the default handle. This does not perform nccl initialization. + """ global __default_handle if __default_handle is None: __default_handle = Handle() @@ -114,6 +203,16 @@ def get_worker_id(sID): return sessionstate['wid'] -def get_n_workers(sID): - sessionstate = worker_state(sID) - return sessionstate['nworkers'] +# FIXME: There are several similar instances of utility functions for getting +# the number of workers, including: +# * get_n_workers() (from cugraph.dask.common.read_utils) +# * len(get_visible_devices()) +# * len(numba.cuda.gpus) +# Consider consolidating these or emphasizing why different +# functions/techniques are needed. 
+def get_n_workers(sID=None): + if sID is None: + return read_utils.get_n_workers() + else: + sessionstate = worker_state(sID) + return sessionstate['nworkers'] diff --git a/python/cugraph/comms/comms_wrapper.pyx b/python/cugraph/comms/comms_wrapper.pyx new file mode 100644 index 00000000000..c1148b4c887 --- /dev/null +++ b/python/cugraph/comms/comms_wrapper.pyx @@ -0,0 +1,9 @@ + +from cugraph.structure.graph_primtypes cimport handle_t +from cugraph.comms.comms cimport init_subcomms as c_init_subcomms + + +def init_subcomms(handle, row_comm_size): + cdef size_t handle_size_t = handle.getHandle() + handle_ = handle_size_t + c_init_subcomms(handle_[0], row_comm_size) diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py index 06f3b47b3b4..186bd63ddc8 100644 --- a/python/cugraph/dask/community/louvain.py +++ b/python/cugraph/dask/community/louvain.py @@ -23,8 +23,6 @@ def call_louvain(sID, data, num_verts, num_edges, - partition_row_size, - partition_col_size, vertex_partition_offsets, sorted_by_degree, max_level, @@ -36,8 +34,6 @@ def call_louvain(sID, return c_mg_louvain.louvain(data[0], num_verts, num_edges, - partition_row_size, - partition_col_size, vertex_partition_offsets, wid, handle, @@ -67,10 +63,6 @@ def louvain(input_graph, max_iter=100, resolution=1.0, load_balance=True): """ # FIXME: finish docstring: describe parameters, etc. - # FIXME: import here to prevent circular import: cugraph->louvain - # wrapper->cugraph/structure->cugraph/dask->dask/louvain->cugraph/structure - # from cugraph.structure.graph import Graph - # FIXME: dask methods to populate graphs from edgelists are only present on # DiGraph classes. Disable the Graph check for now and assume inputs are # symmetric DiGraphs. 
@@ -96,8 +88,6 @@ def louvain(input_graph, max_iter=100, resolution=1.0, load_balance=True): wf[1], num_verts, num_edges, - partition_row_size, - partition_col_size, vertex_partition_offsets, sorted_by_degree, max_iter, diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index 3d72a7c3bd6..59ec0f67733 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -35,8 +35,6 @@ numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, def louvain(input_df, num_global_verts, num_global_edges, - partition_row_size, - partition_col_size, vertex_partition_offsets, rank, handle, @@ -96,7 +94,6 @@ def louvain(input_df, ((numberTypeMap[weight_t])), num_partition_edges, num_global_verts, num_global_edges, - partition_row_size, partition_col_size, sorted_by_degree, False, True) # store_transposed, multi_gpu diff --git a/python/cugraph/structure/graph_primtypes.pxd b/python/cugraph/structure/graph_primtypes.pxd index 2879436690f..e46f4092dd4 100644 --- a/python/cugraph/structure/graph_primtypes.pxd +++ b/python/cugraph/structure/graph_primtypes.pxd @@ -217,8 +217,6 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": size_t num_partition_edges, size_t num_global_vertices, size_t num_global_edges, - size_t row_comm_size, - size_t col_comm_size, bool sorted_by_degree, bool transposed, bool multi_gpu) except + diff --git a/python/cugraph/structure/shuffle.py b/python/cugraph/structure/shuffle.py index ea3c28463d7..bbe55f4046b 100644 --- a/python/cugraph/structure/shuffle.py +++ b/python/cugraph/structure/shuffle.py @@ -11,22 +11,9 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import math from dask.dataframe.shuffle import rearrange_by_column import cudf - - -def get_n_workers(): - from dask.distributed import default_client - client = default_client() - return len(client.scheduler_info()['workers']) - - -def get_2D_div(ngpus): - pcols = int(math.sqrt(ngpus)) - while ngpus % pcols != 0: - pcols = pcols - 1 - return int(ngpus/pcols), pcols +import cugraph.comms.comms as Comms def _set_partitions_pre(df, vertex_row_partitions, vertex_col_partitions, @@ -47,7 +34,7 @@ def _set_partitions_pre(df, vertex_row_partitions, vertex_col_partitions, return partitions -def shuffle(dg, transposed=False, prows=None, pcols=None, partition_type=1): +def shuffle(dg, transposed=False): """ Shuffles the renumbered input distributed graph edgelist into ngpu partitions. The number of processes/gpus P = prows*pcols. The 2D @@ -57,27 +44,8 @@ def shuffle(dg, transposed=False, prows=None, pcols=None, partition_type=1): """ ddf = dg.edgelist.edgelist_df - ngpus = get_n_workers() - if prows is None and pcols is None: - if partition_type == 1: - pcols, prows = get_2D_div(ngpus) - else: - prows, pcols = get_2D_div(ngpus) - else: - if prows is not None and pcols is not None: - if ngpus != prows*pcols: - raise Exception('prows*pcols should be equal to the\ - number of processes') - elif prows is not None: - if ngpus % prows != 0: - raise Exception('prows must be a factor of the number\ - of processes') - pcols = int(ngpus/prows) - elif pcols is not None: - if ngpus % pcols != 0: - raise Exception('pcols must be a factor of the number\ - of processes') - prows = int(ngpus/pcols) + ngpus = Comms.get_n_workers() + prows, pcols, partition_type = Comms.get_2D_partition() renumber_vertex_count = dg.renumber_map.implementation.\ ddf.map_partitions(len).compute() From 5ff3f19b0a09830f6ee4c29d64171916bf26cc6f Mon Sep 17 00:00:00 2001 From: Alex Fender Date: Fri, 9 Oct 2020 17:57:42 -0500 Subject: [PATCH 04/16] [REVIEW] BUG segfault in python test suite (#1199) * Update 
build.sh * Update CHANGELOG.md * Update meta.yaml * Update cugraph_dev_cuda10.1.yml * Update cugraph_dev_cuda10.2.yml * Update cugraph_dev_cuda11.0.yml * Update meta.yaml --- CHANGELOG.md | 2 ++ ci/gpu/build.sh | 2 ++ conda/environments/cugraph_dev_cuda10.1.yml | 2 +- conda/environments/cugraph_dev_cuda10.2.yml | 2 +- conda/environments/cugraph_dev_cuda11.0.yml | 2 +- conda/recipes/cugraph/meta.yaml | 2 +- conda/recipes/libcugraph/meta.yaml | 4 ++-- 7 files changed, 10 insertions(+), 6 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 00b3fa41812..8aebfb8b286 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -39,6 +39,8 @@ - PR #1166 Fix misspelling of function calls in asserts causing debug build to fail - PR #1180 BLD Adopt RAFT model for cuhornet dependency - PR #1181 Fix notebook error handling in CI +- PR #1199 BUG segfault in python test suite +- PR #1186 BLD Installing raft headers under cugraph - PR #1186 BLD Installing raft headers under cugraph - PR #1192 Fix benchmark notes and documentation issues in graph.py - PR #1196 Move subcomms init outside of individual algorithm functions diff --git a/ci/gpu/build.sh b/ci/gpu/build.sh index 83f234f787b..0dab0437be7 100755 --- a/ci/gpu/build.sh +++ b/ci/gpu/build.sh @@ -57,7 +57,9 @@ source activate rapids logger "conda install required packages" conda install -c nvidia -c rapidsai -c rapidsai-nightly -c conda-forge -c defaults \ + "libcudf=${MINOR_VERSION}" \ "cudf=${MINOR_VERSION}" \ + "librmm=${MINOR_VERSION}" \ "rmm=${MINOR_VERSION}" \ "cudatoolkit=$CUDA_REL" \ "dask-cudf=${MINOR_VERSION}" \ diff --git a/conda/environments/cugraph_dev_cuda10.1.yml b/conda/environments/cugraph_dev_cuda10.1.yml index 05113f3d7ee..4fae56d7cd5 100644 --- a/conda/environments/cugraph_dev_cuda10.1.yml +++ b/conda/environments/cugraph_dev_cuda10.1.yml @@ -13,7 +13,7 @@ dependencies: - distributed>=2.12.0 - dask-cuda=0.16* - dask-cudf=0.16* -- nccl>=2.5 +- nccl>=2.7 - ucx-py=0.16* - scipy - networkx diff --git 
a/conda/environments/cugraph_dev_cuda10.2.yml b/conda/environments/cugraph_dev_cuda10.2.yml index 02537e4bf6c..1e7bfb0f47c 100644 --- a/conda/environments/cugraph_dev_cuda10.2.yml +++ b/conda/environments/cugraph_dev_cuda10.2.yml @@ -13,7 +13,7 @@ dependencies: - distributed>=2.12.0 - dask-cuda=0.16* - dask-cudf=0.16* -- nccl>=2.5 +- nccl>=2.7 - ucx-py=0.16* - scipy - networkx diff --git a/conda/environments/cugraph_dev_cuda11.0.yml b/conda/environments/cugraph_dev_cuda11.0.yml index efd4b57dcc4..ef0fdfa7506 100644 --- a/conda/environments/cugraph_dev_cuda11.0.yml +++ b/conda/environments/cugraph_dev_cuda11.0.yml @@ -13,7 +13,7 @@ dependencies: - distributed>=2.12.0 - dask-cuda=0.16* - dask-cudf=0.16* -- nccl>=2.5 +- nccl>=2.7 - ucx-py=0.16* - scipy - networkx diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index 1376a0e30d2..c7eba24d2fb 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -36,7 +36,7 @@ requirements: - dask-cuda {{ minor_version }} - dask>=2.12.0 - distributed>=2.12.0 - - nccl>=2.5 + - nccl>=2.7 - ucx-py {{ minor_version }} #test: diff --git a/conda/recipes/libcugraph/meta.yaml b/conda/recipes/libcugraph/meta.yaml index 22731102110..89ccd2d56e3 100644 --- a/conda/recipes/libcugraph/meta.yaml +++ b/conda/recipes/libcugraph/meta.yaml @@ -29,12 +29,12 @@ requirements: - cudatoolkit {{ cuda_version }}.* - boost-cpp>=1.66 - libcypher-parser - - nccl>=2.5 + - nccl>=2.7 - ucx-py {{ minor_version }} run: - libcudf={{ minor_version }} - {{ pin_compatible('cudatoolkit', max_pin='x.x') }} - - nccl>=2.5 + - nccl>=2.7 - ucx-py {{ minor_version }} #test: From 3a3fbc6aaf4f0c5e6310959a06ebedfe2c981ae2 Mon Sep 17 00:00:00 2001 From: Hugo Linsenmaier Date: Fri, 9 Oct 2020 18:54:46 -0700 Subject: [PATCH 05/16] [REVIEW] BUG Remove deprecated call to from_gpu_matrix (#1198) * Remove deprecated call to gpu matrix * Update CHANGELOG --- CHANGELOG.md | 1 + python/cugraph/layout/force_atlas2_wrapper.pyx | 4 
++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8aebfb8b286..0542c2629d7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -44,6 +44,7 @@ - PR #1186 BLD Installing raft headers under cugraph - PR #1192 Fix benchmark notes and documentation issues in graph.py - PR #1196 Move subcomms init outside of individual algorithm functions +- PR #1198 Remove deprecated call to from_gpu_matrix # cuGraph 0.15.0 (26 Aug 2020) diff --git a/python/cugraph/layout/force_atlas2_wrapper.pyx b/python/cugraph/layout/force_atlas2_wrapper.pyx index 31bf8fc029e..39a54b0b3f0 100644 --- a/python/cugraph/layout/force_atlas2_wrapper.pyx +++ b/python/cugraph/layout/force_atlas2_wrapper.pyx @@ -127,7 +127,7 @@ def force_atlas2(input_graph, verbose, callback_ptr) - pos_df = cudf.DataFrame.from_gpu_matrix(pos, columns=['x', 'y']) + pos_df = cudf.DataFrame(pos, columns=['x', 'y']) df['x'] = pos_df['x'] df['y'] = pos_df['y'] else: @@ -159,7 +159,7 @@ def force_atlas2(input_graph, verbose, callback_ptr) - pos_df = cudf.DataFrame.from_gpu_matrix(pos, columns=['x', 'y']) + pos_df = cudf.DataFrame(pos, columns=['x', 'y']) df['x'] = pos_df['x'] df['y'] = pos_df['y'] From 3a90fcdabceb7e5ff3680aefbbfbc6ec6bda0154 Mon Sep 17 00:00:00 2001 From: JasonAtNvidia <35045286+JasonAtNvidia@users.noreply.github.com> Date: Sun, 11 Oct 2020 10:31:37 -0400 Subject: [PATCH 06/16] [Review] Adding CUDA architecture code for sm80 and aarch64 (#1156) * Adding CUDA architecture code for aarch64 and adding structure to reflect cudf and cuml CMake files * FAISS was not needed for cugraph, bad copy and paste :( * updating changelog * Adding code=compute_ to CMAKE_CUDA_FLAGS * Adding GENCODE 80 to GUNROCK commands in CMakeLists.txt * Update CMakeLists.txt * Update CMakeLists.txt Giving a try to Gunrock's CUDA_AUTODETECT_GENCODE feature. 
* Update CMakeLists.txt conditionally select gunrock gencodes * Update utils.py * Update katz_centrality_test.cu Co-authored-by: Alex Fender --- CHANGELOG.md | 2 + cpp/CMakeLists.txt | 57 +++++++++++++++----- cpp/tests/centrality/katz_centrality_test.cu | 2 +- python/cugraph/tests/utils.py | 4 +- 4 files changed, 49 insertions(+), 16 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0542c2629d7..722fa8873d8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ - PR #1145 Simple edge list generator - PR #1144 updated documentation and APIs - PR #1139 MNMG Louvain Python updates, Cython cleanup +- PR #1156 Add aarch64 gencode support - PR #1149 Parquet read and concat within workers - PR #1152 graph container cleanup, added arg for instantiating legacy types and switch statements to factory function - PR #1164 MG symmetrize and conda env updates @@ -30,6 +31,7 @@ - PR #1176 Update ci/local/README.md - PR #1184 BLD getting latest tags + ## Bug Fixes - PR #1131 Show style checker errors with set +e - PR #1150 Update RAFT git tag diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index df17d7c14dd..85b5822011c 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -48,14 +48,52 @@ if(CMAKE_COMPILER_IS_GNUCXX) set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror -Wno-error=deprecated-declarations") endif(CMAKE_COMPILER_IS_GNUCXX) -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_60,code=sm_60") -set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_70,code=sm_70 -gencode=arch=compute_70,code=compute_70") - find_package(CUDA) -if((CUDA_VERSION_MAJOR EQUAL 10) OR (CUDA_VERSION_MAJOR GREATER 10)) - set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode=arch=compute_75,code=sm_75 -gencode=arch=compute_75,code=compute_75") + +# Check for aarch64 vs workstation architectures +if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64") + message(STATUS "CMAKE Detected aarch64 CPU architecture, selecting appropriate gencodes") + # This is being build 
for Linux4Tegra or SBSA ARM64 CUDA + set(GPU_ARCHS "62") # Default minimum CUDA GenCode - not supported by gunrock + if(CUDA_VERSION_MAJOR GREATER_EQUAL 9) + set(GPU_ARCHS "${GPU_ARCHS};72") + set(GUNROCK_GENCODE "-DGUNROCK_GENCODE_SM72=TRUE") + endif() + if(CUDA_VERSION_MAJOR GREATER_EQUAL 11) + # This is probably for SBSA CUDA, or a next gen Jetson + set(GPU_ARCHS "${GPU_ARCHS};75;80") + set(GUNROCK_GENCODE "${GUNROCK_GENCODE} -DGUNROCK_GENCODE_SM75=TRUE -DGUNROCK_GENCODE_SM80=TRUE ") + endif() + +else() + message(STATUS "CMAKE selecting appropriate gencodes for x86 or ppc64 CPU architectures") + # System architecture was not aarch64, + # this is datacenter or workstation class hardware + set(GPU_ARCHS "60") # Default minimum supported CUDA gencode + set(GUNROCK_GENCODE "-DGUNROCK_GENCODE_SM60=TRUE") + if(CUDA_VERSION_MAJOR GREATER_EQUAL 9) + set(GPU_ARCHS "${GPU_ARCHS};70") + set(GUNROCK_GENCODE "${GUNROCK_GENCODE} -DGUNROCK_GENCODE_SM70=TRUE") + endif() + if(CUDA_VERSION_MAJOR GREATER_EQUAL 10) + set(GPU_ARCHS "${GPU_ARCHS};75") + set(GUNROCK_GENCODE "${GUNROCK_GENCODE} -DGUNROCK_GENCODE_SM75=TRUE") + endif() + if(CUDA_VERSION_MAJOR GREATER_EQUAL 11) + set(GPU_ARCHS "${GPU_ARCHS};80") + set(GUNROCK_GENCODE "${GUNROCK_GENCODE} -DGUNROCK_GENCODE_SM80=TRUE") + endif() + endif() +message("-- Building for GPU_ARCHS = ${GPU_ARCHS}") +foreach(arch ${GPU_ARCHS}) + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_${arch},code=sm_${arch}") +endforeach() + +list(GET GPU_ARCHS -1 ptx) +set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -gencode arch=compute_${ptx},code=compute_${ptx}") + set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} --expt-extended-lambda") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Werror=cross-execution-space-call -Wno-deprecated-declarations -Xptxas --disable-warnings") set(CMAKE_CUDA_FLAGS "${CMAKE_CUDA_FLAGS} -Xcompiler -Wall,-Wno-error=sign-compare,-Wno-error=unused-but-set-variable") @@ -198,18 +236,13 @@ set(CUGUNROCK_DIR 
${CMAKE_CURRENT_BINARY_DIR}/cugunrock CACHE STRING ExternalProject_Add(cugunrock GIT_REPOSITORY https://github.com/rapidsai/cugunrock.git - GIT_TAG main + GIT_TAG 0b92fae6ee9026188a811b4d08915779e7c97178 PREFIX ${CUGUNROCK_DIR} CMAKE_ARGS -DCMAKE_INSTALL_PREFIX= - -DGPU_ARCHS="" -DGUNROCK_BUILD_SHARED_LIBS=OFF -DGUNROCK_BUILD_TESTS=OFF -DCUDA_AUTODETECT_GENCODE=FALSE - -DGUNROCK_GENCODE_SM60=TRUE - -DGUNROCK_GENCODE_SM61=TRUE - -DGUNROCK_GENCODE_SM70=TRUE - -DGUNROCK_GENCODE_SM72=TRUE - -DGUNROCK_GENCODE_SM75=TRUE + ${GUNROCK_GENCODE} BUILD_BYPRODUCTS ${CUGUNROCK_DIR}/lib/libgunrock.a ) diff --git a/cpp/tests/centrality/katz_centrality_test.cu b/cpp/tests/centrality/katz_centrality_test.cu index 97f499fc920..c4f17192955 100644 --- a/cpp/tests/centrality/katz_centrality_test.cu +++ b/cpp/tests/centrality/katz_centrality_test.cu @@ -160,7 +160,7 @@ INSTANTIATE_TEST_CASE_P( simple_test, Tests_Katz, ::testing::Values(Katz_Usecase("test/datasets/karate.mtx", "ref/katz/karate.csv"), - Katz_Usecase("test/datasets/netscience.mtx", "ref/katz/netscience.csv"), + // Katz_Usecase("test/datasets/netscience.mtx", "ref/katz/netscience.csv"), Katz_Usecase("test/datasets/polbooks.mtx", "ref/katz/polbooks.csv"), Katz_Usecase("test/datasets/dolphins.mtx", "ref/katz/dolphins.csv"))); diff --git a/python/cugraph/tests/utils.py b/python/cugraph/tests/utils.py index 88f79f65b4d..7f0a5346565 100644 --- a/python/cugraph/tests/utils.py +++ b/python/cugraph/tests/utils.py @@ -36,9 +36,7 @@ '../datasets/email-Eu-core.csv'] DATASETS_KTRUSS = [('../datasets/polbooks.csv', - '../datasets/ref/ktruss/polbooks.csv'), - ('../datasets/netscience.csv', - '../datasets/ref/ktruss/netscience.csv')] + '../datasets/ref/ktruss/polbooks.csv')] DATASETS_SMALL = ['../datasets/karate.csv', '../datasets/dolphins.csv', From 95be08554dc809cdf020ea64267bc58c393f304c Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Mon, 12 Oct 2020 15:41:49 -0500 Subject: [PATCH 07/16] [REVIEW] 
2D cython/python infrastructure- PAGERANK (#1175) * pagerank 2D cython/python infrastructure * sgpu pagerank edits * edits * add namespace * pull branch0.16 * update test * review updates * clang * updatelocal_verts * review changes * review changes * Update mg_pagerank_wrapper.pyx * Update mg_pagerank_wrapper.pyx * Update pagerank.py * Update mg_pagerank_wrapper.pyx * rename edge_attr * Add renaming of edge_attr * Update CMakeLists.txt * flake8 * Update graph.py * update graph.py to rename edge_attr Co-authored-by: Alex Fender --- CHANGELOG.md | 1 + cpp/CMakeLists.txt | 2 +- cpp/include/algorithms.hpp | 2 +- cpp/include/utilities/cython.hpp | 14 ++ cpp/src/experimental/pagerank.cu | 28 ++-- cpp/src/utilities/cython.cu | 137 +++++++++++++++++- .../dask/link_analysis/mg_pagerank.pxd | 15 +- .../link_analysis/mg_pagerank_wrapper.pyx | 103 +++++++------ python/cugraph/dask/link_analysis/pagerank.py | 72 +++++---- python/cugraph/link_analysis/pagerank.pxd | 7 +- .../link_analysis/pagerank_wrapper.pyx | 49 +++++-- python/cugraph/structure/graph.py | 12 ++ python/cugraph/tests/dask/test_mg_pagerank.py | 5 +- 13 files changed, 333 insertions(+), 114 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 722fa8873d8..9568be67594 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ - PR #1151 MNMG extension for pattern accelerator based PageRank, Katz Centrality, BFS, and SSSP implementations (C++ part) - PR #1163 Integrated 2D shuffling and Louvain updates - PR #1178 Refactored cython graph factory code to scale to additional data types +- PR #1175 Integrated 2D pagerank python/cython infra ## Improvements - PR 1081 MNMG Renumbering - sort partitions by degree diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 85b5822011c..48b5e0835f0 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -283,7 +283,7 @@ else(DEFINED ENV{RAFT_PATH}) ExternalProject_Add(raft GIT_REPOSITORY https://github.com/rapidsai/raft.git - GIT_TAG 
53c1e2dde4045f386f9cc4bb7d3dc99d5690b886 + GIT_TAG 515ed005aebc2276d52308516e623a4ab0b5e82c PREFIX ${RAFT_DIR} CONFIGURE_COMMAND "" BUILD_COMMAND "" diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index 9118ed3a7c4..f4b9868040b 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -1046,7 +1046,7 @@ void sssp(raft::handle_t &handle, * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ template -void pagerank(raft::handle_t &handle, +void pagerank(raft::handle_t const &handle, graph_view_t const &graph_view, weight_t *adj_matrix_row_out_weight_sums, vertex_t *personalization_vertices, diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index 8742f3bf956..36e0369c1c6 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -199,6 +199,20 @@ std::pair call_louvain(raft::handle_t const& handle, size_t max_level, weight_t resolution); +// Wrapper for calling Pagerank using a graph container +template +void call_pagerank(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t* identifiers, + weight_t* pagerank, + vertex_t personalization_subset_size, + vertex_t* personalization_subset, + weight_t* personalization_values, + double alpha, + double tolerance, + int64_t max_iter, + bool has_guess); + // Helper for setting up subcommunicators, typically called as part of the // user-initiated comms initialization in Python. // diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index 5948d329d64..f1acd47ac52 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -42,7 +42,7 @@ namespace detail { // FIXME: personalization_vector_size is confusing in OPG (local or aggregate?) 
template -void pagerank(raft::handle_t& handle, +void pagerank(raft::handle_t const& handle, GraphViewType const& pull_graph_view, typename GraphViewType::weight_type* adj_matrix_row_out_weight_sums, typename GraphViewType::vertex_type* personalization_vertices, @@ -290,7 +290,7 @@ void pagerank(raft::handle_t& handle, } // namespace detail template -void pagerank(raft::handle_t& handle, +void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, weight_t* adj_matrix_row_out_weight_sums, vertex_t* personalization_vertices, @@ -319,7 +319,7 @@ void pagerank(raft::handle_t& handle, // explicit instantiation -template void pagerank(raft::handle_t& handle, +template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, float* adj_matrix_row_out_weight_sums, int32_t* personalization_vertices, @@ -332,7 +332,7 @@ template void pagerank(raft::handle_t& handle, bool has_initial_guess, bool do_expensive_check); -template void pagerank(raft::handle_t& handle, +template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, double* adj_matrix_row_out_weight_sums, int32_t* personalization_vertices, @@ -345,7 +345,7 @@ template void pagerank(raft::handle_t& handle, bool has_initial_guess, bool do_expensive_check); -template void pagerank(raft::handle_t& handle, +template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, float* adj_matrix_row_out_weight_sums, int32_t* personalization_vertices, @@ -358,7 +358,7 @@ template void pagerank(raft::handle_t& handle, bool has_initial_guess, bool do_expensive_check); -template void pagerank(raft::handle_t& handle, +template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, double* adj_matrix_row_out_weight_sums, int32_t* personalization_vertices, @@ -371,7 +371,7 @@ template void pagerank(raft::handle_t& handle, bool has_initial_guess, bool do_expensive_check); -template void pagerank(raft::handle_t& handle, +template void 
pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, float* adj_matrix_row_out_weight_sums, int64_t* personalization_vertices, @@ -384,7 +384,7 @@ template void pagerank(raft::handle_t& handle, bool has_initial_guess, bool do_expensive_check); -template void pagerank(raft::handle_t& handle, +template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, double* adj_matrix_row_out_weight_sums, int64_t* personalization_vertices, @@ -397,7 +397,7 @@ template void pagerank(raft::handle_t& handle, bool has_initial_guess, bool do_expensive_check); -template void pagerank(raft::handle_t& handle, +template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, float* adj_matrix_row_out_weight_sums, int32_t* personalization_vertices, @@ -410,7 +410,7 @@ template void pagerank(raft::handle_t& handle, bool has_initial_guess, bool do_expensive_check); -template void pagerank(raft::handle_t& handle, +template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, double* adj_matrix_row_out_weight_sums, int32_t* personalization_vertices, @@ -423,7 +423,7 @@ template void pagerank(raft::handle_t& handle, bool has_initial_guess, bool do_expensive_check); -template void pagerank(raft::handle_t& handle, +template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, float* adj_matrix_row_out_weight_sums, int32_t* personalization_vertices, @@ -436,7 +436,7 @@ template void pagerank(raft::handle_t& handle, bool has_initial_guess, bool do_expensive_check); -template void pagerank(raft::handle_t& handle, +template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, double* adj_matrix_row_out_weight_sums, int32_t* personalization_vertices, @@ -449,7 +449,7 @@ template void pagerank(raft::handle_t& handle, bool has_initial_guess, bool do_expensive_check); -template void pagerank(raft::handle_t& handle, +template void pagerank(raft::handle_t const& handle, 
graph_view_t const& graph_view, float* adj_matrix_row_out_weight_sums, int64_t* personalization_vertices, @@ -462,7 +462,7 @@ template void pagerank(raft::handle_t& handle, bool has_initial_guess, bool do_expensive_check); -template void pagerank(raft::handle_t& handle, +template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, double* adj_matrix_row_out_weight_sums, int64_t* personalization_vertices, diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index c4fc0f7285f..78b59fbead8 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -52,12 +52,12 @@ create_graph(raft::handle_t const& handle, graph_container_t const& graph_contai reinterpret_cast(graph_container.vertex_partition_offsets) + (graph_container.row_comm_size * graph_container.col_comm_size) + 1); - experimental::partition_t partition(partition_offsets_vector, - graph_container.hypergraph_partitioned, - graph_container.row_comm_size, - graph_container.col_comm_size, - graph_container.row_comm_rank, - graph_container.col_comm_rank); + experimental::partition_t partition(partition_offsets_vector, + graph_container.hypergraph_partitioned, + graph_container.row_comm_size, + graph_container.col_comm_size, + graph_container.row_comm_rank, + graph_container.col_comm_rank); return std::make_unique>( handle, @@ -466,6 +466,83 @@ std::pair call_louvain(raft::handle_t const& handle, handle, graph_container, functor); } +// Wrapper for calling Pagerank through a graph container +template +void call_pagerank(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t* identifiers, + weight_t* p_pagerank, + vertex_t personalization_subset_size, + vertex_t* personalization_subset, + weight_t* personalization_values, + double alpha, + double tolerance, + int64_t max_iter, + bool has_guess) +{ + if (graph_container.graph_type == graphTypeEnum::GraphCSCViewFloat) { + pagerank(handle, + 
*(graph_container.graph_ptr_union.GraphCSCViewFloatPtr), + reinterpret_cast(p_pagerank), + static_cast(personalization_subset_size), + reinterpret_cast(personalization_subset), + reinterpret_cast(personalization_values), + alpha, + tolerance, + max_iter, + has_guess); + graph_container.graph_ptr_union.GraphCSCViewFloatPtr->get_vertex_identifiers( + reinterpret_cast(identifiers)); + } else if (graph_container.graph_type == graphTypeEnum::GraphCSCViewDouble) { + pagerank(handle, + *(graph_container.graph_ptr_union.GraphCSCViewDoublePtr), + reinterpret_cast(p_pagerank), + static_cast(personalization_subset_size), + reinterpret_cast(personalization_subset), + reinterpret_cast(personalization_values), + alpha, + tolerance, + max_iter, + has_guess); + graph_container.graph_ptr_union.GraphCSCViewDoublePtr->get_vertex_identifiers( + reinterpret_cast(identifiers)); + } else if (graph_container.graph_type == graphTypeEnum::graph_t) { + if (graph_container.edgeType == numberTypeEnum::int32Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::pagerank(handle, + graph->view(), + static_cast(nullptr), + reinterpret_cast(personalization_subset), + reinterpret_cast(personalization_values), + static_cast(personalization_subset_size), + reinterpret_cast(p_pagerank), + static_cast(alpha), + static_cast(tolerance), + max_iter, + has_guess, + false); + } else if (graph_container.edgeType == numberTypeEnum::int64Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::pagerank(handle, + graph->view(), + static_cast(nullptr), + reinterpret_cast(personalization_subset), + reinterpret_cast(personalization_values), + static_cast(personalization_subset_size), + reinterpret_cast(p_pagerank), + static_cast(alpha), + static_cast(tolerance), + max_iter, + has_guess, + false); + } else { + CUGRAPH_FAIL("vertexType/edgeType combination unsupported"); + } + } +} + // Explicit instantiations template std::pair 
call_louvain(raft::handle_t const& handle, graph_container_t const& graph_container, @@ -481,6 +558,54 @@ template std::pair call_louvain(raft::handle_t const& handle, size_t max_level, double resolution); +template void call_pagerank(raft::handle_t const& handle, + graph_container_t const& graph_container, + int* identifiers, + float* p_pagerank, + int32_t personalization_subset_size, + int32_t* personalization_subset, + float* personalization_values, + double alpha, + double tolerance, + int64_t max_iter, + bool has_guess); + +template void call_pagerank(raft::handle_t const& handle, + graph_container_t const& graph_container, + int* identifiers, + double* p_pagerank, + int32_t personalization_subset_size, + int32_t* personalization_subset, + double* personalization_values, + double alpha, + double tolerance, + int64_t max_iter, + bool has_guess); + +template void call_pagerank(raft::handle_t const& handle, + graph_container_t const& graph_container, + int64_t* identifiers, + float* p_pagerank, + int64_t personalization_subset_size, + int64_t* personalization_subset, + float* personalization_values, + double alpha, + double tolerance, + int64_t max_iter, + bool has_guess); + +template void call_pagerank(raft::handle_t const& handle, + graph_container_t const& graph_container, + int64_t* identifiers, + double* p_pagerank, + int64_t personalization_subset_size, + int64_t* personalization_subset, + double* personalization_values, + double alpha, + double tolerance, + int64_t max_iter, + bool has_guess); + // Helper for setting up subcommunicators void init_subcomms(raft::handle_t& handle, size_t row_comm_size) { diff --git a/python/cugraph/dask/link_analysis/mg_pagerank.pxd b/python/cugraph/dask/link_analysis/mg_pagerank.pxd index 429cb775e07..351b3d20d50 100644 --- a/python/cugraph/dask/link_analysis/mg_pagerank.pxd +++ b/python/cugraph/dask/link_analysis/mg_pagerank.pxd @@ -18,15 +18,16 @@ from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool 
-cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - cdef void pagerank[VT,ET,WT]( + cdef void call_pagerank[vertex_t, weight_t]( const handle_t &handle, - const GraphCSCView[VT,ET,WT] &graph, - WT *pagerank, - VT size, - VT *personalization_subset, - WT *personalization_values, + const graph_container_t &g, + vertex_t *identifiers, + weight_t *pagerank, + vertex_t size, + vertex_t *personalization_subset, + weight_t *personalization_values, double alpha, double tolerance, long long max_iter, diff --git a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx index 39b856e4946..8fecbb9ab87 100644 --- a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx +++ b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx @@ -21,38 +21,74 @@ from cugraph.structure.graph_primtypes cimport * import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper from libc.stdint cimport uintptr_t from cython.operator cimport dereference as deref - -def mg_pagerank(input_df, local_data, rank, handle, alpha=0.85, max_iter=100, tol=1.0e-5, personalization=None, nstart=None): +import numpy as np + + +def mg_pagerank(input_df, + num_global_verts, + num_global_edges, + vertex_partition_offsets, + rank, + handle, + alpha=0.85, + max_iter=100, + tol=1.0e-5, + personalization=None, + nstart=None): """ Call pagerank """ - cdef size_t handle_size_t = handle.getHandle() handle_ = handle_size_t - src = input_df['src'] dst = input_df['dst'] + vertex_t = src.dtype + if num_global_edges > (2**31 - 1): + edge_t = np.dtype("int64") + else: + edge_t = np.dtype("int32") + if "value" in input_df.columns: + weights = input_df['value'] + weight_t = weights.dtype + else: + weight_t = np.dtype("float32") + + # FIXME: Offsets and indices are currently hardcoded to int, but this may + # not be acceptable in the future. 
+ numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, + np.dtype("int64") : numberTypeEnum.int64Type, + np.dtype("float32") : numberTypeEnum.floatType, + np.dtype("double") : numberTypeEnum.doubleType} + + # FIXME: needs to be edge_t type not int + cdef int num_partition_edges = len(src) + + cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] + cdef uintptr_t c_edge_weights = NULL + + # FIXME: data is on device, move to host (to_pandas()), convert to np array and access pointer to pass to C + vertex_partition_offsets_host = vertex_partition_offsets.values_host + cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets_host.__array_interface__['data'][0] + + cdef graph_container_t graph_container + + populate_graph_container(graph_container, + handle_[0], + c_src_vertices, c_dst_vertices, c_edge_weights, + c_vertex_partition_offsets, + ((numberTypeMap[vertex_t])), + ((numberTypeMap[edge_t])), + ((numberTypeMap[weight_t])), + num_partition_edges, + num_global_verts, num_global_edges, + True, + True, True) - num_verts = local_data['verts'].sum() - num_edges = local_data['edges'].sum() - - local_offset = local_data['offsets'][rank] - dst = dst - local_offset - num_local_verts = local_data['verts'][rank] - num_local_edges = len(src) - - cdef uintptr_t c_local_verts = local_data['verts'].__array_interface__['data'][0] - cdef uintptr_t c_local_edges = local_data['edges'].__array_interface__['data'][0] - cdef uintptr_t c_local_offsets = local_data['offsets'].__array_interface__['data'][0] - - [src, dst] = graph_primtypes_wrapper.datatype_cast([src, dst], [np.int32]) - _offsets, indices, weights = coo2csr(dst, src, None) - offsets = _offsets[:num_local_verts + 1] - del _offsets df = cudf.DataFrame() - df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - df['pagerank'] = cudf.Series(np.zeros(num_verts, dtype=np.float32)) + df['vertex'] = 
cudf.Series(np.arange(vertex_partition_offsets.iloc[rank], vertex_partition_offsets.iloc[rank+1]), dtype=vertex_t) + df['pagerank'] = cudf.Series(np.zeros(len(df['vertex']), dtype=weight_t)) cdef uintptr_t c_identifier = df['vertex'].__cuda_array_interface__['data'][0]; cdef uintptr_t c_pagerank_val = df['pagerank'].__cuda_array_interface__['data'][0]; @@ -61,13 +97,6 @@ def mg_pagerank(input_df, local_data, rank, handle, alpha=0.85, max_iter=100, to cdef uintptr_t c_pers_val = NULL cdef int sz = 0 - cdef uintptr_t c_offsets = offsets.__cuda_array_interface__['data'][0] - cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] - cdef uintptr_t c_weights = NULL - - cdef GraphCSCView[int,int,float] graph_float - cdef GraphCSCView[int,int,double] graph_double - if personalization is not None: sz = personalization['vertex'].shape[0] personalization['vertex'] = personalization['vertex'].astype(np.int32) @@ -76,18 +105,10 @@ def mg_pagerank(input_df, local_data, rank, handle, alpha=0.85, max_iter=100, to c_pers_val = personalization['values'].__cuda_array_interface__['data'][0] if (df['pagerank'].dtype == np.float32): - graph_float = GraphCSCView[int,int,float](c_offsets, c_indices, c_weights, num_verts, num_local_edges) - graph_float.set_local_data(c_local_verts, c_local_edges, c_local_offsets) - graph_float.set_handle(handle_) - c_pagerank.pagerank[int,int,float](handle_[0], graph_float, c_pagerank_val, sz, c_pers_vtx, c_pers_val, - alpha, tol, max_iter, 0) - graph_float.get_vertex_identifiers(c_identifier) + c_pagerank.call_pagerank[int, float](handle_[0], graph_container, c_identifier, c_pagerank_val, sz, c_pers_vtx, c_pers_val, + alpha, tol, max_iter, 0) else: - graph_double = GraphCSCView[int,int,double](c_offsets, c_indices, c_weights, num_verts, num_local_edges) - graph_double.set_local_data(c_local_verts, c_local_edges, c_local_offsets) - graph_double.set_handle(handle_) - c_pagerank.pagerank[int,int,double](handle_[0], graph_double, 
c_pagerank_val, sz, c_pers_vtx, c_pers_val, + c_pagerank.call_pagerank[int, double](handle_[0], graph_container, c_identifier, c_pagerank_val, sz, c_pers_vtx, c_pers_val, alpha, tol, max_iter, 0) - graph_double.get_vertex_identifiers(c_identifier) - + return df diff --git a/python/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/dask/link_analysis/pagerank.py index a287333ef6f..143bb37dd22 100644 --- a/python/cugraph/dask/link_analysis/pagerank.py +++ b/python/cugraph/dask/link_analysis/pagerank.py @@ -14,17 +14,29 @@ # from dask.distributed import wait, default_client -from cugraph.dask.common.input_utils import get_local_data +from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.structure.shuffle import shuffle from cugraph.dask.link_analysis import mg_pagerank_wrapper as mg_pagerank import cugraph.comms.comms as Comms - - -def call_pagerank(sID, data, local_data, alpha, max_iter, - tol, personalization, nstart): +import dask_cudf + + +def call_pagerank(sID, + data, + num_verts, + num_edges, + vertex_partition_offsets, + alpha, + max_iter, + tol, + personalization, + nstart): wid = Comms.get_worker_id(sID) handle = Comms.get_handle(sID) return mg_pagerank.mg_pagerank(data[0], - local_data, + num_verts, + num_edges, + vertex_partition_offsets, wid, handle, alpha, @@ -113,15 +125,21 @@ def pagerank(input_graph, """ from cugraph.structure.graph import null_check + if personalization is not None: + raise Exception("Personalization not supported") + nstart = None client = default_client() - if(input_graph.local_data is not None and - input_graph.local_data['by'] == 'dst'): - data = input_graph.local_data['data'] - else: - data = get_local_data(input_graph, by='dst', load_balance=load_balance) + input_graph.compute_renumber_edge_list(transposed=True) + (ddf, + num_verts, + partition_row_size, + partition_col_size, + vertex_partition_offsets) = shuffle(input_graph, transposed=True) + num_edges = len(ddf) + data = 
get_distributed_data(ddf) if personalization is not None: null_check(personalization["vertex"]) @@ -131,22 +149,22 @@ def pagerank(input_graph, personalization, "vertex", "vertex" ).compute() - result = dict([(data.worker_info[wf[0]]["rank"], - client.submit( - call_pagerank, - Comms.get_session_id(), - wf[1], - data.local_data, - alpha, - max_iter, - tol, - personalization, - nstart, - workers=[wf[0]])) - for idx, wf in enumerate(data.worker_to_parts.items())]) + result = [client.submit(call_pagerank, + Comms.get_session_id(), + wf[1], + num_verts, + num_edges, + vertex_partition_offsets, + alpha, + max_iter, + tol, + personalization, + nstart, + workers=[wf[0]]) + for idx, wf in enumerate(data.worker_to_parts.items())] wait(result) - + ddf = dask_cudf.from_delayed(result) if input_graph.renumbered: - return input_graph.unrenumber(result[0].result(), 'vertex').compute() + return input_graph.unrenumber(ddf, 'vertex') - return result[0].result() + return ddf diff --git a/python/cugraph/link_analysis/pagerank.pxd b/python/cugraph/link_analysis/pagerank.pxd index df94b95d72e..79cb033f74b 100644 --- a/python/cugraph/link_analysis/pagerank.pxd +++ b/python/cugraph/link_analysis/pagerank.pxd @@ -20,11 +20,12 @@ from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - cdef void pagerank[VT,ET,WT]( + cdef void call_pagerank[VT,WT]( const handle_t &handle, - const GraphCSCView[VT,ET,WT] &graph, + const graph_container_t &g, + VT *identifiers, WT *pagerank, VT size, VT *personalization_subset, diff --git a/python/cugraph/link_analysis/pagerank_wrapper.pyx b/python/cugraph/link_analysis/pagerank_wrapper.pyx index 9f4e555bbd9..a8c1c9faee8 100644 --- a/python/cugraph/link_analysis/pagerank_wrapper.pyx +++ b/python/cugraph/link_analysis/pagerank_wrapper.pyx @@ -17,7 +17,7 @@ # cython: language_level = 3 #cimport 
cugraph.link_analysis.pagerank as c_pagerank -from cugraph.link_analysis.pagerank cimport pagerank as c_pagerank +from cugraph.link_analysis.pagerank cimport call_pagerank from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool from libc.stdint cimport uintptr_t @@ -38,6 +38,7 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. cdef unique_ptr[handle_t] handle_ptr handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get(); [offsets, indices] = graph_primtypes_wrapper.datatype_cast([input_graph.transposedadjlist.offsets, input_graph.transposedadjlist.indices], [np.int32]) [weights] = graph_primtypes_wrapper.datatype_cast([input_graph.transposedadjlist.weights], [np.float32, np.float64]) @@ -66,14 +67,24 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. cdef uintptr_t c_offsets = offsets.__cuda_array_interface__['data'][0] cdef uintptr_t c_indices = indices.__cuda_array_interface__['data'][0] cdef uintptr_t c_weights = NULL + cdef uintptr_t c_local_verts = NULL; + cdef uintptr_t c_local_edges = NULL; + cdef uintptr_t c_local_offsets = NULL; personalization_id_series = None if weights is not None: c_weights = weights.__cuda_array_interface__['data'][0] + weight_t = weights.dtype + else: + weight_t = np.dtype("float32") - cdef GraphCSCView[int,int,float] graph_float - cdef GraphCSCView[int,int,double] graph_double + # FIXME: Offsets and indices are currently hardcoded to int, but this may + # not be acceptable in the future. + numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, + np.dtype("int64") : numberTypeEnum.int64Type, + np.dtype("float32") : numberTypeEnum.floatType, + np.dtype("double") : numberTypeEnum.doubleType} if personalization is not None: sz = personalization['vertex'].shape[0] @@ -82,16 +93,30 @@ def pagerank(input_graph, alpha=0.85, personalization=None, max_iter=100, tol=1. 
c_pers_vtx = personalization['vertex'].__cuda_array_interface__['data'][0] c_pers_val = personalization['values'].__cuda_array_interface__['data'][0] + cdef graph_container_t graph_container + populate_graph_container_legacy(graph_container, + ((graphTypeEnum.LegacyCSC)), + handle_[0], + c_offsets, c_indices, c_weights, + ((numberTypeEnum.int32Type)), + ((numberTypeEnum.int32Type)), + ((numberTypeMap[weight_t])), + num_verts, num_edges, + c_local_verts, c_local_edges, c_local_offsets) + if (df['pagerank'].dtype == np.float32): - graph_float = GraphCSCView[int,int,float](c_offsets, c_indices, c_weights, num_verts, num_edges) + call_pagerank[int, float](handle_[0], graph_container, + c_identifier, + c_pagerank_val, sz, + c_pers_vtx, c_pers_val, + alpha, tol, + max_iter, has_guess) - c_pagerank[int,int,float](handle_ptr.get()[0], graph_float, c_pagerank_val, sz, c_pers_vtx, c_pers_val, - alpha, tol, max_iter, has_guess) - graph_float.get_vertex_identifiers(c_identifier) else: - graph_double = GraphCSCView[int,int,double](c_offsets, c_indices, c_weights, num_verts, num_edges) - c_pagerank[int,int,double](handle_ptr.get()[0], graph_double, c_pagerank_val, sz, c_pers_vtx, c_pers_val, - alpha, tol, max_iter, has_guess) - graph_double.get_vertex_identifiers(c_identifier) - + call_pagerank[int, double](handle_[0], graph_container, + c_identifier, + c_pagerank_val, sz, + c_pers_vtx, c_pers_val, + alpha, tol, + max_iter, has_guess) return df diff --git a/python/cugraph/structure/graph.py b/python/cugraph/structure/graph.py index 311ef7313ae..ffbf4b8ec75 100644 --- a/python/cugraph/structure/graph.py +++ b/python/cugraph/structure/graph.py @@ -488,6 +488,18 @@ def from_dask_cudf_edgelist( "names not found in input. Recheck the source " "and destination parameters" ) + ddf_columns = s_col + d_col + if edge_attr is not None: + if not (set([edge_attr]).issubset(set(input_ddf.columns))): + raise Exception( + "edge_attr column name not found in input." 
+ "Recheck the edge_attr parameter") + ddf_columns = ddf_columns + [edge_attr] + input_ddf = input_ddf[ddf_columns] + + if edge_attr is not None: + input_ddf = input_ddf.rename(columns={edge_attr: 'value'}) + # # Keep all of the original parameters so we can lazily # evaluate this function diff --git a/python/cugraph/tests/dask/test_mg_pagerank.py b/python/cugraph/tests/dask/test_mg_pagerank.py index a2340e139d1..bd97a7354d2 100644 --- a/python/cugraph/tests/dask/test_mg_pagerank.py +++ b/python/cugraph/tests/dask/test_mg_pagerank.py @@ -49,14 +49,14 @@ def personalize(v, personalization_perc): return cu_personalization -PERSONALIZATION_PERC = [0, 10, 50] +PERSONALIZATION_PERC = [0] @pytest.fixture def client_connection(): cluster = LocalCUDACluster() client = Client(cluster) - Comms.initialize() + Comms.initialize(p2p=True) yield client @@ -108,6 +108,7 @@ def test_dask_pagerank(client_connection, personalization_perc): g, personalization=personalization, tol=1e-6 ) result_pr = dcg.pagerank(dg, personalization=personalization, tol=1e-6) + result_pr = result_pr.compute() err = 0 tol = 1.0e-05 From b704d139c6450f0e06b6d1a6f9066d1426e0cdc5 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Mon, 12 Oct 2020 19:45:55 -0500 Subject: [PATCH 08/16] [REVIEW] 2D cython/python infrastructure- BFS & SSSP (#1177) * add minimal update to create a PR * pagerank 2D cython/python infrastructure * 2D infra- bfs and sssp * add a work around for source (or destination) == self case for isend/irecv * fix a warning * remove dummy change log * sgpu pagerank edits * edits * add namespace * pull branch0.16 * update test * in copy_v_transform_reduce_in|out_nbr, implement missing communication alnog the minor direction * bug fix (assertion failure)\n * bug fix in copy_v_transform_reduce_in_out_nbr.cuh * clang-format * enforce consistency in variable naming related to subcommunicators * bug fix (graph construction) * bug fix 
(vertex_partition_segment_offsets) * review updates * clang * bug fix (caching comm_rank in partition_t object) * updatelocal_verts * bug fix (scale dangling_sum by damping factor) * remove transform_reduce_v_with_adj_matrix_row * replace device_vector with device_uvector in sssp * bfs updates to 2D infra * sssp 2D integration * sssp * flake8 * clang * add host_scalalr_bcast to comm_utils * remove unnecessary include * bug fix in update_frontier_v_push_if_out_nbr * bug fix in VertexFrontier declaration * add debug print for pagerank sum * remove dummy code * bug fix in assert * fix timing bug with isend/irecv * fix compile error * review updates * review updates * Revert "fix compile error" This reverts commit 900fd1143c6be38a4e974ff598627968eae20a07. * Revert "fix timing bug with isend/irecv" This reverts commit e0e696a580cfd2ef0bbe45dfd7e9845e139bee36. * Revert "bug fix in assert" This reverts commit 97b98ed4259a28afb050b1f6142ed91adae40264. * Revert "remove dummy code" This reverts commit facc70c50a0bde5ba06a5ddef830e23275ff5751. * Revert "add debug print for pagerank sum" This reverts commit c479b6df0855b70eb9340df761186ef85e247dcc. * Revert "bug fix in VertexFrontier declaration" This reverts commit 44e3e10d1da49fa5de3a54c31ff9f9d6bc3f1808. * Revert "bug fix in update_frontier_v_push_if_out_nbr" This reverts commit dd800014e2ce9985234e38db81c9b6276238873b. * Revert "remove unnecessary include" This reverts commit c55dbfb2af9a89ef289ffa6a0501c68b63f47900. * Revert "add host_scalalr_bcast to comm_utils" This reverts commit 6430ad55fef31749d340fc9daffe689966f8d83c. * Revert "replace device_vector with device_uvector in sssp" This reverts commit d6b2e5883f2a98f0e4ebc904ec4513bcb5f3aabe. * Revert "remove transform_reduce_v_with_adj_matrix_row" This reverts commit 21d4e104da02ef4d2609e2c05cd26471a40a6188. * Revert "bug fix (scale dangling_sum by damping factor)" This reverts commit 15818f74fe160c81e40987feb1162248d41e9c06. 
* Revert "bug fix (caching comm_rank in partition_t object)" This reverts commit bd2dd834f8df92944f64ff56fa698573fea9f416. * Revert "bug fix (vertex_partition_segment_offsets)" This reverts commit a006b9940b8d32d4e56cfc2b4a5746c14a51388c. * Revert "bug fix (graph construction)" This reverts commit 59fadefd0c7e6fd25c1332d5403b0c86e71defc2. * Revert "enforce consistency in variable naming related to subcommunicators" This reverts commit 790549f141c46dd463618504303f96f64e2ce712. * Revert "clang-format" This reverts commit 761f7aa81761d99832c6a9748679418e776ac495. * Revert "bug fix in copy_v_transform_reduce_in_out_nbr.cuh" This reverts commit f874f6517bfe76a317e51a681e9c33e7aa268004. * Revert "bug fix (assertion failure)\n" This reverts commit a33c2d10bcea579a12e298c0b5bb8b4917fd21e0. * Revert "in copy_v_transform_reduce_in|out_nbr, implement missing communication alnog the minor direction" This reverts commit 6e1b152630e1a5579d55a2f0948c0c010a5466a5. * Revert "fix a warning" This reverts commit 25607cad97cc6107586dfe0d9d30ad5ee2ca74b8. * Revert "add a work around for source (or destination) == self case for isend/irecv" This reverts commit 2be9e5f9a016d5884423b6e2b59e43ed646cde07. 
* revert * clang * update tests and predecessor * update doc * update edge weights * remove partition row/col size * remove partition row/col size * remove partition row/col size * remove partition row/col size Co-authored-by: Seunghwa Kang --- CHANGELOG.md | 2 + cpp/include/algorithms.hpp | 4 +- cpp/include/utilities/cython.hpp | 20 +++ cpp/src/experimental/bfs.cu | 28 +-- cpp/src/experimental/sssp.cu | 28 +-- cpp/src/utilities/cython.cu | 166 ++++++++++++++++++ python/cugraph/dask/__init__.py | 1 + .../dask/community/louvain_wrapper.pyx | 3 +- .../dask/link_analysis/mg_pagerank.pxd | 2 +- .../link_analysis/mg_pagerank_wrapper.pyx | 2 +- python/cugraph/dask/link_analysis/pagerank.py | 12 +- python/cugraph/dask/traversal/bfs.py | 72 ++++---- python/cugraph/dask/traversal/mg_bfs.pxd | 13 +- .../cugraph/dask/traversal/mg_bfs_wrapper.pyx | 92 ++++++---- python/cugraph/dask/traversal/mg_sssp.pxd | 28 +++ .../dask/traversal/mg_sssp_wrapper.pyx | 115 ++++++++++++ python/cugraph/dask/traversal/sssp.py | 120 +++++++++++++ python/cugraph/tests/dask/test_mg_bfs.py | 3 +- python/cugraph/tests/dask/test_mg_sssp.py | 86 +++++++++ python/cugraph/traversal/bfs.pxd | 14 +- python/cugraph/traversal/bfs_wrapper.pyx | 36 ++-- python/cugraph/traversal/sssp.pxd | 14 +- python/cugraph/traversal/sssp_wrapper.pyx | 68 +++---- 23 files changed, 758 insertions(+), 171 deletions(-) create mode 100644 python/cugraph/dask/traversal/mg_sssp.pxd create mode 100644 python/cugraph/dask/traversal/mg_sssp_wrapper.pyx create mode 100644 python/cugraph/dask/traversal/sssp.py create mode 100644 python/cugraph/tests/dask/test_mg_sssp.py diff --git a/CHANGELOG.md b/CHANGELOG.md index 9568be67594..b175568bf60 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ - PR #1163 Integrated 2D shuffling and Louvain updates - PR #1178 Refactored cython graph factory code to scale to additional data types - PR #1175 Integrated 2D pagerank python/cython infra +- PR #1177 Integrated 2D bfs and sssp 
python/cython infra ## Improvements - PR 1081 MNMG Renumbering - sort partitions by degree @@ -49,6 +50,7 @@ - PR #1196 Move subcomms init outside of individual algorithm functions - PR #1198 Remove deprecated call to from_gpu_matrix + # cuGraph 0.15.0 (26 Aug 2020) ## New Features diff --git a/cpp/include/algorithms.hpp b/cpp/include/algorithms.hpp index f4b9868040b..3b1bdde5472 100644 --- a/cpp/include/algorithms.hpp +++ b/cpp/include/algorithms.hpp @@ -965,7 +965,7 @@ namespace experimental { * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ template -void bfs(raft::handle_t &handle, +void bfs(raft::handle_t const &handle, graph_view_t const &graph_view, vertex_t *distances, vertex_t *predecessors, @@ -998,7 +998,7 @@ void bfs(raft::handle_t &handle, * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). */ template -void sssp(raft::handle_t &handle, +void sssp(raft::handle_t const &handle, graph_view_t const &graph_view, weight_t *distances, vertex_t *predecessors, diff --git a/cpp/include/utilities/cython.hpp b/cpp/include/utilities/cython.hpp index 36e0369c1c6..8dcdfaf31cf 100644 --- a/cpp/include/utilities/cython.hpp +++ b/cpp/include/utilities/cython.hpp @@ -213,6 +213,26 @@ void call_pagerank(raft::handle_t const& handle, int64_t max_iter, bool has_guess); +// Wrapper for calling BFS through a graph container +template +void call_bfs(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t* identifiers, + vertex_t* distances, + vertex_t* predecessors, + double* sp_counters, + const vertex_t start_vertex, + bool directed); + +// Wrapper for calling SSSP through a graph container +template +void call_sssp(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t* identifiers, + weight_t* distances, + vertex_t* predecessors, + const vertex_t source_vertex); + // Helper for setting up subcommunicators, typically 
called as part of the // user-initiated comms initialization in Python. // diff --git a/cpp/src/experimental/bfs.cu b/cpp/src/experimental/bfs.cu index d9d7cb1a245..940ff30de07 100644 --- a/cpp/src/experimental/bfs.cu +++ b/cpp/src/experimental/bfs.cu @@ -41,7 +41,7 @@ namespace experimental { namespace detail { template -void bfs(raft::handle_t &handle, +void bfs(raft::handle_t const &handle, GraphViewType const &push_graph_view, typename GraphViewType::vertex_type *distances, PredecessorIterator predecessor_first, @@ -164,7 +164,7 @@ void bfs(raft::handle_t &handle, } // namespace detail template -void bfs(raft::handle_t &handle, +void bfs(raft::handle_t const &handle, graph_view_t const &graph_view, vertex_t *distances, vertex_t *predecessors, @@ -196,7 +196,7 @@ void bfs(raft::handle_t &handle, // explicit instantiation -template void bfs(raft::handle_t &handle, +template void bfs(raft::handle_t const &handle, graph_view_t const &graph_view, int32_t *distances, int32_t *predecessors, @@ -205,7 +205,7 @@ template void bfs(raft::handle_t &handle, int32_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t &handle, +template void bfs(raft::handle_t const &handle, graph_view_t const &graph_view, int32_t *distances, int32_t *predecessors, @@ -214,7 +214,7 @@ template void bfs(raft::handle_t &handle, int32_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t &handle, +template void bfs(raft::handle_t const &handle, graph_view_t const &graph_view, int32_t *distances, int32_t *predecessors, @@ -223,7 +223,7 @@ template void bfs(raft::handle_t &handle, int32_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t &handle, +template void bfs(raft::handle_t const &handle, graph_view_t const &graph_view, int32_t *distances, int32_t *predecessors, @@ -232,7 +232,7 @@ template void bfs(raft::handle_t &handle, int32_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t &handle, +template void 
bfs(raft::handle_t const &handle, graph_view_t const &graph_view, int64_t *distances, int64_t *predecessors, @@ -241,7 +241,7 @@ template void bfs(raft::handle_t &handle, int64_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t &handle, +template void bfs(raft::handle_t const &handle, graph_view_t const &graph_view, int64_t *distances, int64_t *predecessors, @@ -250,7 +250,7 @@ template void bfs(raft::handle_t &handle, int64_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t &handle, +template void bfs(raft::handle_t const &handle, graph_view_t const &graph_view, int32_t *distances, int32_t *predecessors, @@ -259,7 +259,7 @@ template void bfs(raft::handle_t &handle, int32_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t &handle, +template void bfs(raft::handle_t const &handle, graph_view_t const &graph_view, int32_t *distances, int32_t *predecessors, @@ -268,7 +268,7 @@ template void bfs(raft::handle_t &handle, int32_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t &handle, +template void bfs(raft::handle_t const &handle, graph_view_t const &graph_view, int32_t *distances, int32_t *predecessors, @@ -277,7 +277,7 @@ template void bfs(raft::handle_t &handle, int32_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t &handle, +template void bfs(raft::handle_t const &handle, graph_view_t const &graph_view, int32_t *distances, int32_t *predecessors, @@ -286,7 +286,7 @@ template void bfs(raft::handle_t &handle, int32_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t &handle, +template void bfs(raft::handle_t const &handle, graph_view_t const &graph_view, int64_t *distances, int64_t *predecessors, @@ -295,7 +295,7 @@ template void bfs(raft::handle_t &handle, int64_t depth_limit, bool do_expensive_check); -template void bfs(raft::handle_t &handle, +template void bfs(raft::handle_t const &handle, graph_view_t const &graph_view, 
int64_t *distances, int64_t *predecessors, diff --git a/cpp/src/experimental/sssp.cu b/cpp/src/experimental/sssp.cu index e0679ad0d56..b1bc2968c71 100644 --- a/cpp/src/experimental/sssp.cu +++ b/cpp/src/experimental/sssp.cu @@ -42,7 +42,7 @@ namespace experimental { namespace detail { template -void sssp(raft::handle_t &handle, +void sssp(raft::handle_t const &handle, GraphViewType const &push_graph_view, typename GraphViewType::weight_type *distances, PredecessorIterator predecessor_first, @@ -241,7 +241,7 @@ void sssp(raft::handle_t &handle, } // namespace detail template -void sssp(raft::handle_t &handle, +void sssp(raft::handle_t const &handle, graph_view_t const &graph_view, weight_t *distances, vertex_t *predecessors, @@ -265,7 +265,7 @@ void sssp(raft::handle_t &handle, // explicit instantiation -template void sssp(raft::handle_t &handle, +template void sssp(raft::handle_t const &handle, graph_view_t const &graph_view, float *distances, int32_t *predecessors, @@ -273,7 +273,7 @@ template void sssp(raft::handle_t &handle, float cutoff, bool do_expensive_check); -template void sssp(raft::handle_t &handle, +template void sssp(raft::handle_t const &handle, graph_view_t const &graph_view, double *distances, int32_t *predecessors, @@ -281,7 +281,7 @@ template void sssp(raft::handle_t &handle, double cutoff, bool do_expensive_check); -template void sssp(raft::handle_t &handle, +template void sssp(raft::handle_t const &handle, graph_view_t const &graph_view, float *distances, int32_t *predecessors, @@ -289,7 +289,7 @@ template void sssp(raft::handle_t &handle, float cutoff, bool do_expensive_check); -template void sssp(raft::handle_t &handle, +template void sssp(raft::handle_t const &handle, graph_view_t const &graph_view, double *distances, int32_t *predecessors, @@ -297,7 +297,7 @@ template void sssp(raft::handle_t &handle, double cutoff, bool do_expensive_check); -template void sssp(raft::handle_t &handle, +template void sssp(raft::handle_t const &handle, 
graph_view_t const &graph_view, float *distances, int64_t *predecessors, @@ -305,7 +305,7 @@ template void sssp(raft::handle_t &handle, float cutoff, bool do_expensive_check); -template void sssp(raft::handle_t &handle, +template void sssp(raft::handle_t const &handle, graph_view_t const &graph_view, double *distances, int64_t *predecessors, @@ -313,7 +313,7 @@ template void sssp(raft::handle_t &handle, double cutoff, bool do_expensive_check); -template void sssp(raft::handle_t &handle, +template void sssp(raft::handle_t const &handle, graph_view_t const &graph_view, float *distances, int32_t *predecessors, @@ -321,7 +321,7 @@ template void sssp(raft::handle_t &handle, float cutoff, bool do_expensive_check); -template void sssp(raft::handle_t &handle, +template void sssp(raft::handle_t const &handle, graph_view_t const &graph_view, double *distances, int32_t *predecessors, @@ -329,7 +329,7 @@ template void sssp(raft::handle_t &handle, double cutoff, bool do_expensive_check); -template void sssp(raft::handle_t &handle, +template void sssp(raft::handle_t const &handle, graph_view_t const &graph_view, float *distances, int32_t *predecessors, @@ -337,7 +337,7 @@ template void sssp(raft::handle_t &handle, float cutoff, bool do_expensive_check); -template void sssp(raft::handle_t &handle, +template void sssp(raft::handle_t const &handle, graph_view_t const &graph_view, double *distances, int32_t *predecessors, @@ -345,7 +345,7 @@ template void sssp(raft::handle_t &handle, double cutoff, bool do_expensive_check); -template void sssp(raft::handle_t &handle, +template void sssp(raft::handle_t const &handle, graph_view_t const &graph_view, float *distances, int64_t *predecessors, @@ -353,7 +353,7 @@ template void sssp(raft::handle_t &handle, float cutoff, bool do_expensive_check); -template void sssp(raft::handle_t &handle, +template void sssp(raft::handle_t const &handle, graph_view_t const &graph_view, double *distances, int64_t *predecessors, diff --git 
a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 78b59fbead8..9705f229548 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -543,6 +543,108 @@ void call_pagerank(raft::handle_t const& handle, } } +// Wrapper for calling BFS through a graph container +template +void call_bfs(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t* identifiers, + vertex_t* distances, + vertex_t* predecessors, + double* sp_counters, + const vertex_t start_vertex, + bool directed) +{ + if (graph_container.graph_type == graphTypeEnum::GraphCSRViewFloat) { + graph_container.graph_ptr_union.GraphCSRViewFloatPtr->get_vertex_identifiers( + reinterpret_cast(identifiers)); + bfs(handle, + *(graph_container.graph_ptr_union.GraphCSRViewFloatPtr), + reinterpret_cast(distances), + reinterpret_cast(predecessors), + sp_counters, + static_cast(start_vertex), + directed); + } else if (graph_container.graph_type == graphTypeEnum::GraphCSRViewDouble) { + graph_container.graph_ptr_union.GraphCSRViewDoublePtr->get_vertex_identifiers( + reinterpret_cast(identifiers)); + bfs(handle, + *(graph_container.graph_ptr_union.GraphCSRViewDoublePtr), + reinterpret_cast(distances), + reinterpret_cast(predecessors), + sp_counters, + static_cast(start_vertex), + directed); + } else if (graph_container.graph_type == graphTypeEnum::graph_t) { + if (graph_container.edgeType == numberTypeEnum::int32Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::bfs(handle, + graph->view(), + reinterpret_cast(distances), + reinterpret_cast(predecessors), + static_cast(start_vertex)); + } else if (graph_container.edgeType == numberTypeEnum::int64Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::bfs(handle, + graph->view(), + reinterpret_cast(distances), + reinterpret_cast(predecessors), + static_cast(start_vertex)); + } else { + CUGRAPH_FAIL("vertexType/edgeType 
combination unsupported"); + } + } +} + +// Wrapper for calling SSSP through a graph container +template +void call_sssp(raft::handle_t const& handle, + graph_container_t const& graph_container, + vertex_t* identifiers, + weight_t* distances, + vertex_t* predecessors, + const vertex_t source_vertex) +{ + if (graph_container.graph_type == graphTypeEnum::GraphCSRViewFloat) { + graph_container.graph_ptr_union.GraphCSRViewFloatPtr->get_vertex_identifiers( + reinterpret_cast(identifiers)); + sssp( // handle, TODO: clarify: no raft_handle_t? why? + *(graph_container.graph_ptr_union.GraphCSRViewFloatPtr), + reinterpret_cast(distances), + reinterpret_cast(predecessors), + static_cast(source_vertex)); + } else if (graph_container.graph_type == graphTypeEnum::GraphCSRViewDouble) { + graph_container.graph_ptr_union.GraphCSRViewDoublePtr->get_vertex_identifiers( + reinterpret_cast(identifiers)); + sssp( // handle, TODO: clarify: no raft_handle_t? why? + *(graph_container.graph_ptr_union.GraphCSRViewDoublePtr), + reinterpret_cast(distances), + reinterpret_cast(predecessors), + static_cast(source_vertex)); + } else if (graph_container.graph_type == graphTypeEnum::graph_t) { + if (graph_container.edgeType == numberTypeEnum::int32Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::sssp(handle, + graph->view(), + reinterpret_cast(distances), + reinterpret_cast(predecessors), + static_cast(source_vertex)); + } else if (graph_container.edgeType == numberTypeEnum::int64Type) { + auto graph = + detail::create_graph(handle, graph_container); + cugraph::experimental::sssp(handle, + graph->view(), + reinterpret_cast(distances), + reinterpret_cast(predecessors), + static_cast(source_vertex)); + } else { + CUGRAPH_FAIL("vertexType/edgeType combination unsupported"); + } + } +} + // Explicit instantiations template std::pair call_louvain(raft::handle_t const& handle, graph_container_t const& graph_container, @@ -606,6 +708,70 @@ template void 
call_pagerank(raft::handle_t const& handle, int64_t max_iter, bool has_guess); +template void call_bfs(raft::handle_t const& handle, + graph_container_t const& graph_container, + int32_t* identifiers, + int32_t* distances, + int32_t* predecessors, + double* sp_counters, + const int32_t start_vertex, + bool directed); + +template void call_bfs(raft::handle_t const& handle, + graph_container_t const& graph_container, + int32_t* identifiers, + int32_t* distances, + int32_t* predecessors, + double* sp_counters, + const int32_t start_vertex, + bool directed); + +template void call_bfs(raft::handle_t const& handle, + graph_container_t const& graph_container, + int64_t* identifiers, + int64_t* distances, + int64_t* predecessors, + double* sp_counters, + const int64_t start_vertex, + bool directed); + +template void call_bfs(raft::handle_t const& handle, + graph_container_t const& graph_container, + int64_t* identifiers, + int64_t* distances, + int64_t* predecessors, + double* sp_counters, + const int64_t start_vertex, + bool directed); + +template void call_sssp(raft::handle_t const& handle, + graph_container_t const& graph_container, + int32_t* identifiers, + float* distances, + int32_t* predecessors, + const int32_t source_vertex); + +template void call_sssp(raft::handle_t const& handle, + graph_container_t const& graph_container, + int32_t* identifiers, + double* distances, + int32_t* predecessors, + const int32_t source_vertex); + +template void call_sssp(raft::handle_t const& handle, + graph_container_t const& graph_container, + int64_t* identifiers, + float* distances, + int64_t* predecessors, + const int64_t source_vertex); + +template void call_sssp(raft::handle_t const& handle, + graph_container_t const& graph_container, + int64_t* identifiers, + double* distances, + int64_t* predecessors, + const int64_t source_vertex); + // Helper for setting up subcommunicators void init_subcomms(raft::handle_t& handle, size_t row_comm_size) { diff --git 
a/python/cugraph/dask/__init__.py b/python/cugraph/dask/__init__.py index e62a8bfcdb4..a79bee7c026 100644 --- a/python/cugraph/dask/__init__.py +++ b/python/cugraph/dask/__init__.py @@ -13,5 +13,6 @@ from .link_analysis.pagerank import pagerank from .traversal.bfs import bfs +from .traversal.sssp import sssp from .common.read_utils import get_chunksize from .community.louvain import louvain diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index 59ec0f67733..a1a1e629732 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -78,7 +78,8 @@ def louvain(input_df, # data is on device, move to host (.values_host) since graph_t in # graph_container needs a host array - cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets.values_host.__array_interface__['data'][0] + vertex_partition_offsets_host = vertex_partition_offsets.values_host + cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets_host.__array_interface__['data'][0] cdef graph_container_t graph_container diff --git a/python/cugraph/dask/link_analysis/mg_pagerank.pxd b/python/cugraph/dask/link_analysis/mg_pagerank.pxd index 351b3d20d50..91104d9127c 100644 --- a/python/cugraph/dask/link_analysis/mg_pagerank.pxd +++ b/python/cugraph/dask/link_analysis/mg_pagerank.pxd @@ -31,4 +31,4 @@ cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": double alpha, double tolerance, long long max_iter, - bool has_guess) except + + bool has_guess) except + \ No newline at end of file diff --git a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx index 8fecbb9ab87..d459b93e7c4 100644 --- a/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx +++ b/python/cugraph/dask/link_analysis/mg_pagerank_wrapper.pyx @@ -67,7 +67,7 @@ def mg_pagerank(input_df, cdef uintptr_t c_src_vertices = 
src.__cuda_array_interface__['data'][0] cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] cdef uintptr_t c_edge_weights = NULL - + # FIXME: data is on device, move to host (to_pandas()), convert to np array and access pointer to pass to C vertex_partition_offsets_host = vertex_partition_offsets.values_host cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets_host.__array_interface__['data'][0] diff --git a/python/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/dask/link_analysis/pagerank.py index 143bb37dd22..0ea09969350 100644 --- a/python/cugraph/dask/link_analysis/pagerank.py +++ b/python/cugraph/dask/link_analysis/pagerank.py @@ -73,7 +73,7 @@ def pagerank(input_graph, Alpha should be greater than 0.0 and strictly lower than 1.0. personalization : cudf.Dataframe GPU Dataframe containing the personalization information. - + Currently not supported. personalization['vertex'] : cudf.Series Subset of vertices of graph for personalization personalization['values'] : cudf.Series @@ -99,13 +99,13 @@ def pagerank(input_graph, Returns ------- - PageRank : cudf.DataFrame - GPU data frame containing two cudf.Series of size V: the vertex - identifiers and the corresponding PageRank values. + PageRank : dask_cudf.DataFrame + GPU data frame containing two dask_cudf.Series of size V: the + vertex identifiers and the corresponding PageRank values. 
- df['vertex'] : cudf.Series + ddf['vertex'] : cudf.Series Contains the vertex identifiers - df['pagerank'] : cudf.Series + ddf['pagerank'] : cudf.Series Contains the PageRank score Examples diff --git a/python/cugraph/dask/traversal/bfs.py b/python/cugraph/dask/traversal/bfs.py index 8baf15e079b..88eba53de55 100644 --- a/python/cugraph/dask/traversal/bfs.py +++ b/python/cugraph/dask/traversal/bfs.py @@ -14,29 +14,36 @@ # from dask.distributed import wait, default_client -from cugraph.dask.common.input_utils import get_local_data +from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.structure.shuffle import shuffle from cugraph.dask.traversal import mg_bfs_wrapper as mg_bfs import cugraph.comms.comms as Comms import cudf +import dask_cudf -def call_bfs(sID, data, local_data, start, num_verts, return_distances): +def call_bfs(sID, + data, + num_verts, + num_edges, + vertex_partition_offsets, + start, + return_distances): wid = Comms.get_worker_id(sID) handle = Comms.get_handle(sID) return mg_bfs.mg_bfs(data[0], - local_data, + num_verts, + num_edges, + vertex_partition_offsets, wid, handle, start, - num_verts, return_distances) def bfs(graph, start, - return_distances=False, - load_balance=True): - + return_distances=False): """ Find the distances and predecessors for a breadth first traversal of a graph. @@ -54,10 +61,6 @@ def bfs(graph, iterates over edges in the component reachable from this node. return_distances : bool, optional, default=False Indicates if distances should be returned - load_balance : bool, optional, default=True - Set as True to perform load_balancing after global sorting of - dask-cudf DataFrame. This ensures that the data is uniformly - distributed among multiple GPUs to avoid over-loading. 
Returns ------- @@ -87,35 +90,36 @@ def bfs(graph, client = default_client() - if(graph.local_data is not None and - graph.local_data['by'] == 'src'): - data = graph.local_data['data'] - else: - data = get_local_data(graph, by='src', load_balance=load_balance) + graph.compute_renumber_edge_list(transposed=False) + (ddf, + num_verts, + partition_row_size, + partition_col_size, + vertex_partition_offsets) = shuffle(graph, transposed=False) + num_edges = len(ddf) + data = get_distributed_data(ddf) if graph.renumbered: start = graph.lookup_internal_vertex_id(cudf.Series([start], dtype='int32')).compute() start = start.iloc[0] - result = dict([(data.worker_info[wf[0]]["rank"], - client.submit( - call_bfs, - Comms.get_session_id(), - wf[1], - data.local_data, - start, - data.max_vertex_id+1, - return_distances, - workers=[wf[0]])) - for idx, wf in enumerate(data.worker_to_parts.items())]) + result = [client.submit( + call_bfs, + Comms.get_session_id(), + wf[1], + num_verts, + num_edges, + vertex_partition_offsets, + start, + return_distances, + workers=[wf[0]]) + for idx, wf in enumerate(data.worker_to_parts.items())] wait(result) - - df = result[0].result() + ddf = dask_cudf.from_delayed(result) if graph.renumbered: - df = graph.unrenumber(df, 'vertex').compute() - df = graph.unrenumber(df, 'predecessor').compute() - df["predecessor"].fillna(-1, inplace=True) - - return df + ddf = graph.unrenumber(ddf, 'vertex') + ddf = graph.unrenumber(ddf, 'predecessor') + ddf["predecessor"] = ddf["predecessor"].fillna(-1) + return ddf diff --git a/python/cugraph/dask/traversal/mg_bfs.pxd b/python/cugraph/dask/traversal/mg_bfs.pxd index 68010e2b816..82c6e97d668 100644 --- a/python/cugraph/dask/traversal/mg_bfs.pxd +++ b/python/cugraph/dask/traversal/mg_bfs.pxd @@ -18,13 +18,14 @@ from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - cdef 
void bfs[VT,ET,WT]( + cdef void call_bfs[vertex_t, weight_t]( const handle_t &handle, - const GraphCSRView[VT,ET,WT] &graph, - VT *distances, - VT *predecessors, + const graph_container_t &g, + vertex_t *identifiers, + vertex_t *distances, + vertex_t *predecessors, double *sp_counters, - const VT start_vertex, + const vertex_t start_vertex, bool directed) except + diff --git a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx index 4c13aeb1286..c92f28eb407 100644 --- a/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx +++ b/python/cugraph/dask/traversal/mg_bfs_wrapper.pyx @@ -21,7 +21,14 @@ from cugraph.structure.graph_primtypes cimport * import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper from libc.stdint cimport uintptr_t -def mg_bfs(input_df, local_data, rank, handle, start, result_len, return_distances=False): +def mg_bfs(input_df, + num_global_verts, + num_global_edges, + vertex_partition_offsets, + rank, + handle, + start, + return_distances=False): """ Call pagerank """ @@ -32,59 +39,70 @@ def mg_bfs(input_df, local_data, rank, handle, start, result_len, return_distanc # Local COO information src = input_df['src'] dst = input_df['dst'] - num_verts = local_data['verts'].sum() - num_edges = local_data['edges'].sum() - local_offset = local_data['offsets'][rank] - src = src - local_offset - num_local_verts = local_data['verts'][rank] - num_local_edges = len(src) + vertex_t = src.dtype + if num_global_edges > (2**31 - 1): + edge_t = np.dtype("int64") + else: + edge_t = np.dtype("int32") + if "value" in input_df.columns: + weights = input_df['value'] + weight_t = weights.dtype + else: + weight_t = np.dtype("float32") - # Convert to local CSR - [src, dst] = graph_primtypes_wrapper.datatype_cast([src, dst], [np.int32]) - _offsets, indices, weights = coo2csr(src, dst, None) - offsets = _offsets[:num_local_verts + 1] - del _offsets + # FIXME: Offsets and indices are currently hardcoded to int, 
but this may + # not be acceptable in the future. + numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, + np.dtype("int64") : numberTypeEnum.int64Type, + np.dtype("float32") : numberTypeEnum.floatType, + np.dtype("double") : numberTypeEnum.doubleType} - # Pointers required for CSR Graph - cdef uintptr_t c_offsets_ptr = offsets.__cuda_array_interface__['data'][0] - cdef uintptr_t c_indices_ptr = indices.__cuda_array_interface__['data'][0] + # FIXME: needs to be edge_t type not int + cdef int num_partition_edges = len(src) + + cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] + cdef uintptr_t c_edge_weights = NULL + + # FIXME: data is on device, move to host (to_pandas()), convert to np array and access pointer to pass to C + vertex_partition_offsets_host = vertex_partition_offsets.values_host + cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets_host.__array_interface__['data'][0] + + cdef graph_container_t graph_container + + populate_graph_container(graph_container, + handle_[0], + c_src_vertices, c_dst_vertices, c_edge_weights, + c_vertex_partition_offsets, + ((numberTypeMap[vertex_t])), + ((numberTypeMap[edge_t])), + ((numberTypeMap[weight_t])), + num_partition_edges, + num_global_verts, num_global_edges, + True, + False, True) # Generate the cudf.DataFrame result df = cudf.DataFrame() - df['vertex'] = cudf.Series(range(0, result_len), dtype=np.int32) - df['predecessor'] = cudf.Series(np.zeros(result_len, dtype=np.int32)) + df['vertex'] = cudf.Series(np.arange(vertex_partition_offsets.iloc[rank], vertex_partition_offsets.iloc[rank+1]), dtype=vertex_t) + df['predecessor'] = cudf.Series(np.zeros(len(df['vertex']), dtype=np.int32)) if (return_distances): - df['distance'] = cudf.Series(np.zeros(result_len, dtype=np.int32)) + df['distance'] = cudf.Series(np.zeros(len(df['vertex']), dtype=np.int32)) # Associate to cudf Series cdef uintptr_t 
c_distance_ptr = NULL # Pointer to the DataFrame 'distance' Series - cdef uintptr_t c_predecessor_ptr = df['predecessor'].__cuda_array_interface__['data'][0]; + cdef uintptr_t c_predecessor_ptr = df['predecessor'].__cuda_array_interface__['data'][0] if (return_distances): c_distance_ptr = df['distance'].__cuda_array_interface__['data'][0] - # Extract local data - cdef uintptr_t c_local_verts = local_data['verts'].__array_interface__['data'][0] - cdef uintptr_t c_local_edges = local_data['edges'].__array_interface__['data'][0] - cdef uintptr_t c_local_offsets = local_data['offsets'].__array_interface__['data'][0] - - # BFS - cdef GraphCSRView[int,int,float] graph - graph= GraphCSRView[int, int, float]( c_offsets_ptr, - c_indices_ptr, - NULL, - num_verts, - num_local_edges) - graph.set_local_data(c_local_verts, c_local_edges, c_local_offsets) - graph.set_handle(handle_) - cdef bool direction = 1 # MG BFS path assumes directed is true - c_bfs.bfs[int, int, float](handle_[0], - graph, + c_bfs.call_bfs[int, float](handle_[0], + graph_container, + NULL, c_distance_ptr, c_predecessor_ptr, NULL, start, direction) - return df diff --git a/python/cugraph/dask/traversal/mg_sssp.pxd b/python/cugraph/dask/traversal/mg_sssp.pxd new file mode 100644 index 00000000000..f846facd269 --- /dev/null +++ b/python/cugraph/dask/traversal/mg_sssp.pxd @@ -0,0 +1,28 @@ +# +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +from cugraph.structure.graph_primtypes cimport * +from libcpp cimport bool + + +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": + + cdef void call_sssp[vertex_t, weight_t]( + const handle_t &handle, + const graph_container_t &g, + vertex_t *identifiers, + weight_t *distances, + vertex_t *predecessors, + const vertex_t start_vertex) diff --git a/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx new file mode 100644 index 00000000000..b7aec103098 --- /dev/null +++ b/python/cugraph/dask/traversal/mg_sssp_wrapper.pyx @@ -0,0 +1,115 @@ +# +# Copyright (c) 2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +from cugraph.structure.utils_wrapper import * +from cugraph.dask.traversal cimport mg_sssp as c_sssp +import cudf +from cugraph.structure.graph_primtypes cimport * +import cugraph.structure.graph_primtypes_wrapper as graph_primtypes_wrapper +from libc.stdint cimport uintptr_t + +def mg_sssp(input_df, + num_global_verts, + num_global_edges, + vertex_partition_offsets, + rank, + handle, + start): + """ + Call sssp + """ + + cdef size_t handle_size_t = handle.getHandle() + handle_ = handle_size_t + + # Local COO information + src = input_df['src'] + dst = input_df['dst'] + vertex_t = src.dtype + if num_global_edges > (2**31 - 1): + edge_t = np.dtype("int64") + else: + edge_t = np.dtype("int32") + if "value" in input_df.columns: + weights = input_df['value'] + weight_t = weights.dtype + else: + weights = None + weight_t = np.dtype("float32") + + # FIXME: Offsets and indices are currently hardcoded to int, but this may + # not be acceptable in the future. + numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, + np.dtype("int64") : numberTypeEnum.int64Type, + np.dtype("float32") : numberTypeEnum.floatType, + np.dtype("double") : numberTypeEnum.doubleType} + + # FIXME: needs to be edge_t type not int + cdef int num_partition_edges = len(src) + + cdef uintptr_t c_src_vertices = src.__cuda_array_interface__['data'][0] + cdef uintptr_t c_dst_vertices = dst.__cuda_array_interface__['data'][0] + cdef uintptr_t c_edge_weights = NULL + if weights is not None: + c_edge_weights = weights.__cuda_array_interface__['data'][0] + + # FIXME: data is on device, move to host (to_pandas()), convert to np array and access pointer to pass to C + vertex_partition_offsets_host = vertex_partition_offsets.values_host + cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets_host.__array_interface__['data'][0] + + cdef graph_container_t graph_container + + populate_graph_container(graph_container, + handle_[0], + c_src_vertices, c_dst_vertices, c_edge_weights, + 
c_vertex_partition_offsets, + ((numberTypeMap[vertex_t])), + ((numberTypeMap[edge_t])), + ((numberTypeMap[weight_t])), + num_partition_edges, + num_global_verts, num_global_edges, + True, + False, True) + + # Generate the cudf.DataFrame result + df = cudf.DataFrame() + df['vertex'] = cudf.Series(np.arange(vertex_partition_offsets.iloc[rank], vertex_partition_offsets.iloc[rank+1]), dtype=vertex_t) + df['predecessor'] = cudf.Series(np.zeros(len(df['vertex']), dtype=vertex_t)) + df['distance'] = cudf.Series(np.zeros(len(df['vertex']), dtype=weight_t)) + + # Associate to cudf Series + cdef uintptr_t c_predecessor_ptr = df['predecessor'].__cuda_array_interface__['data'][0] + cdef uintptr_t c_distance_ptr = df['distance'].__cuda_array_interface__['data'][0] + + # MG BFS path assumes directed is true + if weight_t == np.float32: + c_sssp.call_sssp[int, float](handle_[0], + graph_container, + NULL, + c_distance_ptr, + c_predecessor_ptr, + start) + elif weight_t == np.float64: + c_sssp.call_sssp[int, double](handle_[0], + graph_container, + NULL, + c_distance_ptr, + c_predecessor_ptr, + start) + else: # This case should not happen + raise NotImplementedError + + return df diff --git a/python/cugraph/dask/traversal/sssp.py b/python/cugraph/dask/traversal/sssp.py new file mode 100644 index 00000000000..9554e10f4d6 --- /dev/null +++ b/python/cugraph/dask/traversal/sssp.py @@ -0,0 +1,120 @@ +# Copyright (c) 2019-2020, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +# + +from dask.distributed import wait, default_client +from cugraph.dask.common.input_utils import get_distributed_data +from cugraph.structure.shuffle import shuffle +from cugraph.dask.traversal import mg_sssp_wrapper as mg_sssp +import cugraph.comms.comms as Comms +import cudf +import dask_cudf + + +def call_sssp(sID, + data, + num_verts, + num_edges, + vertex_partition_offsets, + start): + wid = Comms.get_worker_id(sID) + handle = Comms.get_handle(sID) + return mg_sssp.mg_sssp(data[0], + num_verts, + num_edges, + vertex_partition_offsets, + wid, + handle, + start) + + +def sssp(graph, + source): + + """ + Find the distances and predecessors for a breadth first traversal of a + graph. + The input graph must contain edge list as dask-cudf dataframe with + one partition per GPU. + + Parameters + ---------- + graph : cugraph.DiGraph + cuGraph graph descriptor, should contain the connectivity information + as dask cudf edge list dataframe(edge weights are not used for this + algorithm). Undirected Graph not currently supported. 
+ source : Integer + Specify source vertex + + Returns + ------- + df : cudf.DataFrame + df['vertex'][i] gives the vertex id of the i'th vertex + + df['distance'][i] gives the path distance for the i'th vertex from the + starting vertex (Only if return_distances is True) + + df['predecessor'][i] gives for the i'th vertex the vertex it was + reached from in the traversal + + Examples + -------- + >>> import cugraph.dask as dcg + >>> Comms.initialize() + >>> chunksize = dcg.get_chunksize(input_data_path) + >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, + delimiter=' ', + names=['src', 'dst', 'value'], + dtype=['int32', 'int32', 'float32']) + >>> dg = cugraph.DiGraph() + >>> dg.from_dask_cudf_edgelist(ddf) + >>> df = dcg.sssp(dg, 0) + >>> Comms.destroy() + """ + + client = default_client() + + graph.compute_renumber_edge_list(transposed=False) + (ddf, + num_verts, + partition_row_size, + partition_col_size, + vertex_partition_offsets) = shuffle(graph, transposed=False) + num_edges = len(ddf) + data = get_distributed_data(ddf) + + if graph.renumbered: + source = graph.lookup_internal_vertex_id(cudf.Series([source], + dtype='int32')).compute() + source = source.iloc[0] + + result = [client.submit( + call_sssp, + Comms.get_session_id(), + wf[1], + num_verts, + num_edges, + vertex_partition_offsets, + source, + workers=[wf[0]]) + for idx, wf in enumerate(data.worker_to_parts.items())] + wait(result) + ddf = dask_cudf.from_delayed(result) + + if graph.renumbered: + ddf = graph.unrenumber(ddf, 'vertex') + ddf = graph.unrenumber(ddf, 'predecessor') + ddf["predecessor"] = ddf["predecessor"].fillna(-1) + + return ddf diff --git a/python/cugraph/tests/dask/test_mg_bfs.py b/python/cugraph/tests/dask/test_mg_bfs.py index 94bed827fd0..553bbc698ff 100644 --- a/python/cugraph/tests/dask/test_mg_bfs.py +++ b/python/cugraph/tests/dask/test_mg_bfs.py @@ -27,7 +27,7 @@ def client_connection(): cluster = LocalCUDACluster() client = Client(cluster) - Comms.initialize() 
+ Comms.initialize(p2p=True) yield client @@ -68,6 +68,7 @@ def test_dask_bfs(client_connection): expected_dist = cugraph.bfs(g, 0) result_dist = dcg.bfs(dg, 0, True) + result_dist = result_dist.compute() compare_dist = expected_dist.merge( result_dist, on="vertex", suffixes=["_local", "_dask"] diff --git a/python/cugraph/tests/dask/test_mg_sssp.py b/python/cugraph/tests/dask/test_mg_sssp.py new file mode 100644 index 00000000000..ac4a60f1bdc --- /dev/null +++ b/python/cugraph/tests/dask/test_mg_sssp.py @@ -0,0 +1,86 @@ +# Copyright (c) 2020, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import cugraph.dask as dcg +import cugraph.comms as Comms +from dask.distributed import Client +import gc +import pytest +import cugraph +import dask_cudf +import cudf +from dask_cuda import LocalCUDACluster +from cugraph.dask.common.mg_utils import is_single_gpu + + +@pytest.fixture +def client_connection(): + cluster = LocalCUDACluster() + client = Client(cluster) + Comms.initialize(p2p=True) + + yield client + + Comms.destroy() + client.close() + cluster.close() + + +@pytest.mark.skipif( + is_single_gpu(), reason="skipping MG testing on Single GPU system" +) +def test_dask_sssp(client_connection): + gc.collect() + + input_data_path = r"../datasets/netscience.csv" + chunksize = dcg.get_chunksize(input_data_path) + + ddf = dask_cudf.read_csv( + input_data_path, + chunksize=chunksize, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) + + df = cudf.read_csv( + input_data_path, + delimiter=" ", + names=["src", "dst", "value"], + dtype=["int32", "int32", "float32"], + ) + + g = cugraph.DiGraph() + g.from_cudf_edgelist(df, "src", "dst", "value", renumber=True) + + dg = cugraph.DiGraph() + dg.from_dask_cudf_edgelist(ddf, "src", "dst", "value") + + expected_dist = cugraph.sssp(g, 0) + print(expected_dist) + result_dist = dcg.sssp(dg, 0) + result_dist = result_dist.compute() + + compare_dist = expected_dist.merge( + result_dist, on="vertex", suffixes=["_local", "_dask"] + ) + + err = 0 + + for i in range(len(compare_dist)): + if ( + compare_dist["distance_local"].iloc[i] + != compare_dist["distance_dask"].iloc[i] + ): + err = err + 1 + assert err == 0 diff --git a/python/cugraph/traversal/bfs.pxd b/python/cugraph/traversal/bfs.pxd index 0502754c161..5b73d23045c 100644 --- a/python/cugraph/traversal/bfs.pxd +++ b/python/cugraph/traversal/bfs.pxd @@ -20,13 +20,13 @@ from cugraph.structure.graph_primtypes cimport * from libcpp cimport bool -cdef extern from "algorithms.hpp" namespace "cugraph": - - cdef void bfs[VT,ET,WT]( +cdef 
extern from "utilities/cython.hpp" namespace "cugraph::cython": + cdef void call_bfs[vertex_t, weight_t]( const handle_t &handle, - const GraphCSRView[VT,ET,WT] &graph, - VT *distances, - VT *predecessors, + const graph_container_t &g, + vertex_t *identifiers, + vertex_t *distances, + vertex_t *predecessors, double *sp_counters, - const VT start_vertex, + const vertex_t start_vertex, bool directed) except + diff --git a/python/cugraph/traversal/bfs_wrapper.pyx b/python/cugraph/traversal/bfs_wrapper.pyx index c13e1eb58ee..ae346aea953 100644 --- a/python/cugraph/traversal/bfs_wrapper.pyx +++ b/python/cugraph/traversal/bfs_wrapper.pyx @@ -33,12 +33,22 @@ def bfs(input_graph, start, directed=True, Call bfs """ # Step 1: Declare the different varibales - cdef GraphCSRView[int, int, float] graph_float # For weighted float graph (SSSP) and Unweighted (BFS) - cdef GraphCSRView[int, int, double] graph_double # For weighted double graph (SSSP) + cdef graph_container_t graph_container + # FIXME: Offsets and indices are currently hardcoded to int, but this may + # not be acceptable in the future. 
+ numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, + np.dtype("int64") : numberTypeEnum.int64Type, + np.dtype("float32") : numberTypeEnum.floatType, + np.dtype("double") : numberTypeEnum.doubleType} # Pointers required for CSR Graph cdef uintptr_t c_offsets_ptr = NULL # Pointer to the CSR offsets cdef uintptr_t c_indices_ptr = NULL # Pointer to the CSR indices + cdef uintptr_t c_weights = NULL + cdef uintptr_t c_local_verts = NULL; + cdef uintptr_t c_local_edges = NULL; + cdef uintptr_t c_local_offsets = NULL; + weight_t = np.dtype("float32") # Pointers for SSSP / BFS cdef uintptr_t c_identifier_ptr = NULL # Pointer to the DataFrame 'vertex' Series @@ -52,6 +62,7 @@ def bfs(input_graph, start, directed=True, cdef unique_ptr[handle_t] handle_ptr handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get(); # Step 3: Extract CSR offsets, indices, weights are not expected # - offsets: int (signed, 32-bit) @@ -86,15 +97,20 @@ def bfs(input_graph, start, directed=True, # Step 8: Proceed to BFS # FIXME: [int, int, float] or may add an explicit [int, int, int] in graph.cu? 
- graph_float = GraphCSRView[int, int, float]( c_offsets_ptr, - c_indices_ptr, - NULL, - num_verts, - num_edges) - graph_float.get_vertex_identifiers( c_identifier_ptr) + populate_graph_container_legacy(graph_container, + ((graphTypeEnum.LegacyCSR)), + handle_[0], + c_offsets_ptr, c_indices_ptr, c_weights, + ((numberTypeEnum.int32Type)), + ((numberTypeEnum.int32Type)), + ((numberTypeMap[weight_t])), + num_verts, num_edges, + c_local_verts, c_local_edges, c_local_offsets) + # Different pathing wether shortest_path_counting is required or not - c_bfs.bfs[int, int, float](handle_ptr.get()[0], - graph_float, + c_bfs.call_bfs[int, float](handle_ptr.get()[0], + graph_container, + c_identifier_ptr, c_distance_ptr, c_predecessor_ptr, c_sp_counter_ptr, diff --git a/python/cugraph/traversal/sssp.pxd b/python/cugraph/traversal/sssp.pxd index 8f36ff12ae8..e4b709cb879 100644 --- a/python/cugraph/traversal/sssp.pxd +++ b/python/cugraph/traversal/sssp.pxd @@ -18,10 +18,12 @@ from cugraph.structure.graph_primtypes cimport * -cdef extern from "algorithms.hpp" namespace "cugraph": +cdef extern from "utilities/cython.hpp" namespace "cugraph::cython": - cdef void sssp[VT, ET, WT]( - const GraphCSRView[VT, ET, WT] &graph, - WT *distances, - VT *predecessors, - VT start_vertex) except + + cdef void call_sssp[vertex_t, weight_t]( + const handle_t &handle, + const graph_container_t &g, + vertex_t *identifiers, + weight_t *distances, + vertex_t *predecessors, + vertex_t start_vertex) except + diff --git a/python/cugraph/traversal/sssp_wrapper.pyx b/python/cugraph/traversal/sssp_wrapper.pyx index 1504eee53e1..730fe0db94e 100644 --- a/python/cugraph/traversal/sssp_wrapper.pyx +++ b/python/cugraph/traversal/sssp_wrapper.pyx @@ -34,13 +34,22 @@ def sssp(input_graph, source): Call sssp """ # Step 1: Declare the different variables - cdef GraphCSRView[int, int, float] graph_float # For weighted float graph (SSSP) and Unweighted (BFS) - cdef GraphCSRView[int, int, double] graph_double # For 
weighted double graph (SSSP) + cdef graph_container_t graph_container + # FIXME: Offsets and indices are currently hardcoded to int, but this may + # not be acceptable in the future. + numberTypeMap = {np.dtype("int32") : numberTypeEnum.int32Type, + np.dtype("int64") : numberTypeEnum.int64Type, + np.dtype("float32") : numberTypeEnum.floatType, + np.dtype("double") : numberTypeEnum.doubleType} # Pointers required for CSR Graph cdef uintptr_t c_offsets_ptr = NULL # Pointer to the CSR offsets cdef uintptr_t c_indices_ptr = NULL # Pointer to the CSR indices cdef uintptr_t c_weights_ptr = NULL # Pointer to the CSR weights + cdef uintptr_t c_local_verts = NULL; + cdef uintptr_t c_local_edges = NULL; + cdef uintptr_t c_local_offsets = NULL; + weight_t = np.dtype("int32") # Pointers for SSSP / BFS cdef uintptr_t c_identifier_ptr = NULL # Pointer to the DataFrame 'vertex' Series @@ -49,6 +58,7 @@ def sssp(input_graph, source): cdef unique_ptr[handle_t] handle_ptr handle_ptr.reset(new handle_t()) + handle_ = handle_ptr.get(); # Step 2: Verify that input_graph has the expected format # the SSSP implementation expects CSR format @@ -65,9 +75,8 @@ def sssp(input_graph, source): c_offsets_ptr = offsets.__cuda_array_interface__['data'][0] c_indices_ptr = indices.__cuda_array_interface__['data'][0] - data_type = np.int32 if weights is not None: - data_type = weights.dtype + weight_t = weights.dtype c_weights_ptr = weights.__cuda_array_interface__['data'][0] # Step 4: Setup number of vertices and number of edges @@ -83,7 +92,7 @@ def sssp(input_graph, source): df = cudf.DataFrame() df['vertex'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) - df['distance'] = cudf.Series(np.zeros(num_verts, dtype=data_type)) + df['distance'] = cudf.Series(np.zeros(num_verts, dtype=weight_t)) df['predecessor'] = cudf.Series(np.zeros(num_verts, dtype=np.int32)) # Step 7: Associate to cudf Series @@ -94,44 +103,41 @@ def sssp(input_graph, source): # Step 8: Dispatch to SSSP / BFS Based on weights 
# - weights is not None: SSSP float or SSSP double # - weights is None: BFS + populate_graph_container_legacy(graph_container, + ((graphTypeEnum.LegacyCSR)), + handle_[0], + c_offsets_ptr, c_indices_ptr, c_weights_ptr, + ((numberTypeEnum.int32Type)), + ((numberTypeEnum.int32Type)), + ((numberTypeMap[weight_t])), + num_verts, num_edges, + c_local_verts, c_local_edges, c_local_offsets) + if weights is not None: - if data_type == np.float32: - graph_float = GraphCSRView[int, int, float]( c_offsets_ptr, - c_indices_ptr, - c_weights_ptr, - num_verts, - num_edges) - graph_float.get_vertex_identifiers( c_identifier_ptr) - c_sssp.sssp[int, int, float](graph_float, + if weight_t == np.float32: + c_sssp.call_sssp[int, float](handle_[0], + graph_container, + c_identifier_ptr, c_distance_ptr, c_predecessor_ptr, source) - elif data_type == np.float64: - graph_double = GraphCSRView[int, int, double]( c_offsets_ptr, - c_indices_ptr, - c_weights_ptr, - num_verts, - num_edges) - graph_double.get_vertex_identifiers( c_identifier_ptr) - c_sssp.sssp[int, int, double](graph_double, + elif weight_t == np.float64: + c_sssp.call_sssp[int, double](handle_[0], + graph_container, + c_identifier_ptr, c_distance_ptr, c_predecessor_ptr, source) else: # This case should not happen raise NotImplementedError else: - # FIXME: Something might be done here considering WT = float - graph_float = GraphCSRView[int, int, float]( c_offsets_ptr, - c_indices_ptr, - NULL, - num_verts, - num_edges) - graph_float.get_vertex_identifiers( c_identifier_ptr) - c_bfs.bfs[int, int, float](handle_ptr.get()[0], - graph_float, + c_bfs.call_bfs[int, float](handle_[0], + graph_container, + c_identifier_ptr, c_distance_ptr, c_predecessor_ptr, NULL, - source) + source, + 1) return df From b6fbab819031fac2df19b704fc6a20f3b7f10e94 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Tue, 13 Oct 2020 10:27:55 -0400 Subject: [PATCH 09/16] [REVIEW] BUG Test MNMG pattern accelerator 
based PageRank, BFS, and SSSP (#1174) * add minimal update to create a PR * pagerank 2D cython/python infrastructure * 2D infra- bfs and sssp * add a work around for source (or destination) == self case for isend/irecv * fix a warning * remove dummy change log * sgpu pagerank edits * edits * add namespace * pull branch0.16 * update test * in copy_v_transform_reduce_in|out_nbr, implement missing communication alnog the minor direction * bug fix (assertion failure)\n * bug fix in copy_v_transform_reduce_in_out_nbr.cuh * clang-format * enforce consistency in variable naming related to subcommunicators * bug fix (graph construction) * bug fix (vertex_partition_segment_offsets) * review updates * clang * bug fix (caching comm_rank in partition_t object) * updatelocal_verts * bug fix (scale dangling_sum by damping factor) * remove transform_reduce_v_with_adj_matrix_row * replace device_vector with device_uvector in sssp * bfs updates to 2D infra * sssp 2D integration * sssp * flake8 * clang * add host_scalalr_bcast to comm_utils * remove unnecessary include * review changes * review changes * bug fix in update_frontier_v_push_if_out_nbr * bug fix in VertexFrontier declaration * add debug print for pagerank sum * remove dummy code * bug fix in assert * fix timing bug with isend/irecv * fix compile error * fix debug compile error * add missing cudaStreamSynchronize * guard raft::grid_1d_thread_t * compile error fix * SG bug fix (calling get_rank() on uninitialized comms) * BFS bug fix * fix a PageRank bug * pattern accelerator bug fix (found testing SSSP) * Update mg_pagerank_wrapper.pyx * review updates * bug fix in BFS communication * review updates * Revert "fix compile error" This reverts commit 900fd1143c6be38a4e974ff598627968eae20a07. * Revert "fix timing bug with isend/irecv" This reverts commit e0e696a580cfd2ef0bbe45dfd7e9845e139bee36. * Revert "bug fix in assert" This reverts commit 97b98ed4259a28afb050b1f6142ed91adae40264. 
* Revert "remove dummy code" This reverts commit facc70c50a0bde5ba06a5ddef830e23275ff5751. * Revert "add debug print for pagerank sum" This reverts commit c479b6df0855b70eb9340df761186ef85e247dcc. * Revert "bug fix in VertexFrontier declaration" This reverts commit 44e3e10d1da49fa5de3a54c31ff9f9d6bc3f1808. * Revert "bug fix in update_frontier_v_push_if_out_nbr" This reverts commit dd800014e2ce9985234e38db81c9b6276238873b. * Revert "remove unnecessary include" This reverts commit c55dbfb2af9a89ef289ffa6a0501c68b63f47900. * Revert "add host_scalalr_bcast to comm_utils" This reverts commit 6430ad55fef31749d340fc9daffe689966f8d83c. * Revert "replace device_vector with device_uvector in sssp" This reverts commit d6b2e5883f2a98f0e4ebc904ec4513bcb5f3aabe. * Revert "remove transform_reduce_v_with_adj_matrix_row" This reverts commit 21d4e104da02ef4d2609e2c05cd26471a40a6188. * Revert "bug fix (scale dangling_sum by damping factor)" This reverts commit 15818f74fe160c81e40987feb1162248d41e9c06. * Revert "bug fix (caching comm_rank in partition_t object)" This reverts commit bd2dd834f8df92944f64ff56fa698573fea9f416. * Revert "bug fix (vertex_partition_segment_offsets)" This reverts commit a006b9940b8d32d4e56cfc2b4a5746c14a51388c. * Revert "bug fix (graph construction)" This reverts commit 59fadefd0c7e6fd25c1332d5403b0c86e71defc2. * Revert "enforce consistency in variable naming related to subcommunicators" This reverts commit 790549f141c46dd463618504303f96f64e2ce712. * Revert "clang-format" This reverts commit 761f7aa81761d99832c6a9748679418e776ac495. * Revert "bug fix in copy_v_transform_reduce_in_out_nbr.cuh" This reverts commit f874f6517bfe76a317e51a681e9c33e7aa268004. * Revert "bug fix (assertion failure)\n" This reverts commit a33c2d10bcea579a12e298c0b5bb8b4917fd21e0. * Revert "in copy_v_transform_reduce_in|out_nbr, implement missing communication alnog the minor direction" This reverts commit 6e1b152630e1a5579d55a2f0948c0c010a5466a5. 
* Revert "fix a warning" This reverts commit 25607cad97cc6107586dfe0d9d30ad5ee2ca74b8. * Revert "add a work around for source (or destination) == self case for isend/irecv" This reverts commit 2be9e5f9a016d5884423b6e2b59e43ed646cde07. * revert * clang * update tests and predecessor * Update mg_pagerank_wrapper.pyx * fix the mess-up in merging with unmerged PRs * transitioning from UCX send/recv to NCCL send/recv * remove temporary code * replace UCX backend wiht NCCL backend for GPU memory P2P in update_frontier_v_push_if_out_nbr * bug fix for potential hang * remove debug prints * fix a new bug introduced in sssp bug fix * Update pagerank.py * Update mg_pagerank_wrapper.pyx * update doc * update edge weights * rename edge_attr * Add renaming of edge_attr * Update CMakeLists.txt * flake8 * Update graph.py * update graph.py to rename edge_attr * bug fix handling edge weights * update change log * fixed outdated comments * clang-format * remove debug statement * fix comments & add cosmetic updates * fix a simple mistake in cosmetic updates Co-authored-by: Ishika Roy Co-authored-by: Iroy30 <41401566+Iroy30@users.noreply.github.com> Co-authored-by: Alex Fender --- CHANGELOG.md | 1 + .../experimental/detail/graph_utils.cuh | 70 +-- cpp/include/experimental/graph_view.hpp | 6 +- .../patterns/copy_to_adj_matrix_row_col.cuh | 285 +++++++----- .../copy_v_transform_reduce_in_out_nbr.cuh | 406 +++++++++++------- cpp/include/patterns/count_if_e.cuh | 67 +-- cpp/include/patterns/transform_reduce_e.cuh | 73 ++-- .../update_frontier_v_push_if_out_nbr.cuh | 298 +++++++------ cpp/include/patterns/vertex_frontier.cuh | 36 +- cpp/include/utilities/comm_utils.cuh | 364 +++++++++++++++- cpp/src/experimental/bfs.cu | 10 +- cpp/src/experimental/graph.cu | 46 +- cpp/src/experimental/graph_view.cu | 20 +- cpp/src/experimental/katz_centrality.cu | 34 +- cpp/src/experimental/pagerank.cu | 142 +++--- cpp/src/experimental/sssp.cu | 29 +- cpp/src/utilities/cython.cu | 15 +- 17 files changed, 
1253 insertions(+), 649 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b175568bf60..36bb67cf326 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,7 @@ - PR #1192 Fix benchmark notes and documentation issues in graph.py - PR #1196 Move subcomms init outside of individual algorithm functions - PR #1198 Remove deprecated call to from_gpu_matrix +- PR #1174 Fix bugs in MNMG pattern accelerators and pattern accelerator based implementations of MNMG PageRank, BFS, and SSSP # cuGraph 0.15.0 (26 Aug 2020) diff --git a/cpp/include/experimental/detail/graph_utils.cuh b/cpp/include/experimental/detail/graph_utils.cuh index c94348329f7..bf56b2e6f80 100644 --- a/cpp/include/experimental/detail/graph_utils.cuh +++ b/cpp/include/experimental/detail/graph_utils.cuh @@ -51,27 +51,23 @@ rmm::device_uvector compute_major_degree( rmm::device_uvector degrees(0, handle.get_stream()); vertex_t max_num_local_degrees{0}; - for (int i = 0; i < col_comm_size; ++i) { - auto vertex_partition_idx = - partition.is_hypergraph_partitioned() - ? static_cast(row_comm_size) * static_cast(i) + - static_cast(row_comm_rank) - : static_cast(col_comm_size) * static_cast(row_comm_rank) + - static_cast(i); - vertex_t major_first{}; - vertex_t major_last{}; - std::tie(major_first, major_last) = partition.get_vertex_partition_range(vertex_partition_idx); - max_num_local_degrees = std::max(max_num_local_degrees, major_last - major_first); - if (i == col_comm_rank) { degrees.resize(major_last - major_first, handle.get_stream()); } + for (int i = 0; i < (partition.is_hypergraph_partitioned() ? col_comm_size : row_comm_size); + ++i) { + auto vertex_partition_idx = partition.is_hypergraph_partitioned() + ? 
static_cast(i * row_comm_size + row_comm_rank) + : static_cast(col_comm_rank * row_comm_size + i); + auto vertex_partition_size = partition.get_vertex_partition_size(vertex_partition_idx); + max_num_local_degrees = std::max(max_num_local_degrees, vertex_partition_size); + if (i == (partition.is_hypergraph_partitioned() ? col_comm_rank : row_comm_rank)) { + degrees.resize(vertex_partition_size, handle.get_stream()); + } } local_degrees.resize(max_num_local_degrees, handle.get_stream()); - for (int i = 0; i < col_comm_size; ++i) { - auto vertex_partition_idx = - partition.is_hypergraph_partitioned() - ? static_cast(row_comm_size) * static_cast(i) + - static_cast(row_comm_rank) - : static_cast(col_comm_size) * static_cast(row_comm_rank) + - static_cast(i); + for (int i = 0; i < (partition.is_hypergraph_partitioned() ? col_comm_size : row_comm_size); + ++i) { + auto vertex_partition_idx = partition.is_hypergraph_partitioned() + ? static_cast(i * row_comm_size + row_comm_rank) + : static_cast(col_comm_rank * row_comm_size + i); vertex_t major_first{}; vertex_t major_last{}; std::tie(major_first, major_last) = partition.get_vertex_partition_range(vertex_partition_idx); @@ -79,23 +75,39 @@ rmm::device_uvector compute_major_degree( partition.is_hypergraph_partitioned() ? adj_matrix_partition_offsets[i] : adj_matrix_partition_offsets[0] + - (major_first - partition.get_vertex_partition_first(col_comm_size * row_comm_rank)); + (major_first - partition.get_vertex_partition_first(col_comm_rank * row_comm_size)); thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), thrust::make_counting_iterator(vertex_t{0}), thrust::make_counting_iterator(major_last - major_first), local_degrees.data(), [p_offsets] __device__(auto i) { return p_offsets[i + 1] - p_offsets[i]; }); - row_comm.reduce(local_degrees.data(), - i == col_comm_rank ? 
degrees.data() : static_cast(nullptr), - degrees.size(), - raft::comms::op_t::SUM, - col_comm_rank, - handle.get_stream()); + if (partition.is_hypergraph_partitioned()) { + col_comm.reduce(local_degrees.data(), + i == col_comm_rank ? degrees.data() : static_cast(nullptr), + static_cast(major_last - major_first), + raft::comms::op_t::SUM, + i, + handle.get_stream()); + } else { + row_comm.reduce(local_degrees.data(), + i == row_comm_rank ? degrees.data() : static_cast(nullptr), + static_cast(major_last - major_first), + raft::comms::op_t::SUM, + i, + handle.get_stream()); + } } - auto status = handle.get_comms().sync_stream( - handle.get_stream()); // this is neessary as local_degrees will become out-of-scope once this - // function returns. + raft::comms::status_t status{}; + if (partition.is_hypergraph_partitioned()) { + status = + col_comm.sync_stream(handle.get_stream()); // this is necessary as local_degrees will become + // out-of-scope once this function returns. + } else { + status = + row_comm.sync_stream(handle.get_stream()); // this is necessary as local_degrees will become + // out-of-scope once this function returns. 
+ } CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); return degrees; diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp index c655b1451ca..93fb44e7faf 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/experimental/graph_view.hpp @@ -90,7 +90,7 @@ class partition_t { int col_comm_rank) : vertex_partition_offsets_(vertex_partition_offsets), hypergraph_partitioned_(hypergraph_partitioned), - comm_rank_(col_comm_size * row_comm_rank + col_comm_rank), + comm_rank_(col_comm_rank * row_comm_size + row_comm_rank), row_comm_size_(row_comm_size), col_comm_size_(col_comm_size), row_comm_rank_(row_comm_rank), @@ -402,7 +402,7 @@ class graph_view_t #include #include +#include #include #include @@ -75,8 +76,8 @@ void copy_to_matrix_major(raft::handle_t const& handle, } } else { assert(graph_view.get_number_of_local_vertices() == GraphViewType::is_adj_matrix_transposed - ? graph_view.get_number_of_adj_matrix_local_cols() - : graph_view.get_number_of_adj_matrix_local_rows()); + ? graph_view.get_number_of_local_adj_matrix_partition_cols() + : graph_view.get_number_of_local_adj_matrix_partition_rows()); thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), vertex_value_input_first, vertex_value_input_first + graph_view.get_number_of_local_vertices(), @@ -114,24 +115,28 @@ void copy_to_matrix_major(raft::handle_t const& handle, host_scalar_allgather(row_comm, static_cast(thrust::distance(vertex_first, vertex_last)), handle.get_stream()); - std::vector displacements(row_comm_size, size_t{0}); - std::partial_sum(rx_counts.begin(), rx_counts.end() - 1, displacements.begin() + 1); matrix_partition_device_t matrix_partition(graph_view, 0); for (int i = 0; i < row_comm_size; ++i) { - rmm::device_uvector rx_vertices(rx_counts[i], handle.get_stream()); + rmm::device_uvector rx_vertices(row_comm_rank == i ? 
size_t{0} : rx_counts[i], + handle.get_stream()); auto rx_tmp_buffer = allocate_comm_buffer::value_type>( rx_counts[i], handle.get_stream()); auto rx_value_first = get_comm_buffer_begin< typename std::iterator_traits::value_type>(rx_tmp_buffer); - if (i == row_comm_rank) { + if (row_comm_rank == i) { + vertex_partition_device_t vertex_partition(graph_view); + auto map_first = + thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { + return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); + }); // FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a // permutation iterator (and directly gathers to the internal buffer) thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_first, - vertex_last, + map_first, + map_first + thrust::distance(vertex_first, vertex_last), vertex_value_input_first, rx_value_first); } @@ -143,23 +148,43 @@ void copy_to_matrix_major(raft::handle_t const& handle, device_bcast( row_comm, rx_value_first, rx_value_first, rx_counts[i], i, handle.get_stream()); - auto map_first = thrust::make_transform_iterator( - rx_vertices.begin(), [matrix_partition] __device__(auto v) { - return matrix_partition.get_major_offset_from_major_nocheck(v); - }); - // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and - // directly scatters from the internal buffer) - thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_value_first, - rx_value_first + rx_counts[i], - map_first, - matrix_major_value_output_first); + if (row_comm_rank == i) { + auto map_first = + thrust::make_transform_iterator(vertex_first, [matrix_partition] __device__(auto v) { + return matrix_partition.get_major_offset_from_major_nocheck(v); + }); + // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and + // directly scatters from the internal buffer) + 
thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_value_first, + rx_value_first + rx_counts[i], + map_first, + matrix_major_value_output_first); + } else { + auto map_first = thrust::make_transform_iterator( + rx_vertices.begin(), [matrix_partition] __device__(auto v) { + return matrix_partition.get_major_offset_from_major_nocheck(v); + }); + // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and + // directly scatters from the internal buffer) + thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_value_first, + rx_value_first + rx_counts[i], + map_first, + matrix_major_value_output_first); + } + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // this is necessary as rx_tmp_buffer will become out-of-scope + // once control flow exits this block (FIXME: we can reduce stream + // synchronization if we compute the maximum rx_counts and + // allocate rx_tmp_buffer outside the loop) } } } else { assert(graph_view.get_number_of_local_vertices() == GraphViewType::is_adj_matrix_transposed - ? graph_view.get_number_of_adj_matrix_local_cols() - : graph_view.get_number_of_adj_matrix_local_rows()); + ? 
graph_view.get_number_of_local_adj_matrix_partition_cols() + : graph_view.get_number_of_local_adj_matrix_partition_rows()); auto val_first = thrust::make_permutation_iterator(vertex_value_input_first, vertex_first); thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), val_first, @@ -194,28 +219,28 @@ void copy_to_matrix_minor(raft::handle_t const& handle, // partitioning auto comm_src_rank = row_comm_rank * col_comm_size + col_comm_rank; auto comm_dst_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; - auto constexpr tuple_size = thrust_tuple_size_or_one< - typename std::iterator_traits::value_type>::value; - std::vector requests(2 * tuple_size); - device_isend( - comm, - vertex_value_input_first, - static_cast(graph_view.get_number_of_local_vertices()), - comm_dst_rank, - int{0} /* base_tag */, - requests.data()); - device_irecv( - comm, - matrix_minor_value_output_first + - (graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size + col_comm_rank) - - graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size)), - static_cast(graph_view.get_vertex_partition_size(comm_src_rank)), - comm_src_rank, - int{0} /* base_tag */, - requests.data() + tuple_size); - // FIXME: this waitall can fail if MatrixMinorValueOutputIterator is a discard iterator or a - // zip iterator having one or more discard iterator - comm.waitall(requests.size(), requests.data()); + // FIXME: this branch may no longer be necessary with NCCL backend + if (comm_src_rank == comm_rank) { + assert(comm_dst_rank == comm_rank); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_value_input_first, + vertex_value_input_first + graph_view.get_number_of_local_vertices(), + matrix_minor_value_output_first + + (graph_view.get_vertex_partition_first(comm_src_rank) - + graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size))); + } else { + device_sendrecv( + comm, + 
vertex_value_input_first, + static_cast(graph_view.get_number_of_local_vertices()), + comm_dst_rank, + matrix_minor_value_output_first + + (graph_view.get_vertex_partition_first(comm_src_rank) - + graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size)), + static_cast(graph_view.get_vertex_partition_size(comm_src_rank)), + comm_src_rank, + handle.get_stream()); + } // FIXME: these broadcast operations can be placed between ncclGroupStart() and // ncclGroupEnd() @@ -233,8 +258,8 @@ void copy_to_matrix_minor(raft::handle_t const& handle, } } else { assert(graph_view.get_number_of_local_vertices() == GraphViewType::is_adj_matrix_transposed - ? graph_view.get_number_of_adj_matrix_local_rows() - : graph_view.get_number_of_adj_matrix_local_cols()); + ? graph_view.get_number_of_local_adj_matrix_partition_rows() + : graph_view.get_number_of_local_adj_matrix_partition_cols()); thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), vertex_value_input_first, vertex_value_input_first + graph_view.get_number_of_local_vertices(), @@ -272,23 +297,22 @@ void copy_to_matrix_minor(raft::handle_t const& handle, // hypergraph partitioning is applied or not auto comm_src_rank = row_comm_rank * col_comm_size + col_comm_rank; auto comm_dst_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; - auto constexpr tuple_size = thrust_tuple_size_or_one< - typename std::iterator_traits::value_type>::value; - - std::vector count_requests(2); - auto tx_count = thrust::distance(vertex_first, vertex_last); - auto rx_count = tx_count; - comm.isend(&tx_count, 1, comm_dst_rank, 0 /* tag */, count_requests.data()); - comm.irecv(&rx_count, 1, comm_src_rank, 0 /* tag */, count_requests.data() + 1); - comm.waitall(count_requests.size(), count_requests.data()); - - auto src_tmp_buffer = - allocate_comm_buffer::value_type>( - tx_count, handle.get_stream()); - auto src_value_first = - get_comm_buffer_begin::value_type>( - src_tmp_buffer); + size_t 
tx_count = thrust::distance(vertex_first, vertex_last); + size_t rx_count{}; + // FIXME: it seems like raft::isend and raft::irecv do not properly handle the destination (or + // source) == self case. Need to double check and fix this if this is indeed the case (or RAFT + // may use ncclSend/ncclRecv instead of UCX for device data). + if (comm_src_rank == comm_rank) { + assert(comm_dst_rank == comm_rank); + rx_count = tx_count; + } else { + std::vector count_requests(2); + comm.isend(&tx_count, 1, comm_dst_rank, 0 /* tag */, count_requests.data()); + comm.irecv(&rx_count, 1, comm_src_rank, 0 /* tag */, count_requests.data() + 1); + comm.waitall(count_requests.size(), count_requests.data()); + } + vertex_partition_device_t vertex_partition(graph_view); rmm::device_uvector dst_vertices(rx_count, handle.get_stream()); auto dst_tmp_buffer = allocate_comm_buffer::value_type>( @@ -296,49 +320,69 @@ void copy_to_matrix_minor(raft::handle_t const& handle, auto dst_value_first = get_comm_buffer_begin::value_type>( dst_tmp_buffer); - - thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + if (comm_src_rank == comm_rank) { + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), vertex_first, vertex_last, - vertex_value_input_first, - src_value_first); - - std::vector value_requests(2 * (1 + tuple_size)); - device_isend( - comm, vertex_first, tx_count, comm_dst_rank, int{0} /* base_tag */, value_requests.data()); - device_isend(comm, - src_value_first, - tx_count, - comm_dst_rank, - int{1} /* base_tag */, - value_requests.data() + 1); - device_irecv( - comm, - dst_vertices.begin(), - rx_count, - comm_src_rank, - int{0} /* base_tag */, - value_requests.data() + (1 + tuple_size)); - device_irecv( - comm, - dst_value_first, - rx_count, - comm_src_rank, - int{0} /* base_tag */, - value_requests.data() + ((1 + tuple_size) + 1)); - // FIXME: this waitall can fail if MatrixMinorValueOutputIterator is a discard iterator or a - // zip 
iterator having one or more discard iterator - comm.waitall(value_requests.size(), value_requests.data()); + dst_vertices.begin()); + auto map_first = + thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { + return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); + }); + thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + map_first, + map_first + thrust::distance(vertex_first, vertex_last), + vertex_value_input_first, + dst_value_first); + } else { + auto src_tmp_buffer = + allocate_comm_buffer::value_type>( + tx_count, handle.get_stream()); + auto src_value_first = get_comm_buffer_begin< + typename std::iterator_traits::value_type>(src_tmp_buffer); + + auto map_first = + thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { + return vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v); + }); + thrust::gather(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + map_first, + map_first + thrust::distance(vertex_first, vertex_last), + vertex_value_input_first, + src_value_first); + + device_sendrecv( + comm, + vertex_first, + tx_count, + comm_dst_rank, + dst_vertices.begin(), + rx_count, + comm_src_rank, + handle.get_stream()); + + device_sendrecv(comm, + src_value_first, + tx_count, + comm_dst_rank, + dst_value_first, + rx_count, + comm_src_rank, + handle.get_stream()); + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // this is necessary as src_tmp_buffer will become out-of-scope + // once control flow exits this block + } // FIXME: now we can clear tx_tmp_buffer auto rx_counts = host_scalar_allgather(col_comm, rx_count, handle.get_stream()); - std::vector displacements(col_comm_size, size_t{0}); - std::partial_sum(rx_counts.begin(), rx_counts.end() - 1, displacements.begin() + 1); matrix_partition_device_t matrix_partition(graph_view, 0); for (int i = 0; i < col_comm_size; ++i) { - rmm::device_uvector 
rx_vertices(rx_counts[i], handle.get_stream()); + rmm::device_uvector rx_vertices(col_comm_rank == i ? size_t{0} : rx_counts[i], + handle.get_stream()); auto rx_tmp_buffer = allocate_comm_buffer::value_type>( rx_counts[i], handle.get_stream()); @@ -356,21 +400,44 @@ void copy_to_matrix_minor(raft::handle_t const& handle, device_bcast( col_comm, dst_value_first, rx_value_first, rx_counts[i], i, handle.get_stream()); - auto map_first = thrust::make_transform_iterator( - rx_vertices.begin(), [matrix_partition] __device__(auto v) { - return matrix_partition.get_minor_offset_from_minor_nocheck(v); - }); + if (col_comm_rank == i) { + auto map_first = thrust::make_transform_iterator( + dst_vertices.begin(), [matrix_partition] __device__(auto v) { + return matrix_partition.get_minor_offset_from_minor_nocheck(v); + }); + + thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + dst_value_first, + dst_value_first + rx_counts[i], + map_first, + matrix_minor_value_output_first); + } else { + auto map_first = thrust::make_transform_iterator( + rx_vertices.begin(), [matrix_partition] __device__(auto v) { + return matrix_partition.get_minor_offset_from_minor_nocheck(v); + }); + + thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + rx_value_first, + rx_value_first + rx_counts[i], + map_first, + matrix_minor_value_output_first); + } - thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - rx_value_first, - rx_value_first + rx_counts[i], - map_first, - matrix_minor_value_output_first); + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // this is necessary as rx_tmp_buffer will become out-of-scope + // once control flow exits this block (FIXME: we can reduce stream + // synchronization if we compute the maximum rx_counts and + // allocate rx_tmp_buffer outside the loop) } + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // this is necessary as dst_tmp_buffer will become 
out-of-scope once + // control flow exits this block } } else { assert(graph_view.get_number_of_local_vertices() == - graph_view.get_number_of_adj_matrix_local_rows()); + graph_view.get_number_of_local_adj_matrix_partition_rows()); auto val_first = thrust::make_permutation_iterator(vertex_value_input_first, vertex_first); thrust::scatter(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), val_first, @@ -402,7 +469,7 @@ void copy_to_matrix_minor(raft::handle_t const& handle, * @param adj_matrix_row_value_output_first Iterator pointing to the adjacency matrix row output * property variables for the first (inclusive) row (assigned to this process in multi-GPU). * `adj_matrix_row_value_output_last` (exclusive) is deduced as @p adj_matrix_row_value_output_first - * + @p graph_view.get_number_of_adj_matrix_local_rows(). + * + @p graph_view.get_number_of_local_adj_matrix_partition_rows(). */ template __global__ void for_all_major_for_all_nbr_low_degree( matrix_partition_device_t matrix_partition, - typename GraphViewType::vertex_type row_first, - typename GraphViewType::vertex_type row_last, + typename GraphViewType::vertex_type major_first, + typename GraphViewType::vertex_type major_last, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, ResultValueOutputIterator result_value_output_first, @@ -81,22 +81,23 @@ __global__ void for_all_major_for_all_nbr_low_degree( using weight_t = typename GraphViewType::weight_type; using e_op_result_t = T; - auto const tid = threadIdx.x + blockIdx.x * blockDim.x; - auto idx = - static_cast(row_first - matrix_partition.get_major_first()) + static_cast(tid); + auto const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); + auto idx = static_cast(tid); - while (idx < static_cast(row_last - matrix_partition.get_major_first())) { + while (idx < static_cast(major_last 
- major_first)) { vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_degree{}; + auto major_offset = major_start_offset + idx; thrust::tie(indices, weights, local_degree) = - matrix_partition.get_local_edges(static_cast(idx)); + matrix_partition.get_local_edges(static_cast(major_offset)); #if 1 auto transform_op = [&matrix_partition, &adj_matrix_row_value_input_first, &adj_matrix_col_value_input_first, &e_op, - idx, + major_offset, indices, weights] __device__(auto i) { auto minor = indices[i]; @@ -104,14 +105,16 @@ __global__ void for_all_major_for_all_nbr_low_degree( auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); auto row = GraphViewType::is_adj_matrix_transposed ? minor - : matrix_partition.get_major_from_major_offset_nocheck(idx); + : matrix_partition.get_major_from_major_offset_nocheck(major_offset); auto col = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_from_major_offset_nocheck(idx) + ? matrix_partition.get_major_from_major_offset_nocheck(major_offset) : minor; - auto row_offset = - GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); - auto col_offset = - GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; + auto row_offset = GraphViewType::is_adj_matrix_transposed + ? minor_offset + : static_cast(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? static_cast(major_offset) + : minor_offset; return evaluate_edge_op(idx); - auto col_offset = - GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; + auto row_offset = GraphViewType::is_adj_matrix_transposed + ? minor_offset + : static_cast(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? 
static_cast(major_offset) + : minor_offset; auto e_op_result = evaluate_edge_op __global__ void for_all_major_for_all_nbr_mid_degree( matrix_partition_device_t matrix_partition, - typename GraphViewType::vertex_type row_first, - typename GraphViewType::vertex_type row_last, + typename GraphViewType::vertex_type major_first, + typename GraphViewType::vertex_type major_last, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, ResultValueOutputIterator result_value_output_first, @@ -208,15 +213,16 @@ __global__ void for_all_major_for_all_nbr_mid_degree( auto const tid = threadIdx.x + blockIdx.x * blockDim.x; static_assert(copy_v_transform_reduce_nbr_for_all_block_size % raft::warp_size() == 0); - auto const lane_id = tid % raft::warp_size(); - auto idx = static_cast(row_first - matrix_partition.get_major_first()) + - static_cast(tid / raft::warp_size()); + auto const lane_id = tid % raft::warp_size(); + auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); + auto idx = static_cast(tid / raft::warp_size()); - while (idx < static_cast(row_last - matrix_partition.get_major_first())) { + while (idx < static_cast(major_last - major_first)) { vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_degree{}; - thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(idx); + auto major_offset = major_start_offset + idx; + thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(major_offset); auto e_op_result_sum = lane_id == 0 ? init : e_op_result_t{}; // relevent only if update_major == true for (edge_t i = lane_id; i < local_degree; i += raft::warp_size) { @@ -225,14 +231,16 @@ __global__ void for_all_major_for_all_nbr_mid_degree( auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); auto row = GraphViewType::is_adj_matrix_transposed ? 
minor - : matrix_partition.get_major_from_major_offset_nocheck(idx); + : matrix_partition.get_major_from_major_offset_nocheck(major_offset); auto col = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_from_major_offset_nocheck(idx) + ? matrix_partition.get_major_from_major_offset_nocheck(major_offset) : minor; - auto row_offset = - GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); - auto col_offset = - GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; + auto row_offset = GraphViewType::is_adj_matrix_transposed + ? minor_offset + : static_cast(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? static_cast(major_offset) + : minor_offset; auto e_op_result = evaluate_edge_op __global__ void for_all_major_for_all_nbr_high_degree( matrix_partition_device_t matrix_partition, + typename GraphViewType::vertex_type major_first, + typename GraphViewType::vertex_type major_last, AdjMatrixRowValueInputIterator adj_matrix_row_value_input_first, AdjMatrixColValueInputIterator adj_matrix_col_value_input_first, ResultValueOutputIterator result_value_output_first, @@ -280,14 +288,15 @@ __global__ void for_all_major_for_all_nbr_high_degree( using weight_t = typename GraphViewType::weight_type; using e_op_result_t = T; - auto idx = static_cast(row_first - matrix_partition.get_major_first()) + - static_cast(blockIdx.x); + auto major_start_offset = static_cast(major_first - matrix_partition.get_major_first()); + auto idx = static_cast(blockIdx.x); - while (idx < static_cast(row_last - matrix_partition.get_major_first())) { + while (idx < static_cast(major_last - major_first)) { vertex_t const* indices{nullptr}; weight_t const* weights{nullptr}; edge_t local_degree{}; - thrust::tie(indices, weights, local_degree) = matrix_partition.get_local_edges(idx); + auto major_offset = major_start_offset + idx; + thrust::tie(indices, weights, local_degree) = 
matrix_partition.get_local_edges(major_offset); auto e_op_result_sum = threadIdx.x == 0 ? init : e_op_result_t{}; // relevent only if update_major == true for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { @@ -296,14 +305,16 @@ __global__ void for_all_major_for_all_nbr_high_degree( auto minor_offset = matrix_partition.get_minor_offset_from_minor_nocheck(minor); auto row = GraphViewType::is_adj_matrix_transposed ? minor - : matrix_partition.get_major_from_major_offset_nocheck(idx); + : matrix_partition.get_major_from_major_offset_nocheck(major_offset); auto col = GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_major_from_major_offset_nocheck(idx) + ? matrix_partition.get_major_from_major_offset_nocheck(major_offset) : minor; - auto row_offset = - GraphViewType::is_adj_matrix_transposed ? minor_offset : static_cast(idx); - auto col_offset = - GraphViewType::is_adj_matrix_transposed ? static_cast(idx) : minor_offset; + auto row_offset = GraphViewType::is_adj_matrix_transposed + ? minor_offset + : static_cast(major_offset); + auto col_offset = GraphViewType::is_adj_matrix_transposed + ? static_cast(major_offset) + : minor_offset; auto e_op_result = evaluate_edge_op(row_comm_size); } - - for (size_t i = 0; i < loop_count; ++i) { - matrix_partition_device_t matrix_partition( - graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 0 : i); - - auto tmp_buffer_size = vertex_t{0}; + auto comm_rank = handle.comms_initialized() ? handle.get_comms().get_rank() : int{0}; + + auto minor_tmp_buffer_size = + (GraphViewType::is_multi_gpu && (in != GraphViewType::is_adj_matrix_transposed)) + ? GraphViewType::is_adj_matrix_transposed + ? 
graph_view.get_number_of_local_adj_matrix_partition_rows() + : graph_view.get_number_of_local_adj_matrix_partition_cols() + : vertex_t{0}; + auto minor_tmp_buffer = allocate_comm_buffer(minor_tmp_buffer_size, handle.get_stream()); + auto minor_buffer_first = get_comm_buffer_begin(minor_tmp_buffer); + + if (in != GraphViewType::is_adj_matrix_transposed) { + auto minor_init = init; if (GraphViewType::is_multi_gpu) { auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); + auto const row_comm_rank = row_comm.get_rank(); auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); + minor_init = graph_view.is_hypergraph_partitioned() ? (row_comm_rank == 0) ? init : T{} + : (col_comm_rank == 0) ? init : T{}; + } - tmp_buffer_size = - in ? GraphViewType::is_adj_matrix_transposed - ? graph_view.is_hypergraph_partitioned() - ? matrix_partition.get_major_size() - : graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i) - : matrix_partition.get_minor_size() - : GraphViewType::is_adj_matrix_transposed - ? matrix_partition.get_minor_size() - : graph_view.is_hypergraph_partitioned() - ? 
matrix_partition.get_major_size() - : graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i); + if (GraphViewType::is_multi_gpu) { + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + minor_buffer_first, + minor_buffer_first + minor_tmp_buffer_size, + minor_init); + } else { + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_value_output_first, + vertex_value_output_first + graph_view.get_number_of_local_vertices(), + minor_init); } - auto tmp_buffer = allocate_comm_buffer(tmp_buffer_size, handle.get_stream()); - auto buffer_first = get_comm_buffer_begin(tmp_buffer); + } else { + assert(minor_tmp_buffer_size == 0); + } + + for (size_t i = 0; i < loop_count; ++i) { + matrix_partition_device_t matrix_partition( + graph_view, (GraphViewType::is_multi_gpu && !graph_view.is_hypergraph_partitioned()) ? 0 : i); - auto local_init = init; + auto major_tmp_buffer_size = vertex_t{0}; if (GraphViewType::is_multi_gpu) { auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); - if (in == GraphViewType::is_adj_matrix_transposed) { - local_init = graph_view.is_hypergraph_partitioned() ? (col_comm_rank == 0) ? init : T{} - : (row_comm_rank == 0) ? init : T{}; - } else { - local_init = graph_view.is_hypergraph_partitioned() ? (row_comm_rank == 0) ? init : T{} - : (col_comm_rank == 0) ? init : T{}; - } + + major_tmp_buffer_size = + (in == GraphViewType::is_adj_matrix_transposed) + ? graph_view.is_hypergraph_partitioned() + ? 
matrix_partition.get_major_size() + : graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i) + : vertex_t{0}; } + auto major_tmp_buffer = allocate_comm_buffer(major_tmp_buffer_size, handle.get_stream()); + auto major_buffer_first = get_comm_buffer_begin(major_tmp_buffer); - if (in != GraphViewType::is_adj_matrix_transposed) { + auto major_init = T{}; + if (in == GraphViewType::is_adj_matrix_transposed) { if (GraphViewType::is_multi_gpu) { - thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - buffer_first, - buffer_first + tmp_buffer_size, - local_init); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + major_init = graph_view.is_hypergraph_partitioned() ? (col_comm_rank == 0) ? init : T{} + : (row_comm_rank == 0) ? init : T{}; } else { - thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_value_output_first, - vertex_value_output_first + graph_view.get_number_of_local_vertices(), - local_init); + major_init = init; } } @@ -425,91 +452,148 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, : col_comm_rank * row_comm_size + i; } - raft::grid_1d_thread_t update_grid(graph_view.get_vertex_partition_size(comm_root_rank), - detail::copy_v_transform_reduce_nbr_for_all_block_size, - handle.get_device_properties().maxGridSize[0]); + if (graph_view.get_vertex_partition_size(comm_root_rank) > 0) { + raft::grid_1d_thread_t update_grid(graph_view.get_vertex_partition_size(comm_root_rank), + detail::copy_v_transform_reduce_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); - if (GraphViewType::is_multi_gpu) { + if (GraphViewType::is_multi_gpu) { + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto 
const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + + auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? vertex_t{0} + : matrix_partition.get_major_value_start_offset(); + auto col_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_value_start_offset() + : vertex_t{0}; + + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + graph_view.get_vertex_partition_first(comm_root_rank), + graph_view.get_vertex_partition_last(comm_root_rank), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + (in == GraphViewType::is_adj_matrix_transposed) ? major_buffer_first + : minor_buffer_first, + e_op, + major_init); + } else { + detail::for_all_major_for_all_nbr_low_degree + <<>>( + matrix_partition, + graph_view.get_vertex_partition_first(comm_root_rank), + graph_view.get_vertex_partition_last(comm_root_rank), + adj_matrix_row_value_input_first, + adj_matrix_col_value_input_first, + vertex_value_output_first, + e_op, + major_init); + } + } + + if (GraphViewType::is_multi_gpu && (in == GraphViewType::is_adj_matrix_transposed)) { auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); auto const row_comm_size = row_comm.get_size(); auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); - vertex_t row_value_input_offset = - GraphViewType::is_adj_matrix_transposed - ? 0 - : graph_view.is_hypergraph_partitioned() - ? 
matrix_partition.get_major_value_start_offset() - : graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size + i) - - graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size); - vertex_t col_value_input_offset = - GraphViewType::is_adj_matrix_transposed - ? graph_view.is_hypergraph_partitioned() - ? matrix_partition.get_major_value_start_offset() - : graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size + i) - - graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size) - : 0; - - detail::for_all_major_for_all_nbr_low_degree - <<>>( - matrix_partition, - graph_view.get_vertex_partition_first(comm_root_rank), - graph_view.get_vertex_partition_last(comm_root_rank), - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - buffer_first, - e_op, - local_init); - } else { - detail::for_all_major_for_all_nbr_low_degree - <<>>( - matrix_partition, - graph_view.get_vertex_partition_first(comm_root_rank), - graph_view.get_vertex_partition_last(comm_root_rank), - adj_matrix_row_value_input_first, - adj_matrix_col_value_input_first, + if (graph_view.is_hypergraph_partitioned()) { + device_reduce( + col_comm, + major_buffer_first, vertex_value_output_first, - e_op, - local_init); + static_cast(graph_view.get_vertex_partition_size(i * row_comm_size + i)), + raft::comms::op_t::SUM, + i, + handle.get_stream()); + } else { + device_reduce(row_comm, + major_buffer_first, + vertex_value_output_first, + static_cast( + graph_view.get_vertex_partition_size(col_comm_rank * row_comm_size + i)), + raft::comms::op_t::SUM, + i, + handle.get_stream()); + } } - if (GraphViewType::is_multi_gpu) { - if (in == GraphViewType::is_adj_matrix_transposed) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_rank = row_comm.get_rank(); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = 
handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - auto const col_comm_size = col_comm.get_size(); + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // this is as necessary major_tmp_buffer will become out-of-scope once + // control flow exits this block (FIXME: we can reduce stream + // synchronization if we compute the maximum major_tmp_buffer_size and + // allocate major_tmp_buffer outside the loop) + } - if (graph_view.is_hypergraph_partitioned()) { - device_reduce( - col_comm, - buffer_first, - vertex_value_output_first, - static_cast(graph_view.get_vertex_partition_size(i * row_comm_size + i)), - raft::comms::op_t::SUM, - i, - handle.get_stream()); - } else { - for (int j = 0; j < row_comm_size; ++j) { - auto comm_root_rank = col_comm_rank * row_comm_size + j; - device_reduce( - row_comm, - buffer_first + (graph_view.get_vertex_partition_first(comm_root_rank) - - graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size)), - vertex_value_output_first, - static_cast(graph_view.get_vertex_partition_size(comm_root_rank)), - raft::comms::op_t::SUM, - j, - handle.get_stream()); - } - } + if (GraphViewType::is_multi_gpu && (in != GraphViewType::is_adj_matrix_transposed)) { + auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); + auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); + auto const row_comm_rank = row_comm.get_rank(); + auto const row_comm_size = row_comm.get_size(); + auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); + auto const col_comm_rank = col_comm.get_rank(); + auto const col_comm_size = col_comm.get_size(); + + if (graph_view.is_hypergraph_partitioned()) { + CUGRAPH_FAIL("unimplemented."); + } else { + for (int i = 0; i < col_comm_size; ++i) { + auto offset = (graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size + i) - + 
graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size)); + auto size = static_cast( + graph_view.get_vertex_partition_size(row_comm_rank * col_comm_size + i)); + device_reduce(col_comm, + minor_buffer_first + offset, + minor_buffer_first + offset, + size, + raft::comms::op_t::SUM, + i, + handle.get_stream()); + } + + // FIXME: this P2P is unnecessary if we apply the partitioning scheme used with hypergraph + // partitioning + auto comm_src_rank = (comm_rank % col_comm_size) * row_comm_size + comm_rank / col_comm_size; + auto comm_dst_rank = row_comm_rank * col_comm_size + col_comm_rank; + // FIXME: this branch may no longer necessary with NCCL backend + if (comm_src_rank == comm_rank) { + assert(comm_dst_rank == comm_rank); + auto offset = + graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size + col_comm_rank) - + graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size); + auto size = static_cast( + graph_view.get_vertex_partition_size(row_comm_rank * col_comm_size + col_comm_rank)); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + minor_buffer_first + offset, + minor_buffer_first + offset + size, + vertex_value_output_first); } else { - CUGRAPH_FAIL("unimplemented."); + device_sendrecv( + comm, + minor_buffer_first + + (graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size + col_comm_rank) - + graph_view.get_vertex_partition_first(row_comm_rank * col_comm_size)), + static_cast( + graph_view.get_vertex_partition_size(row_comm_rank * col_comm_size + col_comm_rank)), + comm_dst_rank, + vertex_value_output_first, + static_cast(graph_view.get_vertex_partition_size(comm_rank)), + comm_src_rank, + handle.get_stream()); } } } + + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // this is as necessary minor_tmp_buffer will become out-of-scope once + // control flow exits this block } } // namespace detail @@ -525,11 +609,7 @@ void copy_v_transform_reduce_nbr(raft::handle_t 
const& handle, * input properties. * @tparam AdjMatrixColValueInputIterator Type of the iterator for graph adjacency matrix column * input properties. - * @tparam EdgeOp Type of the quaternraft::grid_1d_thread_t - update_grid(matrix_partition.get_major_size(), - detail::copy_v_transform_reduce_nbr_for_all_block_size, - handle.get_device_properties().maxGridSize[0]);ary (or - quinary) edge operator. + * @tparam EdgeOp Type of the quaternary (or quinary) edge operator. * @tparam T Type of the initial value for reduction over the incoming edges. * @tparam VertexValueOutputIterator Type of the iterator for vertex output property variables. * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and @@ -538,11 +618,11 @@ void copy_v_transform_reduce_nbr(raft::handle_t const& handle, * @param adj_matrix_row_value_input_first Iterator pointing to the adjacency matrix row input * properties for the first (inclusive) row (assigned to this process in multi-GPU). * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first + - * @p graph_view.get_number_of_adj_matrix_local_rows(). + * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input * properties for the first (inclusive) column (assigned to this process in multi-GPU). * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p adj_matrix_col_value_output_first - * + @p graph_view.get_number_of_adj_matrix_local_cols(). + * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). 
* @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional edge * weight), *(@p adj_matrix_row_value_input_first + i), and *(@p adj_matrix_col_value_input_first + * j) (where i is in [0, graph_view.get_number_of_local_adj_matrix_partition_rows()) and j is in [0, @@ -598,12 +678,12 @@ void copy_v_transform_reduce_in_nbr(raft::handle_t const& handle, * properties for the first (inclusive) row (assigned to this process in multi-GPU). * `adj_matrix_row_value_input_last` (exclusive) is deduced as @p adj_matrix_row_value_input_first * + - * @p graph_view.get_number_of_adj_matrix_local_rows(). + * @p graph_view.get_number_of_local_adj_matrix_partition_rows(). * @param adj_matrix_col_value_input_first Iterator pointing to the adjacency matrix column input * properties for the first (inclusive) column (assigned to this process in multi-GPU). * `adj_matrix_col_value_output_last` (exclusive) is deduced as @p * adj_matrix_col_value_output_first - * + @p graph_view.get_number_of_adj_matrix_local_cols(). + * + @p graph_view.get_number_of_local_adj_matrix_partition_cols(). * @param e_op Quaternary (or quinary) operator takes edge source, edge destination, (optional * edge weight), *(@p adj_matrix_row_value_input_first + i), and *(@p * adj_matrix_col_value_input_first + j) (where i is in [0, diff --git a/cpp/include/patterns/count_if_e.cuh b/cpp/include/patterns/count_if_e.cuh index 04f22033f91..4f0f0a7a43e 100644 --- a/cpp/include/patterns/count_if_e.cuh +++ b/cpp/include/patterns/count_if_e.cuh @@ -188,37 +188,42 @@ typename GraphViewType::edge_type count_if_e( edge_t count{0}; for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { matrix_partition_device_t matrix_partition(graph_view, i); - auto row_value_input_offset = - GraphViewType::is_adj_matrix_transposed ? 0 : matrix_partition.get_major_value_start_offset(); - auto col_value_input_offset = - GraphViewType::is_adj_matrix_transposed ? 
matrix_partition.get_major_value_start_offset() : 0; - - raft::grid_1d_thread_t update_grid(matrix_partition.get_major_size(), - detail::count_if_e_for_all_block_size, - handle.get_device_properties().maxGridSize[0]); - - rmm::device_vector block_counts(update_grid.num_blocks); - - detail::for_all_major_for_all_nbr_low_degree<<>>( - matrix_partition, - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - block_counts.data().get(), - e_op); - - // FIXME: we have several options to implement this. With cooperative group support - // (https://devblogs.nvidia.com/cooperative-groups/), we can run this synchronization within - // the previous kernel. Using atomics at the end of the previous kernel is another option - // (sequentialization due to atomics may not be bad as different blocks may reach the - // synchronization point in varying timings and the number of SMs is not very big) - count += thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - block_counts.begin(), - block_counts.end(), - edge_t{0}, - thrust::plus()); + + if (matrix_partition.get_major_size() > 0) { + auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? vertex_t{0} + : matrix_partition.get_major_value_start_offset(); + auto col_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_value_start_offset() + : vertex_t{0}; + + raft::grid_1d_thread_t update_grid(matrix_partition.get_major_size(), + detail::count_if_e_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + rmm::device_vector block_counts(update_grid.num_blocks); + + detail::for_all_major_for_all_nbr_low_degree<<>>( + matrix_partition, + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + block_counts.data().get(), + e_op); + + // FIXME: we have several options to implement this. 
With cooperative group support + // (https://devblogs.nvidia.com/cooperative-groups/), we can run this synchronization within + // the previous kernel. Using atomics at the end of the previous kernel is another option + // (sequentialization due to atomics may not be bad as different blocks may reach the + // synchronization point in varying timings and the number of SMs is not very big) + count += thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + block_counts.begin(), + block_counts.end(), + edge_t{0}, + thrust::plus()); + } } if (GraphViewType::is_multi_gpu) { diff --git a/cpp/include/patterns/transform_reduce_e.cuh b/cpp/include/patterns/transform_reduce_e.cuh index 3f334ceff00..797facd4657 100644 --- a/cpp/include/patterns/transform_reduce_e.cuh +++ b/cpp/include/patterns/transform_reduce_e.cuh @@ -192,40 +192,45 @@ T transform_reduce_e(raft::handle_t const& handle, T result{}; for (size_t i = 0; i < graph_view.get_number_of_local_adj_matrix_partitions(); ++i) { matrix_partition_device_t matrix_partition(graph_view, i); - auto row_value_input_offset = - GraphViewType::is_adj_matrix_transposed ? 0 : matrix_partition.get_major_value_start_offset(); - auto col_value_input_offset = - GraphViewType::is_adj_matrix_transposed ? matrix_partition.get_major_value_start_offset() : 0; - - raft::grid_1d_thread_t update_grid(matrix_partition.get_major_size(), - detail::transform_reduce_e_for_all_block_size, - handle.get_device_properties().maxGridSize[0]); - - rmm::device_vector block_results(update_grid.num_blocks); - - detail::for_all_major_for_all_nbr_low_degree<<>>( - matrix_partition, - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first + col_value_input_offset, - block_results.data(), - e_op); - - // FIXME: we have several options to implement this. 
With cooperative group support - // (https://devblogs.nvidia.com/cooperative-groups/), we can run this synchronization within the - // previous kernel. Using atomics at the end of the previous kernel is another option - // (sequentialization due to atomics may not be bad as different blocks may reach the - // synchronization point in varying timings and the number of SMs is not very big) - auto partial_result = - thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - block_results.begin(), - block_results.end(), - T(), - [] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); }); - - result = plus_edge_op_result(result, partial_result); + + if (matrix_partition.get_major_size() > 0) { + auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? vertex_t{0} + : matrix_partition.get_major_value_start_offset(); + auto col_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? matrix_partition.get_major_value_start_offset() + : vertex_t{0}; + + raft::grid_1d_thread_t update_grid(matrix_partition.get_major_size(), + detail::transform_reduce_e_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + rmm::device_vector block_results(update_grid.num_blocks); + + detail::for_all_major_for_all_nbr_low_degree<<>>( + matrix_partition, + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first + col_value_input_offset, + block_results.data(), + e_op); + + // FIXME: we have several options to implement this. With cooperative group support + // (https://devblogs.nvidia.com/cooperative-groups/), we can run this synchronization within + // the previous kernel. 
Using atomics at the end of the previous kernel is another option + // (sequentialization due to atomics may not be bad as different blocks may reach the + // synchronization point in varying timings and the number of SMs is not very big) + auto partial_result = + thrust::reduce(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + block_results.begin(), + block_results.end(), + T(), + [] __device__(auto lhs, auto rhs) { return plus_edge_op_result(lhs, rhs); }); + + result = plus_edge_op_result(result, partial_result); + } } if (GraphViewType::is_multi_gpu) { diff --git a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh index a1d18e26d1c..a2250482c68 100644 --- a/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh +++ b/cpp/include/patterns/update_frontier_v_push_if_out_nbr.cuh @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include #include @@ -23,6 +24,7 @@ #include #include #include +#include #include #include @@ -37,9 +39,12 @@ #include #include +#include +#include #include #include #include +#include namespace cugraph { namespace experimental { @@ -108,7 +113,7 @@ __global__ void for_all_frontier_row_for_all_nbr_low_degree( static_assert(sizeof(unsigned long long int) == sizeof(size_t)); auto buffer_idx = atomicAdd(reinterpret_cast(buffer_idx_ptr), static_cast(1)); - *(buffer_key_output_first + buffer_idx) = col_offset; + *(buffer_key_output_first + buffer_idx) = col; *(buffer_payload_output_first + buffer_idx) = remove_first_thrust_tuple_element()(e_op_result); } @@ -178,6 +183,7 @@ size_t reduce_buffer_elements(raft::handle_t const& handle, } template __global__ void update_frontier_and_vertex_output_values( + vertex_partition_device_t vertex_partition, BufferKeyInputIterator buffer_key_input_first, BufferPayloadInputIterator buffer_payload_input_first, size_t num_buffer_elements, @@ -221,12 +228,13 @@ __global__ void update_frontier_and_vertex_output_values( if 
(idx < num_buffer_elements) { key = *(buffer_key_input_first + idx); - auto v_val = *(vertex_value_input_first + key); + auto key_offset = vertex_partition.get_local_vertex_offset_from_vertex_nocheck(key); + auto v_val = *(vertex_value_input_first + key_offset); auto payload = *(buffer_payload_input_first + idx); auto v_op_result = v_op(v_val, payload); selected_bucket_idx = thrust::get<0>(v_op_result); if (selected_bucket_idx != invalid_bucket_idx) { - *(vertex_value_output_first + key) = + *(vertex_value_output_first + key_offset) = remove_first_thrust_tuple_element()(v_op_result); bucket_block_local_offsets[selected_bucket_idx] = 1; } @@ -364,6 +372,7 @@ void update_frontier_v_push_if_out_nbr( rmm::device_uvector frontier_rows( 0, handle.get_stream()); // relevant only if GraphViewType::is_multi_gpu is true + size_t frontier_size{}; if (GraphViewType::is_multi_gpu) { auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto const row_comm_rank = row_comm.get_rank(); @@ -372,47 +381,49 @@ void update_frontier_v_push_if_out_nbr( auto const col_comm_rank = col_comm.get_rank(); auto sub_comm_rank = graph_view.is_hypergraph_partitioned() ? col_comm_rank : row_comm_rank; - auto frontier_size = (static_cast(sub_comm_rank) == i) - ? thrust::distance(vertex_first, vertex_last) - : size_t{0}; - if (graph_view.is_hypergraph_partitioned()) { - col_comm.bcast(&frontier_size, 1, i, handle.get_stream()); - } else { - row_comm.bcast(&frontier_size, 1, i, handle.get_stream()); - } + frontier_size = host_scalar_bcast( + graph_view.is_hypergraph_partitioned() ? col_comm : row_comm, + (static_cast(sub_comm_rank) == i) ? thrust::distance(vertex_first, vertex_last) + : size_t{0}, + i, + handle.get_stream()); if (static_cast(sub_comm_rank) != i) { frontier_rows.resize(frontier_size, handle.get_stream()); } device_bcast(graph_view.is_hypergraph_partitioned() ? 
col_comm : row_comm, vertex_first, frontier_rows.begin(), - frontier_rows.size(), + frontier_size, i, handle.get_stream()); + } else { + frontier_size = thrust::distance(vertex_first, vertex_last); } edge_t max_pushes = - frontier_rows.size() > 0 - ? thrust::transform_reduce( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - frontier_rows.begin(), - frontier_rows.end(), - [matrix_partition] __device__(auto row) { - auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); - return matrix_partition.get_local_degree(row_offset); - }, - edge_t{0}, - thrust::plus()) - : thrust::transform_reduce( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - vertex_first, - vertex_last, - [matrix_partition] __device__(auto row) { - auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); - return matrix_partition.get_local_degree(row_offset); - }, - edge_t{0}, - thrust::plus()); + frontier_size > 0 + ? frontier_rows.size() > 0 + ? thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + frontier_rows.begin(), + frontier_rows.end(), + [matrix_partition] __device__(auto row) { + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + return matrix_partition.get_local_degree(row_offset); + }, + edge_t{0}, + thrust::plus()) + : thrust::transform_reduce( + rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_first, + vertex_last, + [matrix_partition] __device__(auto row) { + auto row_offset = matrix_partition.get_major_offset_from_major_nocheck(row); + return matrix_partition.get_local_degree(row_offset); + }, + edge_t{0}, + thrust::plus()) + : edge_t{0}; // FIXME: This is highly pessimistic for single GPU (and multi-GPU as well if we maintain // additional per column data for filtering in e_op). 
If we can pause & resume execution if @@ -433,55 +444,48 @@ void update_frontier_v_push_if_out_nbr( auto buffer_key_first = std::get<0>(buffer_first); auto buffer_payload_first = std::get<1>(buffer_first); - vertex_t row_value_input_offset = 0; - if (GraphViewType::is_multi_gpu) { - auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); - auto const row_comm_size = row_comm.get_size(); - auto& col_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().col_name()); - auto const col_comm_rank = col_comm.get_rank(); - row_value_input_offset = - graph_view.is_hypergraph_partitioned() - ? matrix_partition.get_major_value_start_offset() - : graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size + i) - - graph_view.get_vertex_partition_first(col_comm_rank * row_comm_size); - } - - raft::grid_1d_thread_t for_all_low_degree_grid( - frontier_rows.size() > 0 ? frontier_rows.size() : thrust::distance(vertex_first, vertex_last), - detail::update_frontier_v_push_if_out_nbr_for_all_block_size, - handle.get_device_properties().maxGridSize[0]); + auto row_value_input_offset = GraphViewType::is_adj_matrix_transposed + ? vertex_t{0} + : matrix_partition.get_major_value_start_offset(); // FIXME: This is highly inefficeint for graphs with high-degree vertices. If we renumber // vertices to insure that rows within a partition are sorted by their out-degree in decreasing // order, we will apply this kernel only to low out-degree vertices. 
- if (frontier_rows.size() > 0) { - detail::for_all_frontier_row_for_all_nbr_low_degree<<>>( - matrix_partition, - frontier_rows.begin(), - frontier_rows.begin(), - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first, - buffer_key_first, - buffer_payload_first, - vertex_frontier.get_buffer_idx_ptr(), - e_op); - } else { - detail::for_all_frontier_row_for_all_nbr_low_degree<<>>( - matrix_partition, - vertex_first, - vertex_last, - adj_matrix_row_value_input_first + row_value_input_offset, - adj_matrix_col_value_input_first, - buffer_key_first, - buffer_payload_first, - vertex_frontier.get_buffer_idx_ptr(), - e_op); + if (frontier_size > 0) { + raft::grid_1d_thread_t for_all_low_degree_grid( + frontier_size, + detail::update_frontier_v_push_if_out_nbr_for_all_block_size, + handle.get_device_properties().maxGridSize[0]); + + if (frontier_rows.size() > 0) { + detail::for_all_frontier_row_for_all_nbr_low_degree<<>>( + matrix_partition, + frontier_rows.begin(), + frontier_rows.end(), + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first, + buffer_key_first, + buffer_payload_first, + vertex_frontier.get_buffer_idx_ptr(), + e_op); + } else { + detail::for_all_frontier_row_for_all_nbr_low_degree<<>>( + matrix_partition, + vertex_first, + vertex_last, + adj_matrix_row_value_input_first + row_value_input_offset, + adj_matrix_col_value_input_first, + buffer_key_first, + buffer_payload_first, + vertex_frontier.get_buffer_idx_ptr(), + e_op); + } } } @@ -501,6 +505,7 @@ void update_frontier_v_push_if_out_nbr( if (GraphViewType::is_multi_gpu) { auto& comm = handle.get_comms(); + auto const comm_rank = comm.get_rank(); auto& row_comm = handle.get_subcomm(cugraph::partition_2d::key_naming_t().row_name()); auto const row_comm_rank = row_comm.get_rank(); auto const row_comm_size = row_comm.get_size(); @@ -515,99 +520,119 @@ void update_frontier_v_push_if_out_nbr( graph_view.is_hypergraph_partitioned() 
? col_comm_rank * row_comm_size + i : row_comm_rank * col_comm_size + i); } + rmm::device_uvector d_vertex_lasts(h_vertex_lasts.size(), handle.get_stream()); raft::update_device( d_vertex_lasts.data(), h_vertex_lasts.data(), h_vertex_lasts.size(), handle.get_stream()); rmm::device_uvector d_tx_buffer_last_boundaries(d_vertex_lasts.size(), handle.get_stream()); - thrust::upper_bound(d_vertex_lasts.begin(), - d_vertex_lasts.end(), + thrust::lower_bound(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), buffer_key_first, buffer_key_first + num_buffer_elements, + d_vertex_lasts.begin(), + d_vertex_lasts.end(), d_tx_buffer_last_boundaries.begin()); std::vector h_tx_buffer_last_boundaries(d_tx_buffer_last_boundaries.size()); raft::update_host(h_tx_buffer_last_boundaries.data(), d_tx_buffer_last_boundaries.data(), d_tx_buffer_last_boundaries.size(), handle.get_stream()); - std::vector tx_counts(h_tx_buffer_last_boundaries.size()); + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + std::vector tx_counts(h_tx_buffer_last_boundaries.size()); std::adjacent_difference( h_tx_buffer_last_boundaries.begin(), h_tx_buffer_last_boundaries.end(), tx_counts.begin()); - std::vector rx_counts(graph_view.is_hypergraph_partitioned() ? row_comm_size + std::vector rx_counts(graph_view.is_hypergraph_partitioned() ? row_comm_size : col_comm_size); std::vector count_requests(tx_counts.size() + rx_counts.size()); + size_t tx_self_i = std::numeric_limits::max(); for (size_t i = 0; i < tx_counts.size(); ++i) { - comm.isend(&tx_counts[i], - 1, - graph_view.is_hypergraph_partitioned() ? col_comm_rank * row_comm_size + i - : row_comm_rank * col_comm_size + i, - 0 /* tag */, - count_requests.data() + i); + auto comm_dst_rank = graph_view.is_hypergraph_partitioned() + ? 
col_comm_rank * row_comm_size + static_cast(i) + : row_comm_rank * col_comm_size + static_cast(i); + if (comm_dst_rank == comm_rank) { + tx_self_i = i; + // FIXME: better define request_null (similar to MPI_REQUEST_NULL) under raft::comms + count_requests[i] = std::numeric_limits::max(); + } else { + comm.isend(&tx_counts[i], 1, comm_dst_rank, 0 /* tag */, count_requests.data() + i); + } } for (size_t i = 0; i < rx_counts.size(); ++i) { - comm.irecv(&rx_counts[i], - 1, - graph_view.is_hypergraph_partitioned() ? col_comm_rank * row_comm_size + i - : row_comm_rank + i * row_comm_size, - 0 /* tag */, - count_requests.data() + tx_counts.size() + i); + auto comm_src_rank = graph_view.is_hypergraph_partitioned() + ? col_comm_rank * row_comm_size + static_cast(i) + : static_cast(i) * row_comm_size + comm_rank / col_comm_size; + if (comm_src_rank == comm_rank) { + assert(tx_self_i != std::numeric_limits::max()); + rx_counts[i] = tx_counts[tx_self_i]; + // FIXME: better define request_null (similar to MPI_REQUEST_NULL) under raft::comms + count_requests[tx_counts.size() + i] = std::numeric_limits::max(); + } else { + comm.irecv(&rx_counts[i], + 1, + comm_src_rank, + 0 /* tag */, + count_requests.data() + tx_counts.size() + i); + } } + // FIXME: better define request_null (similar to MPI_REQUEST_NULL) under raft::comms, if + // raft::comms::wait immediately returns on seeing request_null, this remove is unnecessary + count_requests.erase(std::remove(count_requests.begin(), + count_requests.end(), + std::numeric_limits::max()), + count_requests.end()); comm.waitall(count_requests.size(), count_requests.data()); - std::vector tx_offsets(tx_counts.size() + 1, edge_t{0}); + std::vector tx_offsets(tx_counts.size() + 1, edge_t{0}); std::partial_sum(tx_counts.begin(), tx_counts.end(), tx_offsets.begin() + 1); - std::vector rx_offsets(rx_counts.size() + 1, edge_t{0}); + std::vector rx_offsets(rx_counts.size() + 1, edge_t{0}); std::partial_sum(rx_counts.begin(), rx_counts.end(), 
rx_offsets.begin() + 1); // FIXME: this will require costly reallocation if we don't use the new CUDA feature to reserve // address space. - vertex_frontier.resize_buffer(num_buffer_elements + rx_offsets.back()); + // FIXME: std::max(actual size, 1) as ncclRecv currently hangs if recvuff is nullptr even if + // count is 0 + vertex_frontier.resize_buffer(std::max(num_buffer_elements + rx_offsets.back(), size_t(1))); auto buffer_first = vertex_frontier.buffer_begin(); auto buffer_key_first = std::get<0>(buffer_first) + num_buffer_offset; auto buffer_payload_first = std::get<1>(buffer_first) + num_buffer_offset; - auto constexpr tuple_size = thrust_tuple_size_or_one< - typename std::iterator_traits::value_type>::value; - - std::vector buffer_requests((tx_counts.size() + rx_counts.size()) * - (1 + tuple_size)); - for (size_t i = 0; i < tx_counts.size(); ++i) { - auto comm_dst_rank = graph_view.is_hypergraph_partitioned() - ? col_comm_rank * row_comm_size + i - : row_comm_rank * col_comm_size + i; - comm.isend(detail::iter_to_raw_ptr(buffer_key_first + tx_offsets[i]), - static_cast(tx_counts[i]), - comm_dst_rank, - int{0} /* tag */, - buffer_requests.data() + i * (1 + tuple_size)); - device_isend( - comm, - buffer_payload_first + tx_offsets[i], - static_cast(tx_counts[i]), - comm_dst_rank, - int{1} /* base tag */, - buffer_requests.data() + (i * (1 + tuple_size) + 1)); + std::vector tx_dst_ranks(tx_counts.size()); + std::vector rx_src_ranks(rx_counts.size()); + for (size_t i = 0; i < tx_dst_ranks.size(); ++i) { + tx_dst_ranks[i] = graph_view.is_hypergraph_partitioned() + ? col_comm_rank * row_comm_size + static_cast(i) + : row_comm_rank * col_comm_size + static_cast(i); } - for (size_t i = 0; i < rx_counts.size(); ++i) { - auto comm_src_rank = graph_view.is_hypergraph_partitioned() - ? 
col_comm_rank * row_comm_size + i - : row_comm_rank + i * row_comm_size; - comm.irecv(detail::iter_to_raw_ptr(buffer_key_first + num_buffer_elements + rx_offsets[i]), - static_cast(rx_counts[i]), - comm_src_rank, - int{0} /* tag */, - buffer_requests.data() + ((tx_counts.size() + i) * (1 + tuple_size))); - device_irecv( - comm, - buffer_payload_first + num_buffer_elements + rx_offsets[i], - static_cast(rx_counts[i]), - comm_src_rank, - int{1} /* base tag */, - buffer_requests.data() + ((tx_counts.size() + i) * (1 + tuple_size) + 1)); + for (size_t i = 0; i < rx_src_ranks.size(); ++i) { + rx_src_ranks[i] = graph_view.is_hypergraph_partitioned() + ? col_comm_rank * row_comm_size + static_cast(i) + : static_cast(i) * row_comm_size + comm_rank / col_comm_size; } - comm.waitall(buffer_requests.size(), buffer_requests.data()); + + device_multicast_sendrecv( + comm, + buffer_key_first, + tx_counts, + tx_offsets, + tx_dst_ranks, + buffer_key_first + num_buffer_elements, + rx_counts, + rx_offsets, + rx_src_ranks, + handle.get_stream()); + device_multicast_sendrecv( + comm, + buffer_payload_first, + tx_counts, + tx_offsets, + tx_dst_ranks, + buffer_payload_first + num_buffer_elements, + rx_counts, + rx_offsets, + rx_src_ranks, + handle.get_stream()); // FIXME: this does not exploit the fact that each segment is sorted. Lost performance // optimization opportunities. 
@@ -634,10 +659,13 @@ void update_frontier_v_push_if_out_nbr( auto constexpr invalid_vertex = invalid_vertex_id::value; + vertex_partition_device_t vertex_partition(graph_view); + auto bucket_and_bucket_size_device_ptrs = vertex_frontier.get_bucket_and_bucket_size_device_pointers(); detail::update_frontier_and_vertex_output_values <<>>( + vertex_partition, buffer_key_first, buffer_payload_first, num_buffer_elements, diff --git a/cpp/include/patterns/vertex_frontier.cuh b/cpp/include/patterns/vertex_frontier.cuh index 3b4b05ffb2f..ccb9e1a5a0d 100644 --- a/cpp/include/patterns/vertex_frontier.cuh +++ b/cpp/include/patterns/vertex_frontier.cuh @@ -239,23 +239,25 @@ class VertexFrontier { auto bucket_and_bucket_size_device_ptrs = get_bucket_and_bucket_size_device_pointers(); auto& this_bucket = get_bucket(bucket_idx); - raft::grid_1d_thread_t move_and_invalidate_if_grid( - this_bucket.size(), - detail::move_and_invalidate_if_block_size, - handle_ptr_->get_device_properties().maxGridSize[0]); - - detail::move_and_invalidate_if - <<get_stream()>>>(this_bucket.begin(), - this_bucket.end(), - std::get<0>(bucket_and_bucket_size_device_ptrs).get(), - std::get<1>(bucket_and_bucket_size_device_ptrs).get(), - bucket_idx, - kInvalidBucketIdx, - invalid_vertex, - split_op); + if (this_bucket.size() > 0) { + raft::grid_1d_thread_t move_and_invalidate_if_grid( + this_bucket.size(), + detail::move_and_invalidate_if_block_size, + handle_ptr_->get_device_properties().maxGridSize[0]); + + detail::move_and_invalidate_if + <<get_stream()>>>(this_bucket.begin(), + this_bucket.end(), + std::get<0>(bucket_and_bucket_size_device_ptrs).get(), + std::get<1>(bucket_and_bucket_size_device_ptrs).get(), + bucket_idx, + kInvalidBucketIdx, + invalid_vertex, + split_op); + } // FIXME: if we adopt CUDA cooperative group https://devblogs.nvidia.com/cooperative-groups // and global sync(), we can merge this step with the above kernel (and rename the above kernel diff --git 
a/cpp/include/utilities/comm_utils.cuh b/cpp/include/utilities/comm_utils.cuh index 6cd6e62bc3a..fb69fff49c9 100644 --- a/cpp/include/utilities/comm_utils.cuh +++ b/cpp/include/utilities/comm_utils.cuh @@ -227,6 +227,207 @@ struct device_irecv_tuple_iterator_element_impl +std::enable_if_t::value, void> +device_sendrecv_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + size_t tx_count, + int dst, + OutputIterator output_first, + size_t rx_count, + int src, + cudaStream_t stream) +{ + // no-op +} + +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_sendrecv_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + size_t tx_count, + int dst, + OutputIterator output_first, + size_t rx_count, + int src, + cudaStream_t stream) +{ + using value_type = typename std::iterator_traits::value_type; + static_assert( + std::is_same::value_type, value_type>::value); + // ncclSend/ncclRecv pair needs to be located inside ncclGroupStart/ncclGroupEnd to avoid deadlock + ncclGroupStart(); + ncclSend(iter_to_raw_ptr(input_first), + tx_count * sizeof(value_type), + ncclUint8, + dst, + comm.get_nccl_comm(), + stream); + ncclRecv(iter_to_raw_ptr(output_first), + rx_count * sizeof(value_type), + ncclUint8, + src, + comm.get_nccl_comm(), + stream); + ncclGroupEnd(); +} + +template +struct device_sendrecv_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + size_t tx_count, + int dst, + OutputIterator output_first, + size_t rx_count, + int src, + cudaStream_t stream) const + { + using output_value_t = typename thrust:: + tuple_element::value_type>::type; + auto tuple_element_input_first = thrust::get(input_first.get_iterator_tuple()); + auto tuple_element_output_first = thrust::get(output_first.get_iterator_tuple()); + device_sendrecv_impl( + comm, + tuple_element_input_first, + tx_count, + dst, + tuple_element_output_first, + rx_count, + src, + stream); + 
device_sendrecv_tuple_iterator_element_impl().run( + comm, input_first, tx_count, dst, output_first, rx_count, src, stream); + } +}; + +template +struct device_sendrecv_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + size_t count, + int dst, + int base_tag, + raft::comms::request_t* requests) const + { + } +}; + +template +std::enable_if_t::value, void> +device_multicast_sendrecv_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + std::vector const& tx_counts, + std::vector const& tx_offsets, + std::vector const& tx_dst_ranks, + OutputIterator output_first, + std::vector const& rx_counts, + std::vector const& rx_offsets, + std::vector const& rx_src_ranks, + cudaStream_t stream) +{ + // no-op +} + +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_multicast_sendrecv_impl(raft::comms::comms_t const& comm, + InputIterator input_first, + std::vector const& tx_counts, + std::vector const& tx_offsets, + std::vector const& tx_dst_ranks, + OutputIterator output_first, + std::vector const& rx_counts, + std::vector const& rx_offsets, + std::vector const& rx_src_ranks, + cudaStream_t stream) +{ + using value_type = typename std::iterator_traits::value_type; + static_assert( + std::is_same::value_type, value_type>::value); + // ncclSend/ncclRecv pair needs to be located inside ncclGroupStart/ncclGroupEnd to avoid deadlock + ncclGroupStart(); + for (size_t i = 0; i < tx_counts.size(); ++i) { + ncclSend(iter_to_raw_ptr(input_first + tx_offsets[i]), + tx_counts[i] * sizeof(value_type), + ncclUint8, + tx_dst_ranks[i], + comm.get_nccl_comm(), + stream); + } + for (size_t i = 0; i < rx_counts.size(); ++i) { + ncclRecv(iter_to_raw_ptr(output_first + rx_offsets[i]), + rx_counts[i] * sizeof(value_type), + ncclUint8, + rx_src_ranks[i], + comm.get_nccl_comm(), + stream); + } + ncclGroupEnd(); +} + +template +struct device_multicast_sendrecv_tuple_iterator_element_impl { + void 
run(raft::comms::comms_t const& comm, + InputIterator input_first, + std::vector const& tx_counts, + std::vector const& tx_offsets, + std::vector const& tx_dst_ranks, + OutputIterator output_first, + std::vector const& rx_counts, + std::vector const& rx_offsets, + std::vector const& rx_src_ranks, + cudaStream_t stream) const + { + using output_value_t = typename thrust:: + tuple_element::value_type>::type; + auto tuple_element_input_first = thrust::get(input_first.get_iterator_tuple()); + auto tuple_element_output_first = thrust::get(output_first.get_iterator_tuple()); + device_multicast_sendrecv_impl(comm, + tuple_element_input_first, + tx_counts, + tx_offsets, + tx_dst_ranks, + tuple_element_output_first, + rx_counts, + rx_offsets, + rx_src_ranks, + stream); + device_multicast_sendrecv_tuple_iterator_element_impl() + .run(comm, + input_first, + tx_counts, + tx_offsets, + tx_dst_ranks, + output_first, + rx_counts, + rx_offsets, + rx_src_ranks, + stream); + } +}; + +template +struct device_multicast_sendrecv_tuple_iterator_element_impl { + void run(raft::comms::comms_t const& comm, + InputIterator input_first, + std::vector const& tx_counts, + std::vector const& tx_offsets, + std::vector const& tx_dst_ranks, + OutputIterator output_first, + std::vector const& rx_counts, + std::vector const& rx_offsets, + std::vector const& rx_src_ranks, + cudaStream_t stream) const + { + } +}; + template std::enable_if_t::value, void> device_bcast_impl(raft::comms::comms_t const& comm, @@ -490,6 +691,50 @@ host_scalar_allreduce(raft::comms::comms_t const& comm, T input, cudaStream_t st return ret; } +template +std::enable_if_t::value, T> host_scalar_bcast( + raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +{ + rmm::device_uvector d_input(1, stream); + if (comm.get_rank() == root) { raft::update_device(d_input.data(), &input, 1, stream); } + comm.bcast(d_input.data(), 1, root, stream); + auto h_input = input; + if (comm.get_rank() != root) { 
raft::update_host(&h_input, d_input.data(), 1, stream); } + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + return h_input; +} + +template +std::enable_if_t::value, T> +host_scalar_bcast(raft::comms::comms_t const& comm, T input, int root, cudaStream_t stream) +{ + size_t constexpr tuple_size = thrust::tuple_size::value; + std::vector h_tuple_scalar_elements(tuple_size); + rmm::device_uvector d_tuple_scalar_elements(tuple_size, stream); + auto ret = input; + + if (comm.get_rank() == root) { + detail::update_vector_of_tuple_scalar_elements_from_tuple_impl() + .update(h_tuple_scalar_elements, input); + raft::update_device( + d_tuple_scalar_elements.data(), h_tuple_scalar_elements.data(), tuple_size, stream); + } + comm.bcast(d_tuple_scalar_elements.data(), d_tuple_scalar_elements.size(), root, stream); + if (comm.get_rank() != root) { + raft::update_host( + h_tuple_scalar_elements.data(), d_tuple_scalar_elements.data(), tuple_size, stream); + } + auto status = comm.sync_stream(stream); + CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + if (comm.get_rank() != root) { + detail::update_tuple_from_vector_of_tuple_scalar_elements_impl() + .update(ret, h_tuple_scalar_elements); + } + + return ret; +} + template std::enable_if_t::value, std::vector> host_scalar_allgather( raft::comms::comms_t const& comm, T input, cudaStream_t stream) @@ -633,6 +878,123 @@ device_irecv(raft::comms::comms_t const& comm, .run(comm, output_first, count, src, base_tag, requests); } +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_sendrecv(raft::comms::comms_t const& comm, + InputIterator input_first, + size_t tx_count, + int dst, + OutputIterator output_first, + size_t rx_count, + int src, + cudaStream_t stream) +{ + detail::device_sendrecv_impl( + comm, input_first, tx_count, dst, output_first, rx_count, src, stream); +} + +template 
+std::enable_if_t< + is_thrust_tuple_of_arithmetic::value_type>::value && + is_thrust_tuple::value_type>::value, + void> +device_sendrecv(raft::comms::comms_t const& comm, + InputIterator input_first, + size_t tx_count, + int dst, + OutputIterator output_first, + size_t rx_count, + int src, + cudaStream_t stream) +{ + static_assert( + thrust::tuple_size::value_type>::value == + thrust::tuple_size::value_type>::value); + + size_t constexpr tuple_size = + thrust::tuple_size::value_type>::value; + + // FIXME: NCCL 2.7 supports only one ncclSend and one ncclRecv for a source rank and destination + // rank inside ncclGroupStart/ncclGroupEnd, so we cannot place this inside + // ncclGroupStart/ncclGroupEnd, this restriction will be lifted in NCCL 2.8 + detail::device_sendrecv_tuple_iterator_element_impl() + .run(comm, input_first, tx_count, dst, output_first, rx_count, src, stream); +} + +template +std::enable_if_t< + std::is_arithmetic::value_type>::value, + void> +device_multicast_sendrecv(raft::comms::comms_t const& comm, + InputIterator input_first, + std::vector const& tx_counts, + std::vector const& tx_offsets, + std::vector const& tx_dst_ranks, + OutputIterator output_first, + std::vector const& rx_counts, + std::vector const& rx_offsets, + std::vector const& rx_src_ranks, + cudaStream_t stream) +{ + detail::device_multicast_sendrecv_impl(comm, + input_first, + tx_counts, + tx_offsets, + tx_dst_ranks, + output_first, + rx_counts, + rx_offsets, + rx_src_ranks, + stream); +} + +template +std::enable_if_t< + is_thrust_tuple_of_arithmetic::value_type>::value && + is_thrust_tuple::value_type>::value, + void> +device_multicast_sendrecv(raft::comms::comms_t const& comm, + InputIterator input_first, + std::vector const& tx_counts, + std::vector const& tx_offsets, + std::vector const& tx_dst_ranks, + OutputIterator output_first, + std::vector const& rx_counts, + std::vector const& rx_offsets, + std::vector const& rx_src_ranks, + cudaStream_t stream) +{ + static_assert( + 
thrust::tuple_size::value_type>::value == + thrust::tuple_size::value_type>::value); + + size_t constexpr tuple_size = + thrust::tuple_size::value_type>::value; + + // FIXME: NCCL 2.7 supports only one ncclSend and one ncclRecv for a source rank and destination + // rank inside ncclGroupStart/ncclGroupEnd, so we cannot place this inside + // ncclGroupStart/ncclGroupEnd, this restriction will be lifted in NCCL 2.8 + detail::device_multicast_sendrecv_tuple_iterator_element_impl() + .run(comm, + input_first, + tx_counts, + tx_offsets, + tx_dst_ranks, + output_first, + rx_counts, + rx_offsets, + rx_src_ranks, + stream); +} + template std::enable_if_t< std::is_arithmetic::value_type>::value, @@ -785,4 +1147,4 @@ auto get_comm_buffer_begin(BufferType& buffer) } } // namespace experimental -} // namespace cugraph \ No newline at end of file +} // namespace cugraph diff --git a/cpp/src/experimental/bfs.cu b/cpp/src/experimental/bfs.cu index 940ff30de07..f297587a1d6 100644 --- a/cpp/src/experimental/bfs.cu +++ b/cpp/src/experimental/bfs.cu @@ -93,7 +93,10 @@ void bfs(raft::handle_t const &handle, enum class Bucket { cur, num_buckets }; std::vector bucket_sizes(static_cast(Bucket::num_buckets), push_graph_view.get_number_of_local_vertices()); - VertexFrontier, vertex_t, false, static_cast(Bucket::num_buckets)> + VertexFrontier, + vertex_t, + GraphViewType::is_multi_gpu, + static_cast(Bucket::num_buckets)> vertex_frontier(handle, bucket_sizes); if (push_graph_view.is_local_vertex_nocheck(source_vertex)) { @@ -158,6 +161,11 @@ void bfs(raft::handle_t const &handle, if (depth >= depth_limit) { break; } } + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // this is as necessary vertex_frontier will become out-of-scope once + // this function returns (FIXME: should I stream sync in VertexFrontier + // destructor?) 
+ return; } diff --git a/cpp/src/experimental/graph.cu b/cpp/src/experimental/graph.cu index 0294716089c..b6124bff94e 100644 --- a/cpp/src/experimental/graph.cu +++ b/cpp/src/experimental/graph.cu @@ -251,7 +251,7 @@ graph_t(row_comm_size))) || + (edgelists.size() == static_cast(col_comm_size))) || (!(partition.is_hypergraph_partitioned()) && (edgelists.size() == 1)), "Invalid API parameter: errneous edgelists.size()."); @@ -311,9 +311,7 @@ graph_tget_handle_ptr()), edgelists[i], major_first, major_last, minor_first, minor_last); adj_matrix_partition_offsets_.push_back(std::move(offsets)); adj_matrix_partition_indices_.push_back(std::move(indices)); - if (adj_matrix_partition_weights_.size() > 0) { - adj_matrix_partition_weights_.push_back(std::move(weights)); - } + if (is_weighted) { adj_matrix_partition_weights_.push_back(std::move(weights)); } } // update degree-based segment offsets (to be used for graph analytics kernel optimization) @@ -356,23 +354,41 @@ graph_t aggregate_segment_offsets(row_comm_size * segment_offsets.size(), - default_stream); - row_comm.allgather(segment_offsets.data(), - aggregate_segment_offsets.data(), - segment_offsets.size(), - default_stream); + rmm::device_uvector aggregate_segment_offsets(0, default_stream); + if (partition.is_hypergraph_partitioned()) { + rmm::device_uvector aggregate_segment_offsets( + col_comm_size * segment_offsets.size(), default_stream); + col_comm.allgather(segment_offsets.data(), + aggregate_segment_offsets.data(), + segment_offsets.size(), + default_stream); + } else { + rmm::device_uvector aggregate_segment_offsets( + row_comm_size * segment_offsets.size(), default_stream); + row_comm.allgather(segment_offsets.data(), + aggregate_segment_offsets.data(), + segment_offsets.size(), + default_stream); + } - vertex_partition_segment_offsets_.resize(row_comm_size * (segment_offsets.size())); + vertex_partition_segment_offsets_.resize(aggregate_segment_offsets.size()); 
raft::update_host(vertex_partition_segment_offsets_.data(), aggregate_segment_offsets.data(), aggregate_segment_offsets.size(), default_stream); - auto status = handle.get_comms().sync_stream( - default_stream); // this is necessary as degrees, d_thresholds, and segment_offsets will - // become out-of-scope once control flow exits this block and - // vertex_partition_segment_offsets_ can be used right after return. + raft::comms::status_t status{}; + if (partition.is_hypergraph_partitioned()) { + status = col_comm.sync_stream( + default_stream); // this is necessary as degrees, d_thresholds, and segment_offsets will + // become out-of-scope once control flow exits this block and + // vertex_partition_segment_offsets_ can be used right after return. + } else { + status = row_comm.sync_stream( + default_stream); // this is necessary as degrees, d_thresholds, and segment_offsets will + // become out-of-scope once control flow exits this block and + // vertex_partition_segment_offsets_ can be used right after return. 
+ } CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); } diff --git a/cpp/src/experimental/graph_view.cu b/cpp/src/experimental/graph_view.cu index 999c91df427..04d2ea990df 100644 --- a/cpp/src/experimental/graph_view.cu +++ b/cpp/src/experimental/graph_view.cu @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -101,7 +102,8 @@ graph_view_t{minor_first, minor_last}) == 0, "Invalid API parameter: adj_matrix_partition_indices[] have out-of-range vertex IDs."); } - this->get_handle_ptr()->get_comms().allreduce(&number_of_local_edges_sum, - &number_of_local_edges_sum, - 1, - raft::comms::op_t::SUM, - default_stream); - auto status = handle.get_comms().sync_stream(default_stream); - CUGRAPH_EXPECTS(status == raft::comms::status_t::SUCCESS, "sync_stream() failure."); + number_of_local_edges_sum = host_scalar_allreduce( + this->get_handle_ptr()->get_comms(), number_of_local_edges_sum, default_stream); CUGRAPH_EXPECTS(number_of_local_edges_sum == this->get_number_of_edges(), "Invalid API parameter: the sum of local edges doe counts not match with " "number_of_local_edges."); @@ -168,7 +165,8 @@ graph_view_t #include #include -#include #include -#include #include #include @@ -92,12 +90,18 @@ void katz_centrality(raft::handle_t &handle, // 3. 
katz centrality iteration // old katz centrality values - rmm::device_vector adj_matrix_row_katz_centralities( - pull_graph_view.get_number_of_local_adj_matrix_partition_rows(), result_t{0.0}); + rmm::device_uvector tmp_katz_centralities( + pull_graph_view.get_number_of_local_vertices(), handle.get_stream()); + rmm::device_uvector adj_matrix_row_katz_centralities( + pull_graph_view.get_number_of_local_adj_matrix_partition_rows(), handle.get_stream()); + auto new_katz_centralities = katz_centralities; + auto old_katz_centralities = tmp_katz_centralities.data(); size_t iter{0}; while (true) { + std::swap(new_katz_centralities, old_katz_centralities); + copy_to_adj_matrix_row( - handle, pull_graph_view, katz_centralities, adj_matrix_row_katz_centralities.begin()); + handle, pull_graph_view, old_katz_centralities, adj_matrix_row_katz_centralities.begin()); copy_v_transform_reduce_in_nbr( handle, @@ -108,14 +112,14 @@ void katz_centrality(raft::handle_t &handle, return static_cast(alpha * src_val * w); }, betas != nullptr ? 
result_t{0.0} : beta, - katz_centralities); + new_katz_centralities); if (betas != nullptr) { - auto val_first = thrust::make_zip_iterator(thrust::make_tuple(katz_centralities, betas)); + auto val_first = thrust::make_zip_iterator(thrust::make_tuple(new_katz_centralities, betas)); thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), val_first, val_first + pull_graph_view.get_number_of_local_vertices(), - katz_centralities, + new_katz_centralities, [] __device__(auto val) { auto const katz_centrality = thrust::get<0>(val); auto const beta = thrust::get<1>(val); @@ -123,12 +127,11 @@ void katz_centrality(raft::handle_t &handle, }); } - auto diff_sum = transform_reduce_v_with_adj_matrix_row( + auto diff_sum = transform_reduce_v( handle, pull_graph_view, - katz_centralities, - adj_matrix_row_katz_centralities.begin(), - [] __device__(auto v_val, auto row_val) { return std::abs(v_val - row_val); }, + thrust::make_zip_iterator(thrust::make_tuple(new_katz_centralities, old_katz_centralities)), + [] __device__(auto val) { return std::abs(thrust::get<0>(val) - thrust::get<1>(val)); }, result_t{0.0}); iter++; @@ -140,6 +143,13 @@ void katz_centrality(raft::handle_t &handle, } } + if (new_katz_centralities != katz_centralities) { + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + new_katz_centralities, + new_katz_centralities + pull_graph_view.get_number_of_local_vertices(), + katz_centralities); + } + if (normalize) { auto l2_norm = transform_reduce_v( handle, diff --git a/cpp/src/experimental/pagerank.cu b/cpp/src/experimental/pagerank.cu index f1acd47ac52..4084695deb1 100644 --- a/cpp/src/experimental/pagerank.cu +++ b/cpp/src/experimental/pagerank.cu @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include @@ -44,7 +44,7 @@ namespace detail { template void pagerank(raft::handle_t const& handle, GraphViewType const& pull_graph_view, - typename GraphViewType::weight_type* 
adj_matrix_row_out_weight_sums, + typename GraphViewType::weight_type* precomputed_vertex_out_weight_sums, typename GraphViewType::vertex_type* personalization_vertices, result_t* personalization_values, typename GraphViewType::vertex_type personalization_vector_size, @@ -79,13 +79,13 @@ void pagerank(raft::handle_t const& handle, CUGRAPH_EXPECTS(epsilon >= 0.0, "Invalid input argument: epsilon should be non-negative."); if (do_expensive_check) { - if (adj_matrix_row_out_weight_sums != nullptr) { - auto has_negative_weight_sums = any_of_adj_matrix_row( - handle, pull_graph_view, adj_matrix_row_out_weight_sums, [] __device__(auto val) { + if (precomputed_vertex_out_weight_sums != nullptr) { + auto num_negative_precomputed_vertex_out_weight_sums = count_if_v( + handle, pull_graph_view, precomputed_vertex_out_weight_sums, [] __device__(auto val) { return val < result_t{0.0}; }); CUGRAPH_EXPECTS( - has_negative_weight_sums == false, + num_negative_precomputed_vertex_out_weight_sums == 0, "Invalid input argument: outgoing edge weight sum values should be non-negative."); } @@ -134,10 +134,10 @@ void pagerank(raft::handle_t const& handle, // 2. compute the sums of the out-going edge weights (if not provided) - rmm::device_vector tmp_adj_matrix_row_out_weight_sums{}; - if (adj_matrix_row_out_weight_sums == nullptr) { - rmm::device_vector tmp_out_weight_sums(pull_graph_view.get_number_of_local_vertices(), - weight_t{0.0}); + rmm::device_uvector tmp_vertex_out_weight_sums(0, handle.get_stream()); + if (precomputed_vertex_out_weight_sums == nullptr) { + tmp_vertex_out_weight_sums.resize(pull_graph_view.get_number_of_local_vertices(), + handle.get_stream()); // FIXME: better refactor this out (computing out-degree). 
copy_v_transform_reduce_out_nbr( handle, @@ -148,19 +148,12 @@ void pagerank(raft::handle_t const& handle, return w; }, weight_t{0.0}, - tmp_out_weight_sums.data().get()); - - tmp_adj_matrix_row_out_weight_sums.assign( - pull_graph_view.get_number_of_local_adj_matrix_partition_rows(), weight_t{0.0}); - copy_to_adj_matrix_row(handle, - pull_graph_view, - tmp_out_weight_sums.data().get(), - tmp_adj_matrix_row_out_weight_sums.begin()); + tmp_vertex_out_weight_sums.data()); } - auto row_out_weight_sums = adj_matrix_row_out_weight_sums != nullptr - ? adj_matrix_row_out_weight_sums - : tmp_adj_matrix_row_out_weight_sums.data().get(); + auto vertex_out_weight_sums = precomputed_vertex_out_weight_sums != nullptr + ? precomputed_vertex_out_weight_sums + : tmp_vertex_out_weight_sums.data(); // 3. initialize pagerank values @@ -197,43 +190,49 @@ void pagerank(raft::handle_t const& handle, // 5. pagerank iteration // old PageRank values - rmm::device_vector adj_matrix_row_pageranks( - pull_graph_view.get_number_of_local_adj_matrix_partition_rows(), result_t{0.0}); + rmm::device_uvector old_pageranks(pull_graph_view.get_number_of_local_vertices(), + handle.get_stream()); + rmm::device_uvector adj_matrix_row_pageranks( + pull_graph_view.get_number_of_local_adj_matrix_partition_rows(), handle.get_stream()); size_t iter{0}; while (true) { - copy_to_adj_matrix_row(handle, pull_graph_view, pageranks, adj_matrix_row_pageranks.begin()); + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + pageranks, + pageranks + pull_graph_view.get_number_of_local_vertices(), + old_pageranks.data()); - auto row_val_first = thrust::make_zip_iterator( - thrust::make_tuple(adj_matrix_row_pageranks.begin(), row_out_weight_sums)); - thrust::transform( - rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - row_val_first, - row_val_first + pull_graph_view.get_number_of_local_adj_matrix_partition_rows(), - adj_matrix_row_pageranks.begin(), - [] __device__(auto val) { 
- auto const row_pagerank = thrust::get<0>(val); - auto const row_out_weight_sum = thrust::get<1>(val); - auto const divisor = - row_out_weight_sum == result_t{0.0} ? result_t{1.0} : row_out_weight_sum; - return row_pagerank / divisor; - }); - - auto dangling_sum = transform_reduce_v_with_adj_matrix_row( + auto vertex_val_first = + thrust::make_zip_iterator(thrust::make_tuple(pageranks, vertex_out_weight_sums)); + + auto dangling_sum = transform_reduce_v( handle, pull_graph_view, - thrust::make_constant_iterator(0) /* dummy */, - row_val_first, - [] __device__(auto v_val, auto row_val) { - auto const row_pagerank = thrust::get<0>(row_val); - auto const row_out_weight_sum = thrust::get<1>(row_val); - return row_out_weight_sum == result_t{0.0} ? row_pagerank : result_t{0.0}; + vertex_val_first, + [] __device__(auto val) { + auto const pagerank = thrust::get<0>(val); + auto const out_weight_sum = thrust::get<1>(val); + return out_weight_sum == result_t{0.0} ? pagerank : result_t{0.0}; }, result_t{0.0}); - auto unvarying_part = - personalization_vertices == nullptr - ? (dangling_sum + static_cast(1.0 - alpha)) / static_cast(num_vertices) - : result_t{0.0}; + thrust::transform(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + vertex_val_first, + vertex_val_first + pull_graph_view.get_number_of_local_vertices(), + pageranks, + [] __device__(auto val) { + auto const pagerank = thrust::get<0>(val); + auto const out_weight_sum = thrust::get<1>(val); + auto const divisor = + out_weight_sum == result_t{0.0} ? result_t{1.0} : out_weight_sum; + return pagerank / divisor; + }); + + copy_to_adj_matrix_row(handle, pull_graph_view, pageranks, adj_matrix_row_pageranks.begin()); + + auto unvarying_part = personalization_vertices == nullptr + ? 
(dangling_sum * alpha + static_cast(1.0 - alpha)) / + static_cast(num_vertices) + : result_t{0.0}; copy_v_transform_reduce_in_nbr( handle, @@ -258,21 +257,16 @@ void pagerank(raft::handle_t const& handle, auto v = thrust::get<0>(val); auto value = thrust::get<1>(val); *(pageranks + vertex_partition.get_local_vertex_offset_from_vertex_nocheck(v)) += - (dangling_sum + static_cast(1.0 - alpha)) * (value / personalization_sum); + (dangling_sum * alpha + static_cast(1.0 - alpha)) * + (value / personalization_sum); }); } - auto diff_sum = transform_reduce_v_with_adj_matrix_row( + auto diff_sum = transform_reduce_v( handle, pull_graph_view, - pageranks, - thrust::make_zip_iterator( - thrust::make_tuple(adj_matrix_row_pageranks.begin(), row_out_weight_sums)), - [] __device__(auto v_val, auto row_val) { - auto multiplier = - thrust::get<1>(row_val) == result_t{0.0} ? result_t{1.0} : thrust::get<1>(row_val); - return std::abs(v_val - thrust::get<0>(row_val) * multiplier); - }, + thrust::make_zip_iterator(thrust::make_tuple(pageranks, old_pageranks.data())), + [] __device__(auto val) { return std::abs(thrust::get<0>(val) - thrust::get<1>(val)); }, result_t{0.0}); iter++; @@ -292,7 +286,7 @@ void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - weight_t* adj_matrix_row_out_weight_sums, + weight_t* precomputed_vertex_out_weight_sums, vertex_t* personalization_vertices, result_t* personalization_values, vertex_t personalization_vector_size, @@ -305,7 +299,7 @@ void pagerank(raft::handle_t const& handle, { detail::pagerank(handle, graph_view, - adj_matrix_row_out_weight_sums, + precomputed_vertex_out_weight_sums, personalization_vertices, personalization_values, personalization_vector_size, @@ -321,7 +315,7 @@ void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float* adj_matrix_row_out_weight_sums, + float* 
precomputed_vertex_out_weight_sums, int32_t* personalization_vertices, float* personalization_values, int32_t personalization_vector_size, @@ -334,7 +328,7 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double* adj_matrix_row_out_weight_sums, + double* precomputed_vertex_out_weight_sums, int32_t* personalization_vertices, double* personalization_values, int32_t personalization_vector_size, @@ -347,7 +341,7 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float* adj_matrix_row_out_weight_sums, + float* precomputed_vertex_out_weight_sums, int32_t* personalization_vertices, float* personalization_values, int32_t personalization_vector_size, @@ -360,7 +354,7 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double* adj_matrix_row_out_weight_sums, + double* precomputed_vertex_out_weight_sums, int32_t* personalization_vertices, double* personalization_values, int32_t personalization_vector_size, @@ -373,7 +367,7 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float* adj_matrix_row_out_weight_sums, + float* precomputed_vertex_out_weight_sums, int64_t* personalization_vertices, float* personalization_values, int64_t personalization_vector_size, @@ -386,7 +380,7 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double* adj_matrix_row_out_weight_sums, + double* precomputed_vertex_out_weight_sums, int64_t* personalization_vertices, double* personalization_values, int64_t personalization_vector_size, @@ -399,7 +393,7 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t 
const& handle, graph_view_t const& graph_view, - float* adj_matrix_row_out_weight_sums, + float* precomputed_vertex_out_weight_sums, int32_t* personalization_vertices, float* personalization_values, int32_t personalization_vector_size, @@ -412,7 +406,7 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double* adj_matrix_row_out_weight_sums, + double* precomputed_vertex_out_weight_sums, int32_t* personalization_vertices, double* personalization_values, int32_t personalization_vector_size, @@ -425,7 +419,7 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float* adj_matrix_row_out_weight_sums, + float* precomputed_vertex_out_weight_sums, int32_t* personalization_vertices, float* personalization_values, int32_t personalization_vector_size, @@ -438,7 +432,7 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double* adj_matrix_row_out_weight_sums, + double* precomputed_vertex_out_weight_sums, int32_t* personalization_vertices, double* personalization_values, int32_t personalization_vector_size, @@ -451,7 +445,7 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - float* adj_matrix_row_out_weight_sums, + float* precomputed_vertex_out_weight_sums, int64_t* personalization_vertices, float* personalization_values, int64_t personalization_vector_size, @@ -464,7 +458,7 @@ template void pagerank(raft::handle_t const& handle, template void pagerank(raft::handle_t const& handle, graph_view_t const& graph_view, - double* adj_matrix_row_out_weight_sums, + double* precomputed_vertex_out_weight_sums, int64_t* personalization_vertices, double* personalization_values, int64_t personalization_vector_size, diff --git 
a/cpp/src/experimental/sssp.cu b/cpp/src/experimental/sssp.cu index b1bc2968c71..ebcde1b1444 100644 --- a/cpp/src/experimental/sssp.cu +++ b/cpp/src/experimental/sssp.cu @@ -128,7 +128,7 @@ void sssp(raft::handle_t const &handle, push_graph_view.get_number_of_local_vertices()); VertexFrontier, vertex_t, - false, + GraphViewType::is_multi_gpu, static_cast(Bucket::num_buckets)> vertex_frontier(handle, bucket_sizes); @@ -139,13 +139,17 @@ void sssp(raft::handle_t const &handle, push_graph_view.get_number_of_local_adj_matrix_partition_rows() ? true : false; - rmm::device_vector adj_matrix_row_distances{}; + rmm::device_uvector adj_matrix_row_distances(0, handle.get_stream()); if (!vertex_and_adj_matrix_row_ranges_coincide) { - adj_matrix_row_distances.assign(push_graph_view.get_number_of_local_adj_matrix_partition_rows(), - std::numeric_limits::max()); + adj_matrix_row_distances.resize(push_graph_view.get_number_of_local_adj_matrix_partition_rows(), + handle.get_stream()); + thrust::fill(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + adj_matrix_row_distances.begin(), + adj_matrix_row_distances.end(), + std::numeric_limits::max()); } auto row_distances = - !vertex_and_adj_matrix_row_ranges_coincide ? adj_matrix_row_distances.data().get() : distances; + !vertex_and_adj_matrix_row_ranges_coincide ? 
adj_matrix_row_distances.data() : distances; if (push_graph_view.is_local_vertex_nocheck(source_vertex)) { vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).insert(source_vertex); @@ -208,6 +212,8 @@ void sssp(raft::handle_t const &handle, auto old_near_far_threshold = near_far_threshold; near_far_threshold += delta; + size_t new_near_size{0}; + size_t new_far_size{0}; while (true) { vertex_frontier.split_bucket( static_cast(Bucket::far), @@ -223,18 +229,27 @@ void sssp(raft::handle_t const &handle, return static_cast(Bucket::far); } }); - if (vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).aggregate_size() > - 0) { + new_near_size = + vertex_frontier.get_bucket(static_cast(Bucket::cur_near)).aggregate_size(); + new_far_size = + vertex_frontier.get_bucket(static_cast(Bucket::far)).aggregate_size(); + if ((new_near_size > 0) || (new_far_size == 0)) { break; } else { near_far_threshold += delta; } } + if ((new_near_size == 0) && (new_far_size == 0)) { break; } } else { break; } } + CUDA_TRY(cudaStreamSynchronize( + handle.get_stream())); // this is as necessary vertex_frontier will become out-of-scope once + // this function returns (FIXME: should I stream sync in VertexFrontier + // destructor?) 
+ return; } diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index 9705f229548..b0f24fa377a 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -645,7 +645,15 @@ void call_sssp(raft::handle_t const& handle, } } +// Helper for setting up subcommunicators +void init_subcomms(raft::handle_t& handle, size_t row_comm_size) +{ + partition_2d::subcomm_factory_t subcomm_factory(handle, + row_comm_size); +} + // Explicit instantiations + template std::pair call_louvain(raft::handle_t const& handle, graph_container_t const& graph_container, void* identifiers, @@ -772,12 +780,5 @@ template void call_sssp(raft::handle_t const& handle, int64_t* predecessors, const int64_t source_vertex); -// Helper for setting up subcommunicators -void init_subcomms(raft::handle_t& handle, size_t row_comm_size) -{ - partition_2d::subcomm_factory_t subcomm_factory(handle, - row_comm_size); -} - } // namespace cython } // namespace cugraph From d9457af0a7345c0f988c4725a2fcc9e5b4efbbd0 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Wed, 14 Oct 2020 08:19:22 -0400 Subject: [PATCH 10/16] [REVIEW] FEA MNMG louvain implementation (#1172) * update to use collective_utils.cuh * Updated to call drop() correctly after cudf API update. * Added args to support calling get_vertex_identifiers(). * Style fixes, removed commented out code meant for a future change. * Updated comment with description of new 'identifiers' arg. 
* MNMG Louvain, with debug code, working on SG * add additional functions to graph_view * fix algorithm API to support old and new graphs * fixed confusing variable names * reorder functions to be consistent with elsewhere * fix a compiler error * accommodate the change of raft allgatherv's input parameter displs type from int[] to size_t[] * update change log * update change log * update RAFT tag * Safety commit, still WIP, does not compile - updates for 2D graph support and upcoming 2D shuffle support * safety commit, does not pass tests: updated enough to be able to run the MG Louvain test. * temporary commit of copy_to_adj_matrix_row.cuh and collective_utils.cuh to checkout another branch * Updated call_louvain() to use the new graph_t types. Still WIP, needs louvain updates to compile. * fix a bug in matrix partitioning ranges * Merged latest branch, got things compiling * temporary commit * fix errors in previous merge conflicts * extend copy_to_adj_matrix_row.cuh for MNMG * rename collective_utils.cuh to comm_utils.cuh * rename copy_v_transform_reduce_nbr.cuh to copy_v_transform_reduce_in_out_nbr.cuh * merge copy_to_adj_matrix_row.cuh & copy_to_adj_matrix_col.cuh * extend copy_v_transform_reduce_(in|out)_nbr for MNMG * extend Bucket for MNMG * add get_vertex_partition_size * add more explicit instantiation cases for BFS, SSSP, PageRank, KatzCentrality * WIP: updates for incorporating new 2D shuffle data, still does not pass test. * some code cleanup in preparation for MNMG testing * Adding updates from iroy30 for calling shuffle from louvain.py * extend update_frontier_v_push_if_out_nbr.cuh for MNMG * wrap debugging calls in ifdef DEBUG * delete spurious comment * Updated to extract and pass the partition_t info and call the graph_t ctor. Now having a problem finding the right subcommunicator.
* Updates to set up subcomms - having a problem with something needed by subcomms not being initialized: "address not mapped to object at address (nil)" * Added p2p flag to comms initialize() to enable initialization of UCX endpoints needed for MG test. * code refinement * refactor copy_v_transform_reduce_in|out_nbr * bug fix (thanks Rick) * clang-format * bug fix * safety commit: committing with debug prints to allow other team members to debug in parallel. * clean up a few things * safety commit: more updates to address problems instantiating graph_t (using num edges for partition instead of global for edgelist) and for debugging (print statements). * Changing how row and col rank are obtained, added debug prints for edge lists info * Fixes to partition_t get_matrix_partition_major/minor methods based on feedback. * bug fixes * bug fix * latest updates * fix to get latest pattern accelerators to work correctly * Update shuffle.py * Integrating changes from iroy30 to produce "option 1" shuffle output by default, with an option to enable "option 2", temporarily enabled graph expensive checks for debugging.
* add minimal update to create a PR * pagerank 2D cython/python infrastructure * 2D infra- bfs and sssp * debugging * add a work around for source (or destination) == self case for isend/irecv * fix a warning * remove dummy change log * more debugging * debugging * sgpu pagerank edits * more louvain debugging * edits * debugging * add namespace * debugging * pull branch0.16 * update test * in copy_v_transform_reduce_in|out_nbr, implement missing communication alnog the minor direction * debugging * bug fix (assertion failure)\n * fix merge issues * bug fix in copy_v_transform_reduce_in_out_nbr.cuh * clang-format * fix bug in cython graph creation * debugging * enforce consistency in variable naming related to subcommunicators * bug fix (graph construction) * bug fix (vertex_partition_segment_offsets) * review updates * clang * bug fix (caching comm_rank in partition_t object) * updatelocal_verts * debugging comms * bug fix (scale dangling_sum by damping factor) * remove transform_reduce_v_with_adj_matrix_row * replace device_vector with device_uvector in sssp * bfs updates to 2D infra * sssp 2D integration * sssp * flake8 * clang * add host_scalalr_bcast to comm_utils * remove unnecessary include * review changes * review changes * bug fix in update_frontier_v_push_if_out_nbr * bug fix in VertexFrontier declaration * add debug print for pagerank sum * remove dummy code * bug fix in assert * fix timing bug with isend/irecv * fix compile error * fix debug compile error * add missing cudaStreamSynchronize * guard raft::grid_1d_thread_t * compile error fix * SG bug fix (calling get_rank() on uninitialized comms) * update versions of raft and cuco * latest debugging * BFS bug fix * fix a PageRank bug * pattern accelerator bug fix (found testing SSSP) * more debugging * Update mg_pagerank_wrapper.pyx * review updates * bug fix in BFS communication * review updates * Revert "fix compile error" This reverts commit 900fd1143c6be38a4e974ff598627968eae20a07. 
* Revert "fix timing bug with isend/irecv" This reverts commit e0e696a580cfd2ef0bbe45dfd7e9845e139bee36. * Revert "bug fix in assert" This reverts commit 97b98ed4259a28afb050b1f6142ed91adae40264. * Revert "remove dummy code" This reverts commit facc70c50a0bde5ba06a5ddef830e23275ff5751. * Revert "add debug print for pagerank sum" This reverts commit c479b6df0855b70eb9340df761186ef85e247dcc. * Revert "bug fix in VertexFrontier declaration" This reverts commit 44e3e10d1da49fa5de3a54c31ff9f9d6bc3f1808. * Revert "bug fix in update_frontier_v_push_if_out_nbr" This reverts commit dd800014e2ce9985234e38db81c9b6276238873b. * Revert "remove unnecessary include" This reverts commit c55dbfb2af9a89ef289ffa6a0501c68b63f47900. * Revert "add host_scalalr_bcast to comm_utils" This reverts commit 6430ad55fef31749d340fc9daffe689966f8d83c. * Revert "replace device_vector with device_uvector in sssp" This reverts commit d6b2e5883f2a98f0e4ebc904ec4513bcb5f3aabe. * Revert "remove transform_reduce_v_with_adj_matrix_row" This reverts commit 21d4e104da02ef4d2609e2c05cd26471a40a6188. * Revert "bug fix (scale dangling_sum by damping factor)" This reverts commit 15818f74fe160c81e40987feb1162248d41e9c06. * Revert "bug fix (caching comm_rank in partition_t object)" This reverts commit bd2dd834f8df92944f64ff56fa698573fea9f416. * Revert "bug fix (vertex_partition_segment_offsets)" This reverts commit a006b9940b8d32d4e56cfc2b4a5746c14a51388c. * Revert "bug fix (graph construction)" This reverts commit 59fadefd0c7e6fd25c1332d5403b0c86e71defc2. * Revert "enforce consistency in variable naming related to subcommunicators" This reverts commit 790549f141c46dd463618504303f96f64e2ce712. * Revert "clang-format" This reverts commit 761f7aa81761d99832c6a9748679418e776ac495. * Revert "bug fix in copy_v_transform_reduce_in_out_nbr.cuh" This reverts commit f874f6517bfe76a317e51a681e9c33e7aa268004. * Revert "bug fix (assertion failure)\n" This reverts commit a33c2d10bcea579a12e298c0b5bb8b4917fd21e0. 
* Revert "in copy_v_transform_reduce_in|out_nbr, implement missing communication alnog the minor direction" This reverts commit 6e1b152630e1a5579d55a2f0948c0c010a5466a5. * Revert "fix a warning" This reverts commit 25607cad97cc6107586dfe0d9d30ad5ee2ca74b8. * Revert "add a work around for source (or destination) == self case for isend/irecv" This reverts commit 2be9e5f9a016d5884423b6e2b59e43ed646cde07. * revert * clang * update tests and predecessor * working MNMG Louvain on Karate and Dolphin with 2 GPUs * turn off debugging * clean up some output * support compiling on systems without libcu++ * Update mg_pagerank_wrapper.pyx * debugging 2 x N case * use default * use default if prows not specified * disable check for libcu++, not working * update changelog * fix some unit testing * rename shuffle2 back to shuffle, debug some unit test stuff * fix clang issues * update from Ishika/Rick * somehow lost shuffle update * undo last merge * add some synchronization calls, turn on debugging, to tryand isolate 2 x 4 error * remove some old debugging from Rick * fix the mess-up in merging with unmerged PRs * transitioning from UCX send/recv to NCCL send/recv * get new cuco from Jake to retrieve libcu++ * Jake's technique needs to be applied to our CMakefile * debugging * update cuco version to latest * fix clang formatting * move MurmurHash again... 
* update raft version * manually merge branch-0.16 * shuffle no longer takes prows/pcols, pass to init instead * working version * code cleanup * remove some old print statements * fix clang-format issues * update cuco revision * revert to running karate test Co-authored-by: Seunghwa Kang Co-authored-by: Rick Ratzel Co-authored-by: Iroy30 <41401566+Iroy30@users.noreply.github.com> Co-authored-by: Ishika Roy Co-authored-by: Charles Hastings Co-authored-by: Charles Hastings Co-authored-by: Charles Hastings Co-authored-by: Charles Hastings --- CHANGELOG.md | 2 + cpp/CMakeLists.txt | 41 + cpp/include/compute_partition.cuh | 192 ++ cpp/include/experimental/graph_view.hpp | 31 + cpp/src/community/louvain.cu | 25 +- cpp/src/community/louvain.cuh | 2 + cpp/src/experimental/louvain.cuh | 1669 ++++++++++++++++- cpp/src/experimental/shuffle.cuh | 226 +++ cpp/src/utilities/cython.cu | 37 +- cpp/src/utilities/graph_utils.cuh | 27 +- cpp/tests/CMakeLists.txt | 11 + cpp/tests/community/louvain_test.cu | 62 + cpp/tests/experimental/louvain_test.cu | 115 ++ cpp/tests/utilities/test_utilities.hpp | 2 - python/cugraph/dask/community/louvain.py | 55 +- .../dask/community/louvain_wrapper.pyx | 9 +- 16 files changed, 2412 insertions(+), 94 deletions(-) create mode 100644 cpp/include/compute_partition.cuh create mode 100644 cpp/src/experimental/shuffle.cuh create mode 100644 cpp/tests/experimental/louvain_test.cu diff --git a/CHANGELOG.md b/CHANGELOG.md index 36bb67cf326..ebdb268d2b1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - PR #1178 Refactored cython graph factory code to scale to additional data types - PR #1175 Integrated 2D pagerank python/cython infra - PR #1177 Integrated 2D bfs and sssp python/cython infra +- PR #1172 MNMG Louvain implementation ## Improvements - PR 1081 MNMG Renumbering - sort partitions by degree @@ -52,6 +53,7 @@ - PR #1174 Fix bugs in MNMG pattern accelerators and pattern accelerator based implementations of MNMG PageRank, BFS, and SSSP 
+ # cuGraph 0.15.0 (26 Aug 2020) ## New Features diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 48b5e0835f0..3a696b9e8b7 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -210,6 +210,45 @@ if(NOT thrust_POPULATED) endif() set(THRUST_INCLUDE_DIR "${thrust_SOURCE_DIR}") +# - cuco +message("Fetching cuco") + +FetchContent_Declare( + cuco + GIT_REPOSITORY https://github.com/NVIDIA/cuCollections.git + GIT_TAG 729d07db2e544e173efefdd168db21f7b8adcfaf + GIT_SHALLOW true +) + +FetchContent_GetProperties(cuco) +if(NOT cuco_POPULATED) + FetchContent_Populate(cuco) +endif() +set(CUCO_INCLUDE_DIR "${cuco_SOURCE_DIR}/include") + +# - libcudacxx +# NOTE: This is necessary because libcudacxx is not supported in +# debian cuda 10.2 packages. Once 10.2 is deprecated +# we should not need this any longer. +message("Fetching libcudacxx") + +FetchContent_Declare( + libcudacxx + GIT_REPOSITORY https://github.com/NVIDIA/libcudacxx.git + GIT_TAG 1.3.0-rc0 + GIT_SHALLOW true +) + +FetchContent_GetProperties(libcudacxx) +if(NOT libcudacxx_POPULATED) + message("populating libcudacxx") + FetchContent_Populate(libcudacxx) +endif() +set(LIBCUDACXX_INCLUDE_DIR "${libcudacxx_SOURCE_DIR}/include") +message("set LIBCUDACXX_INCLUDE_DIR to: ${LIBCUDACXX_INCLUDE_DIR}") + + + ################################################################################################### # - External Projects ----------------------------------------------------------------------------- @@ -354,6 +393,8 @@ add_dependencies(cugraph raft) target_include_directories(cugraph PRIVATE "${THRUST_INCLUDE_DIR}" + "${CUCO_INCLUDE_DIR}" + "${LIBCUDACXX_INCLUDE_DIR}" "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" "${LIBCYPHERPARSER_INCLUDE}" "${Boost_INCLUDE_DIRS}" diff --git a/cpp/include/compute_partition.cuh b/cpp/include/compute_partition.cuh new file mode 100644 index 00000000000..c81a6237b31 --- /dev/null +++ b/cpp/include/compute_partition.cuh @@ -0,0 +1,192 @@ +/* + * Copyright (c) 2020, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include + +#include + +#include + +namespace cugraph { +namespace experimental { +namespace detail { + +/** + * @brief Class to help compute what partition a vertex id or edge id belongs to + * + * + * FIXME: This should probably be part of the experimental::partition_t class + * rather than having to copy things out of it + * + */ +template +class compute_partition_t { + public: + using graph_view_t = graph_view_type; + using vertex_t = typename graph_view_type::vertex_type; + + compute_partition_t(graph_view_t const &graph_view) + { + init(graph_view); + } + + private: + template * = nullptr> + void init(graph_view_t const &graph_view) + { + } + + template * = nullptr> + void init(graph_view_t const &graph_view) + { + auto partition = graph_view.get_partition(); + row_size_ = partition.get_row_size(); + col_size_ = partition.get_col_size(); + size_ = row_size_ * col_size_; + + vertex_partition_offsets_v_.resize(size_ + 1); + vertex_partition_offsets_v_ = partition.get_vertex_partition_offsets(); + } + + public: + /** + * @brief Compute the partition id for a vertex + * + * This is a device view of the partition data that allows for a device + * function to determine the partition number that is associated with + * a given vertex id. + * + * `vertex_device_view_t` is trivially-copyable and is intended to be passed by + * value. 
+ * + */ + class vertex_device_view_t { + public: + vertex_device_view_t(vertex_t const *d_vertex_partition_offsets, int size) + : d_vertex_partition_offsets_(d_vertex_partition_offsets), size_(size) + { + } + + /** + * @brief Compute the partition id for a vertex + * + * Given a vertex v, return the partition number to which that vertex is assigned + * + */ + __device__ int operator()(vertex_t v) const + { + if (graph_view_t::is_multi_gpu) { + return thrust::distance(d_vertex_partition_offsets_, + thrust::upper_bound(thrust::seq, + d_vertex_partition_offsets_, + d_vertex_partition_offsets_ + size_ + 1, + v)) - + 1; + } else + return 0; + } + + private: + vertex_t const *d_vertex_partition_offsets_; + int size_; + }; + + class edge_device_view_t { + public: + edge_device_view_t(vertex_t const *d_vertex_partition_offsets, + int row_size, + int col_size, + int size) + : d_vertex_partition_offsets_(d_vertex_partition_offsets), + row_size_(row_size), + col_size_(col_size), + size_(size) + { + } + + /** + * @brief Compute the partition id for a vertex + * + * Given a pair of vertices (src, dst), return the partition number to + * which an edge between src and dst would be assigned. 
+ * + */ + __device__ int operator()(vertex_t src, vertex_t dst) const + { + if (graph_view_t::is_multi_gpu) { + std::size_t src_partition = + thrust::distance(d_vertex_partition_offsets_, + thrust::upper_bound(thrust::seq, + d_vertex_partition_offsets_, + d_vertex_partition_offsets_ + size_ + 1, + src)) - + 1; + std::size_t dst_partition = + thrust::distance(d_vertex_partition_offsets_, + thrust::upper_bound(thrust::seq, + d_vertex_partition_offsets_, + d_vertex_partition_offsets_ + size_ + 1, + dst)) - + 1; + + std::size_t row = src_partition / row_size_; + std::size_t col = dst_partition / col_size_; + + return row * row_size_ + col; + } else { + return 0; + } + } + + private: + vertex_t const *d_vertex_partition_offsets_; + int row_size_; + int col_size_; + int size_; + }; + + /** + * @brief get a vertex device view so that device code can identify which + * gpu a vertex is assigned to + * + */ + vertex_device_view_t vertex_device_view() const + { + return vertex_device_view_t(vertex_partition_offsets_v_.data().get(), size_); + } + + /** + * @brief get an edge device view so that device code can identify which + * gpu an edge is assigned to + * + */ + edge_device_view_t edge_device_view() const + { + return edge_device_view_t( + vertex_partition_offsets_v_.data().get(), row_size_, col_size_, size_); + } + + private: + rmm::device_vector vertex_partition_offsets_v_{}; + int row_size_{1}; + int col_size_{1}; + int size_{1}; +}; + +} // namespace detail +} // namespace experimental +} // namespace cugraph diff --git a/cpp/include/experimental/graph_view.hpp b/cpp/include/experimental/graph_view.hpp index 93fb44e7faf..ba327047b1d 100644 --- a/cpp/include/experimental/graph_view.hpp +++ b/cpp/include/experimental/graph_view.hpp @@ -114,6 +114,17 @@ class partition_t { } } + int get_row_size() const { return row_comm_size_; } + + int get_col_size() const { return col_comm_size_; } + + int get_comm_rank() const { return comm_rank_; } + + std::vector const& 
get_vertex_partition_offsets() const + { + return vertex_partition_offsets_; + } + std::tuple get_local_vertex_range() const { return std::make_tuple(vertex_partition_offsets_[comm_rank_], @@ -321,6 +332,8 @@ class graph_view_t 0; } + partition_t get_partition() const { return partition_; } + vertex_t get_number_of_local_vertices() const { return partition_.get_local_vertex_last() - partition_.get_local_vertex_first(); @@ -428,6 +441,24 @@ class graph_view_t louvain( int32_t *, size_t, double); -template std::pair louvain( - raft::handle_t const &, - experimental::graph_view_t const &, - int64_t *, - size_t, - float); -template std::pair louvain( - raft::handle_t const &, - experimental::graph_view_t const &, - int64_t *, - size_t, - double); template std::pair louvain( raft::handle_t const &, experimental::graph_view_t const &, @@ -135,6 +123,7 @@ template std::pair louvain( int32_t *, size_t, double); + template std::pair louvain( raft::handle_t const &, experimental::graph_view_t const &, @@ -147,18 +136,6 @@ template std::pair louvain( int32_t *, size_t, double); -template std::pair louvain( - raft::handle_t const &, - experimental::graph_view_t const &, - int64_t *, - size_t, - float); -template std::pair louvain( - raft::handle_t const &, - experimental::graph_view_t const &, - int64_t *, - size_t, - double); template std::pair louvain( raft::handle_t const &, experimental::graph_view_t const &, diff --git a/cpp/src/community/louvain.cuh b/cpp/src/community/louvain.cuh index 8cec3eccfe6..0e112e836e1 100644 --- a/cpp/src/community/louvain.cuh +++ b/cpp/src/community/louvain.cuh @@ -600,6 +600,8 @@ class Louvain { CHECK_CUDA(stream_); src_indices_v_.resize(graph.number_of_edges); + indices_v_.resize(graph.number_of_edges); + weights_v_.resize(graph.number_of_edges); } protected: diff --git a/cpp/src/experimental/louvain.cuh b/cpp/src/experimental/louvain.cuh index cadc685b119..1f6f8633bcd 100644 --- a/cpp/src/experimental/louvain.cuh +++ 
b/cpp/src/experimental/louvain.cuh @@ -15,11 +15,368 @@ */ #pragma once +#include + #include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +//#define TIMING + +#ifdef TIMING +#include +#endif + namespace cugraph { namespace experimental { +namespace detail { + +template +struct create_cuco_pair_t { + cuco::pair_type __device__ operator()(data_t data) + { + cuco::pair_type tmp; + tmp.first = data; + tmp.second = data_t{0}; + return tmp; + } +}; + +// +// These classes should allow cuco::static_map to generate hash tables of +// different configurations. +// + +// +// Compare edges based on src[e] and dst[e] matching +// +template +class src_dst_equality_comparator_t { + public: + src_dst_equality_comparator_t(rmm::device_vector const &src, + rmm::device_vector const &dst, + sentinel_t sentinel_value) + : d_src_{src.data().get()}, d_dst_{dst.data().get()}, sentinel_value_(sentinel_value) + { + } + + src_dst_equality_comparator_t(data_t const *d_src, data_t const *d_dst, sentinel_t sentinel_value) + : d_src_{d_src}, d_dst_{d_dst}, sentinel_value_(sentinel_value) + { + } + + template + __device__ bool operator()(idx_type lhs_index, idx_type rhs_index) const noexcept + { + return (lhs_index != sentinel_value_) && (rhs_index != sentinel_value_) && + (d_src_[lhs_index] == d_src_[rhs_index]) && (d_dst_[lhs_index] == d_dst_[rhs_index]); + } + + private: + data_t const *d_src_; + data_t const *d_dst_; + sentinel_t sentinel_value_; +}; + +// +// Hash edges based src[e] and dst[e] +// +template +class src_dst_hasher_t { + public: + src_dst_hasher_t(rmm::device_vector const &src, rmm::device_vector const &dst) + : d_src_{src.data().get()}, d_dst_{dst.data().get()} + { + } + + src_dst_hasher_t(data_t const *d_src, data_t const *d_dst) : d_src_{d_src}, d_dst_{d_dst} {} + + template + __device__ auto operator()(idx_type index) const + { + cuco::detail::MurmurHash3_32 hasher; + + auto h_src = 
hasher(d_src_[index]); + auto h_dst = hasher(d_dst_[index]); + + /* + * Combine the source hash and the dest hash into a single hash value + * + * Taken from the Boost hash_combine function + * https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html + */ + h_src ^= h_dst + 0x9e3779b9 + (h_src << 6) + (h_src >> 2); + + return h_src; + } + + private: + data_t const *d_src_; + data_t const *d_dst_; +}; + +// +// Compare edges based on src[e] and cluster[dst[e]] matching +// +template +class src_cluster_equality_comparator_t { + public: + src_cluster_equality_comparator_t(rmm::device_vector const &src, + rmm::device_vector const &dst, + rmm::device_vector const &dst_cluster_cache, + data_t base_dst_id, + sentinel_t sentinel_value) + : d_src_{src.data().get()}, + d_dst_{dst.data().get()}, + d_dst_cluster_{dst_cluster_cache.data().get()}, + base_dst_id_(base_dst_id), + sentinel_value_(sentinel_value) + { + } + + src_cluster_equality_comparator_t(data_t const *d_src, + data_t const *d_dst, + data_t const *d_dst_cluster_cache, + data_t base_dst_id, + sentinel_t sentinel_value) + : d_src_{d_src}, + d_dst_{d_dst}, + d_dst_cluster_{d_dst_cluster_cache}, + base_dst_id_(base_dst_id), + sentinel_value_(sentinel_value) + { + } + + __device__ bool operator()(sentinel_t lhs_index, sentinel_t rhs_index) const noexcept + { + return (lhs_index != sentinel_value_) && (rhs_index != sentinel_value_) && + (d_src_[lhs_index] == d_src_[rhs_index]) && + (d_dst_cluster_[d_dst_[lhs_index] - base_dst_id_] == + d_dst_cluster_[d_dst_[rhs_index] - base_dst_id_]); + } + + private: + data_t const *d_src_; + data_t const *d_dst_; + data_t const *d_dst_cluster_; + data_t base_dst_id_; + sentinel_t sentinel_value_; +}; + +// +// Hash edges based src[e] and cluster[dst[e]] +// +template +class src_cluster_hasher_t { + public: + src_cluster_hasher_t(rmm::device_vector const &src, + rmm::device_vector const &dst, + rmm::device_vector const &dst_cluster_cache, + data_t base_dst_id) 
+ : d_src_{src.data().get()}, + d_dst_{dst.data().get()}, + d_dst_cluster_{dst_cluster_cache.data().get()}, + base_dst_id_(base_dst_id) + { + } + + src_cluster_hasher_t(data_t const *d_src, + data_t const *d_dst, + data_t const *d_dst_cluster_cache, + data_t base_dst_id) + : d_src_{d_src}, d_dst_{d_dst}, d_dst_cluster_{d_dst_cluster_cache}, base_dst_id_(base_dst_id) + { + } + + template + __device__ auto operator()(idx_type index) const + { + cuco::detail::MurmurHash3_32 hasher; + + auto h_src = hasher(d_src_[index]); + auto h_cluster = hasher(d_dst_cluster_[d_dst_[index] - base_dst_id_]); + + /* + * Combine the source hash and the cluster hash into a single hash value + * + * Taken from the Boost hash_combine function + * https://www.boost.org/doc/libs/1_35_0/doc/html/boost/hash_combine_id241013.html + */ + h_src ^= h_cluster + 0x9e3779b9 + (h_src << 6) + (h_src >> 2); + + return h_src; + } + + private: + data_t const *d_src_; + data_t const *d_dst_; + data_t const *d_dst_cluster_; + data_t base_dst_id_; +}; + +// +// Skip edges where src[e] == dst[e] +// +template +class skip_edge_t { + public: + skip_edge_t(rmm::device_vector const &src, rmm::device_vector const &dst) + : d_src_{src.data().get()}, d_dst_{dst.data().get()} + { + } + + skip_edge_t(data_t const *src, data_t const *dst) : d_src_{src}, d_dst_{dst} {} + + template + __device__ auto operator()(idx_type index) const + { + return d_src_[index] == d_dst_[index]; + } + + private: + data_t const *d_src_; + data_t const *d_dst_; +}; + +template +struct lookup_by_vertex_id { + public: + lookup_by_vertex_id(data_t const *d_array, vertex_t const *d_vertices, vertex_t base_vertex_id) + : d_array_(d_array), d_vertices_(d_vertices), base_vertex_id_(base_vertex_id) + { + } + + template + data_t operator() __device__(edge_t edge_id) const + { + return d_array_[d_vertices_[edge_id] - base_vertex_id_]; + } + + private: + data_t const *d_array_; + vertex_t const *d_vertices_; + vertex_t base_vertex_id_; +}; + +template 
+vector_t remove_elements_from_vector(vector_t const &input_v, + iterator_t iterator_begin, + iterator_t iterator_end, + function_t function, + cudaStream_t stream) +{ + vector_t temp_v(input_v.size()); + + auto last = thrust::copy_if( + rmm::exec_policy(stream)->on(stream), iterator_begin, iterator_end, temp_v.begin(), function); + + temp_v.resize(thrust::distance(temp_v.begin(), last)); + + return temp_v; +} + +template +vector_t remove_elements_from_vector(vector_t const &input_v, + function_t function, + cudaStream_t stream) +{ + return remove_elements_from_vector(input_v, input_v.begin(), input_v.end(), function, stream); +} + +// FIXME: This should be a generic utility. The one in cython.cu +// is very close to this +template * = nullptr> +std::unique_ptr> +create_graph(raft::handle_t const &handle, + rmm::device_vector const &src_v, + rmm::device_vector const &dst_v, + rmm::device_vector const &weight_v, + std::size_t num_local_verts, + experimental::graph_properties_t graph_props, + view_t const &view) +{ + std::vector> edgelist( + {{src_v.data().get(), + dst_v.data().get(), + weight_v.data().get(), + static_cast(src_v.size())}}); + + return std::make_unique>( + handle, + edgelist, + view.get_partition(), + num_local_verts, + src_v.size(), + graph_props, + false, + false); +} + +template * = nullptr> +std::unique_ptr> +create_graph(raft::handle_t const &handle, + rmm::device_vector const &src_v, + rmm::device_vector const &dst_v, + rmm::device_vector const &weight_v, + std::size_t num_local_verts, + experimental::graph_properties_t graph_props, + view_t const &view) +{ + experimental::edgelist_t edgelist{ + src_v.data().get(), + dst_v.data().get(), + weight_v.data().get(), + static_cast(src_v.size())}; + + return std::make_unique>( + handle, edgelist, num_local_verts, graph_props, false, false); +} + +} // namespace detail + +// +// FIXME: Ultimately, this would be cleaner and more efficient if we did the following: +// +// 1) Create an object that does a 
single level Louvain computation on an input graph +// (no graph contraction) +// 2) Create an object that does graph contraction +// 3) Create Louvain to use these objects in sequence to compute the aggregate result. +// +// In MNMG-world, the graph contraction step is going to create another graph that likely +// fits efficiently in a smaller number of GPUs (eventually one). Decomposing the algorithm +// as above would allow us to eventually run the single GPU version of single level Louvain +// on the contracted graphs - which should be more efficient. +// +// FIXME: We should return the dendrogram and let the python layer clean it up (or have a +// separate C++ function to flatten the dendrogram). There are customers that might +// like the dendrogram and the implementation would be a bit cleaner if we did the +// collapsing as a separate step +// template class Louvain { public: @@ -34,21 +391,1327 @@ class Louvain { graph_view_t::is_multi_gpu>; Louvain(raft::handle_t const &handle, graph_view_t const &graph_view) - : handle_(handle), current_graph_view_(graph_view) + : +#ifdef TIMING + hr_timer_(), +#endif + handle_(handle), + current_graph_view_(graph_view), + compute_partition_(graph_view), + local_num_vertices_(graph_view.get_number_of_local_vertices()), + local_num_rows_(graph_view.get_number_of_local_adj_matrix_partition_rows()), + local_num_cols_(graph_view.get_number_of_local_adj_matrix_partition_cols()), + local_num_edges_(graph_view.get_number_of_edges()), + vertex_weights_v_(graph_view.get_number_of_local_vertices()), + cluster_weights_v_(graph_view.get_number_of_local_vertices()), + cluster_v_(graph_view.get_number_of_local_vertices()), + number_of_vertices_(graph_view.get_number_of_local_vertices()), + stream_(handle.get_stream()) { + if (graph_view_t::is_multi_gpu) { + rank_ = handle.get_comms().get_rank(); + base_vertex_id_ = graph_view.get_local_vertex_first(); + base_src_vertex_id_ = graph_view.get_local_adj_matrix_partition_row_first(0); + 
base_dst_vertex_id_ = graph_view.get_local_adj_matrix_partition_col_first(0); + + raft::copy(&local_num_edges_, + graph_view.offsets() + graph_view.get_local_adj_matrix_partition_row_last(0) - + graph_view.get_local_adj_matrix_partition_row_first(0), + 1, + stream_); + + CUDA_TRY(cudaStreamSynchronize(stream_)); + } + + src_indices_v_.resize(local_num_edges_); + + cugraph::detail::offsets_to_indices( + current_graph_view_.offsets(), local_num_rows_, src_indices_v_.data().get()); + + if (base_src_vertex_id_ > 0) { + thrust::transform(rmm::exec_policy(stream_)->on(stream_), + src_indices_v_.begin(), + src_indices_v_.end(), + thrust::make_constant_iterator(base_src_vertex_id_), + src_indices_v_.begin(), + thrust::plus()); + } } virtual std::pair operator()(vertex_t *d_cluster_vec, size_t max_level, weight_t resolution) { - CUGRAPH_FAIL("unimplemented"); + size_t num_level{0}; + + weight_t total_edge_weight; + total_edge_weight = experimental::transform_reduce_e( + handle_, + current_graph_view_, + thrust::make_constant_iterator(0), + thrust::make_constant_iterator(0), + [] __device__(auto, auto, weight_t wt, auto, auto) { return wt; }, + weight_t{0}); + + weight_t best_modularity = weight_t{-1}; + + // + // Initialize every cluster to reference each vertex to itself + // + thrust::sequence(rmm::exec_policy(stream_)->on(stream_), + cluster_v_.begin(), + cluster_v_.end(), + base_vertex_id_); + thrust::copy( + rmm::exec_policy(stream_)->on(stream_), cluster_v_.begin(), cluster_v_.end(), d_cluster_vec); + + while (num_level < max_level) { + compute_vertex_and_cluster_weights(); + + weight_t new_Q = update_clustering(total_edge_weight, resolution); + + if (new_Q <= best_modularity) { break; } + + best_modularity = new_Q; + + shrink_graph(d_cluster_vec); + + num_level++; + } + + timer_display(std::cout); + + return std::make_pair(num_level, best_modularity); + } + + protected: + void timer_start(std::string const &region) + { +#ifdef TIMING + if (rank_ == 0) 
hr_timer_.start(region); +#endif + } + + void timer_stop(cudaStream_t stream) + { +#ifdef TIMING + if (rank_ == 0) { + CUDA_TRY(cudaStreamSynchronize(stream)); + hr_timer_.stop(); + } +#endif + } + + void timer_display(std::ostream &os) + { +#ifdef TIMING + if (rank_ == 0) hr_timer_.display(os); +#endif + } + + public: + weight_t modularity(weight_t total_edge_weight, weight_t resolution) + { + weight_t sum_degree_squared = experimental::transform_reduce_v( + handle_, + current_graph_view_, + cluster_weights_v_.begin(), + [] __device__(weight_t p) { return p * p; }, + weight_t{0}); + + weight_t sum_internal = experimental::transform_reduce_e( + handle_, + current_graph_view_, + src_cluster_cache_v_.begin(), + dst_cluster_cache_v_.begin(), + [] __device__(auto src, auto dst, weight_t wt, auto src_cluster, auto nbr_cluster) { + if (src_cluster == nbr_cluster) { + return wt; + } else { + return weight_t{0}; + } + }, + weight_t{0}); + + weight_t Q = sum_internal / total_edge_weight - + (resolution * sum_degree_squared) / (total_edge_weight * total_edge_weight); + + return Q; + } + + void compute_vertex_and_cluster_weights() + { + timer_start("compute_vertex_and_cluster_weights"); + + experimental::copy_v_transform_reduce_out_nbr( + handle_, + current_graph_view_, + thrust::make_constant_iterator(0), + thrust::make_constant_iterator(0), + [] __device__(auto src, auto, auto wt, auto, auto) { return wt; }, + weight_t{0}, + vertex_weights_v_.begin()); + + thrust::copy(rmm::exec_policy(stream_)->on(stream_), + vertex_weights_v_.begin(), + vertex_weights_v_.end(), + cluster_weights_v_.begin()); + + cache_vertex_properties( + vertex_weights_v_, src_vertex_weights_cache_v_, dst_vertex_weights_cache_v_); + + cache_vertex_properties( + cluster_weights_v_, src_cluster_weights_cache_v_, dst_cluster_weights_cache_v_); + + timer_stop(stream_); + } + + // + // FIXME: Consider returning d_src_cache and d_dst_cache + // (as a pair). 
This would be a nice optimization + // for single GPU, as we wouldn't need to make 3 copies + // of the data, could return a pair of device pointers to + // local_input_v. + // + template + void cache_vertex_properties(rmm::device_vector const &local_input_v, + rmm::device_vector &src_cache_v, + rmm::device_vector &dst_cache_v, + bool src = true, + bool dst = true) + { + if (src) { + src_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_rows()); + copy_to_adj_matrix_row( + handle_, current_graph_view_, local_input_v.begin(), src_cache_v.begin()); + } + + if (dst) { + dst_cache_v.resize(current_graph_view_.get_number_of_local_adj_matrix_partition_cols()); + copy_to_adj_matrix_col( + handle_, current_graph_view_, local_input_v.begin(), dst_cache_v.begin()); + } + } + + virtual weight_t update_clustering(weight_t total_edge_weight, weight_t resolution) + { + timer_start("update_clustering"); + + rmm::device_vector next_cluster_v(cluster_v_); + + cache_vertex_properties(next_cluster_v, src_cluster_cache_v_, dst_cluster_cache_v_); + + weight_t new_Q = modularity(total_edge_weight, resolution); + weight_t cur_Q = new_Q - 1; + + // To avoid the potential of having two vertices swap clusters + // we will only allow vertices to move up (true) or down (false) + // during each iteration of the loop + bool up_down = true; + + while (new_Q > (cur_Q + 0.0001)) { + cur_Q = new_Q; + + update_by_delta_modularity(total_edge_weight, resolution, next_cluster_v, up_down); + + up_down = !up_down; + + cache_vertex_properties(next_cluster_v, src_cluster_cache_v_, dst_cluster_cache_v_); + + new_Q = modularity(total_edge_weight, resolution); + + if (new_Q > cur_Q) { + thrust::copy(rmm::exec_policy(stream_)->on(stream_), + next_cluster_v.begin(), + next_cluster_v.end(), + cluster_v_.begin()); + } + } + + // cache the final clustering locally on each cpu + cache_vertex_properties(cluster_v_, src_cluster_cache_v_, dst_cluster_cache_v_); + + timer_stop(stream_); + 
return cur_Q; + } + + void update_by_delta_modularity(weight_t total_edge_weight, + weight_t resolution, + rmm::device_vector &next_cluster_v, + bool up_down) + { + rmm::device_vector old_cluster_sum_v(local_num_vertices_); + rmm::device_vector src_old_cluster_sum_cache_v; + + experimental::copy_v_transform_reduce_out_nbr( + handle_, + current_graph_view_, + src_cluster_cache_v_.begin(), + dst_cluster_cache_v_.begin(), + [] __device__(auto src, auto dst, auto wt, auto src_cluster, auto nbr_cluster) { + if ((src != dst) && (src_cluster == nbr_cluster)) { + return wt; + } else + return weight_t{0}; + }, + weight_t{0}, + old_cluster_sum_v.begin()); + + cache_vertex_properties( + old_cluster_sum_v, src_old_cluster_sum_cache_v, empty_cache_weight_v_, true, false); + + detail::src_cluster_equality_comparator_t compare( + src_indices_v_.data().get(), + current_graph_view_.indices(), + dst_cluster_cache_v_.data().get(), + base_dst_vertex_id_, + std::numeric_limits::max()); + detail::src_cluster_hasher_t hasher(src_indices_v_.data().get(), + current_graph_view_.indices(), + dst_cluster_cache_v_.data().get(), + base_dst_vertex_id_); + detail::skip_edge_t skip_edge(src_indices_v_.data().get(), + current_graph_view_.indices()); + + // + // Group edges that lead from same source to same neighboring cluster together + // local_cluster_edge_ids_v will contain edge ids of unique pairs of (src,nbr_cluster). + // If multiple edges exist, one edge id will be chosen (by a parallel race). + // nbr_weights_v will contain the combined weight of all of the edges that connect + // that pair. 
+ // + rmm::device_vector local_cluster_edge_ids_v; + rmm::device_vector nbr_weights_v; + + // + // Perform this combining on the local edges + // + std::tie(local_cluster_edge_ids_v, nbr_weights_v) = combine_local_src_nbr_cluster_weights( + hasher, compare, skip_edge, current_graph_view_.weights(), local_num_edges_); + + // + // In order to compute delta_Q for a given src/nbr_cluster pair, I need the following + // information: + // src + // old_cluster - the cluster that src is currently assigned to + // nbr_cluster + // sum of edges going to new cluster + // vertex weight of the src vertex + // sum of edges going to old cluster + // cluster_weights of old cluster + // cluster_weights of nbr_cluster + // + // Each GPU has locally cached: + // The sum of edges going to the old cluster (computed from + // experimental::copy_v_transform_reduce_out_nbr call above. + // old_cluster + // nbr_cluster + // vertex weight of src vertex + // partial sum of edges going to the new cluster (in nbr_weights) + // + // So the plan is to take the tuple: + // (src, old_cluster, src_vertex_weight, old_cluster_sum, nbr_cluster, nbr_weights) + // and shuffle it around the cluster so that they arrive at the GPU where the pair + // (old_cluster, new_cluster) would be assigned. Then we can aggregate this information + // and compute the delta_Q values. 
+ // + + // + // Define the communication pattern, we're going to send detail + // for edge i to the GPU that is responsible for the vertex + // pair (cluster[src[i]], cluster[dst[i]]) + // + auto communication_schedule = thrust::make_transform_iterator( + local_cluster_edge_ids_v.begin(), + [d_edge_device_view = compute_partition_.edge_device_view(), + d_src_indices = src_indices_v_.data().get(), + d_src_cluster = src_cluster_cache_v_.data().get(), + d_dst_indices = current_graph_view_.indices(), + d_dst_cluster = dst_cluster_cache_v_.data().get(), + base_src_vertex_id = base_src_vertex_id_, + base_dst_vertex_id = base_dst_vertex_id_] __device__(edge_t edge_id) { + return d_edge_device_view(d_src_cluster[d_src_indices[edge_id] - base_src_vertex_id], + d_dst_cluster[d_dst_indices[edge_id] - base_dst_vertex_id]); + }); + + // FIXME: This should really be a variable_shuffle of a tuple, for time + // reasons I'm just doing 6 independent shuffles. + // + rmm::device_vector ocs_v = variable_shuffle( + handle_, + local_cluster_edge_ids_v.size(), + thrust::make_transform_iterator( + local_cluster_edge_ids_v.begin(), + detail::lookup_by_vertex_id(src_old_cluster_sum_cache_v.data().get(), + src_indices_v_.data().get(), + base_src_vertex_id_)), + communication_schedule); + + rmm::device_vector src_cluster_v = + variable_shuffle( + handle_, + local_cluster_edge_ids_v.size(), + thrust::make_transform_iterator( + local_cluster_edge_ids_v.begin(), + detail::lookup_by_vertex_id( + src_cluster_cache_v_.data().get(), src_indices_v_.data().get(), base_src_vertex_id_)), + communication_schedule); + + rmm::device_vector src_vertex_weight_v = + variable_shuffle( + handle_, + local_cluster_edge_ids_v.size(), + thrust::make_transform_iterator( + local_cluster_edge_ids_v.begin(), + detail::lookup_by_vertex_id(src_vertex_weights_cache_v_.data().get(), + src_indices_v_.data().get(), + base_src_vertex_id_)), + communication_schedule); + + rmm::device_vector src_v = variable_shuffle( + 
handle_, + local_cluster_edge_ids_v.size(), + thrust::make_permutation_iterator(src_indices_v_.begin(), local_cluster_edge_ids_v.begin()), + communication_schedule); + + rmm::device_vector nbr_cluster_v = + variable_shuffle( + handle_, + local_cluster_edge_ids_v.size(), + thrust::make_transform_iterator( + local_cluster_edge_ids_v.begin(), + detail::lookup_by_vertex_id( + dst_cluster_cache_v_.data().get(), current_graph_view_.indices(), base_dst_vertex_id_)), + communication_schedule); + + nbr_weights_v = variable_shuffle( + handle_, nbr_weights_v.size(), nbr_weights_v.begin(), communication_schedule); + + // + // At this point, src_v, nbr_cluster_v and nbr_weights_v have been + // shuffled to the correct GPU. We can now compute the final + // value of delta_Q for each neighboring cluster + // + // Again, we'll combine edges that connect the same source to the same + // neighboring cluster and sum their weights. + // + detail::src_dst_equality_comparator_t compare2( + src_v, nbr_cluster_v, std::numeric_limits::max()); + detail::src_dst_hasher_t hasher2(src_v, nbr_cluster_v); + + auto skip_edge2 = [] __device__(auto) { return false; }; + + std::tie(local_cluster_edge_ids_v, nbr_weights_v) = combine_local_src_nbr_cluster_weights( + hasher2, compare2, skip_edge2, nbr_weights_v.data().get(), src_v.size()); + + // + // Now local_cluster_edge_ids_v contains the edge ids of the src id/dest + // cluster id pairs, and nbr_weights_v contains the weight of edges + // going to that cluster id + // + // Now we can compute (locally) each delta_Q value + // + auto iter = thrust::make_zip_iterator( + thrust::make_tuple(local_cluster_edge_ids_v.begin(), nbr_weights_v.begin())); + + thrust::transform(rmm::exec_policy(stream_)->on(stream_), + iter, + iter + local_cluster_edge_ids_v.size(), + nbr_weights_v.begin(), + [total_edge_weight, + resolution, + d_src = src_v.data().get(), + d_src_cluster = src_cluster_v.data().get(), + d_nbr_cluster = nbr_cluster_v.data().get(), + 
d_src_vertex_weights = src_vertex_weight_v.data().get(), + d_src_cluster_weights = src_cluster_weights_cache_v_.data().get(), + d_dst_cluster_weights = dst_cluster_weights_cache_v_.data().get(), + d_ocs = ocs_v.data().get(), + base_src_vertex_id = base_src_vertex_id_, + base_dst_vertex_id = base_dst_vertex_id_] __device__(auto tuple) { + edge_t edge_id = thrust::get<0>(tuple); + vertex_t nbr_cluster = d_nbr_cluster[edge_id]; + weight_t new_cluster_sum = thrust::get<1>(tuple); + vertex_t old_cluster = d_src_cluster[edge_id]; + weight_t k_k = d_src_vertex_weights[edge_id]; + weight_t old_cluster_sum = d_ocs[edge_id]; + + weight_t a_old = d_src_cluster_weights[old_cluster - base_src_vertex_id]; + weight_t a_new = d_dst_cluster_weights[nbr_cluster - base_dst_vertex_id]; + + return 2 * (((new_cluster_sum - old_cluster_sum) / total_edge_weight) - + resolution * (a_new * k_k - a_old * k_k + k_k * k_k) / + (total_edge_weight * total_edge_weight)); + }); + + // + // Pick the largest delta_Q value for each vertex on this gpu. 
+ // Then we will shuffle back to the gpu by vertex id + // + rmm::device_vector final_src_v(local_cluster_edge_ids_v.size()); + rmm::device_vector final_nbr_cluster_v(local_cluster_edge_ids_v.size()); + rmm::device_vector final_nbr_weights_v(local_cluster_edge_ids_v.size()); + + auto final_input_iter = thrust::make_zip_iterator(thrust::make_tuple( + thrust::make_permutation_iterator(src_v.begin(), local_cluster_edge_ids_v.begin()), + thrust::make_permutation_iterator(nbr_cluster_v.begin(), local_cluster_edge_ids_v.begin()), + nbr_weights_v.begin())); + + auto final_output_iter = thrust::make_zip_iterator(thrust::make_tuple( + final_src_v.begin(), final_nbr_cluster_v.begin(), final_nbr_weights_v.begin())); + + auto final_output_pos = + thrust::copy_if(rmm::exec_policy(stream_)->on(stream_), + final_input_iter, + final_input_iter + local_cluster_edge_ids_v.size(), + final_output_iter, + [] __device__(auto p) { return (thrust::get<2>(p) > weight_t{0}); }); + + final_src_v.resize(thrust::distance(final_output_iter, final_output_pos)); + final_nbr_cluster_v.resize(thrust::distance(final_output_iter, final_output_pos)); + final_nbr_weights_v.resize(thrust::distance(final_output_iter, final_output_pos)); + + // + // Sort the results, pick the largest version + // + thrust::sort(rmm::exec_policy(stream_)->on(stream_), + thrust::make_zip_iterator(thrust::make_tuple( + final_src_v.begin(), final_nbr_weights_v.begin(), final_nbr_cluster_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple( + final_src_v.end(), final_nbr_weights_v.end(), final_nbr_cluster_v.begin())), + [] __device__(auto left, auto right) { + if (thrust::get<0>(left) < thrust::get<0>(right)) return true; + if (thrust::get<0>(left) > thrust::get<0>(right)) return false; + if (thrust::get<1>(left) > thrust::get<1>(right)) return true; + if (thrust::get<1>(left) < thrust::get<1>(right)) return false; + return (thrust::get<2>(left) < thrust::get<2>(right)); + }); + + // + // Now that we're sorted the 
first entry for each src value is the largest. + // + local_cluster_edge_ids_v.resize(final_src_v.size()); + + thrust::transform(rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(final_src_v.size()), + local_cluster_edge_ids_v.begin(), + [sentinel = std::numeric_limits::max(), + d_src = final_src_v.data().get()] __device__(edge_t edge_id) { + if (edge_id == 0) { return edge_id; } + + if (d_src[edge_id - 1] != d_src[edge_id]) { return edge_id; } + + return sentinel; + }); + + local_cluster_edge_ids_v = detail::remove_elements_from_vector( + local_cluster_edge_ids_v, + [sentinel = std::numeric_limits::max()] __device__(auto edge_id) { + return (edge_id != sentinel); + }, + stream_); + + final_nbr_cluster_v = variable_shuffle( + handle_, + local_cluster_edge_ids_v.size(), + thrust::make_permutation_iterator(final_nbr_cluster_v.begin(), + local_cluster_edge_ids_v.begin()), + thrust::make_transform_iterator( + thrust::make_permutation_iterator(final_src_v.begin(), local_cluster_edge_ids_v.begin()), + [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { + return d_vertex_device_view(v); + })); + + final_nbr_weights_v = variable_shuffle( + handle_, + local_cluster_edge_ids_v.size(), + thrust::make_permutation_iterator(final_nbr_weights_v.begin(), + local_cluster_edge_ids_v.begin()), + thrust::make_transform_iterator( + thrust::make_permutation_iterator(final_src_v.begin(), local_cluster_edge_ids_v.begin()), + [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { + return d_vertex_device_view(v); + })); + + final_src_v = variable_shuffle( + handle_, + local_cluster_edge_ids_v.size(), + thrust::make_permutation_iterator(final_src_v.begin(), local_cluster_edge_ids_v.begin()), + thrust::make_transform_iterator( + thrust::make_permutation_iterator(final_src_v.begin(), local_cluster_edge_ids_v.begin()), + [d_vertex_device_view = 
compute_partition_.vertex_device_view()] __device__(vertex_t v) { + return d_vertex_device_view(v); + })); + + // + // At this point... + // final_src_v contains the source indices + // final_nbr_cluster_v contains the neighboring clusters + // final_nbr_weights_v contains delta_Q for moving src to the neighboring cluster + // + // They have been shuffled to the gpus responsible for their source vertex + // + // FIXME: Think about how this should work. + // I think Leiden is broken. I don't think that the code we have + // actually does anything. For now I'm going to ignore Leiden in + // MNMG, we can reconsider this later. + // + // If we ignore Leiden, I'd like to think about whether the reduction + // should occur now... + // + + // + // Sort the results, pick the largest version + // + thrust::sort(rmm::exec_policy(stream_)->on(stream_), + thrust::make_zip_iterator(thrust::make_tuple( + final_src_v.begin(), final_nbr_weights_v.begin(), final_nbr_cluster_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple( + final_src_v.end(), final_nbr_weights_v.end(), final_nbr_cluster_v.begin())), + [] __device__(auto left, auto right) { + if (thrust::get<0>(left) < thrust::get<0>(right)) return true; + if (thrust::get<0>(left) > thrust::get<0>(right)) return false; + if (thrust::get<1>(left) > thrust::get<1>(right)) return true; + if (thrust::get<1>(left) < thrust::get<1>(right)) return false; + return (thrust::get<2>(left) < thrust::get<2>(right)); + }); + + // + // Now that we're sorted (by src, then by descending delta_Q), the first entry for each src value is the largest. + // 
+ // + local_cluster_edge_ids_v.resize(final_src_v.size()); + + thrust::transform(rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(final_src_v.size()), + local_cluster_edge_ids_v.begin(), + [sentinel = std::numeric_limits::max(), + d_src = final_src_v.data().get()] __device__(edge_t edge_id) { + if (edge_id == 0) { return edge_id; } + + if (d_src[edge_id - 1] != d_src[edge_id]) { return edge_id; } + + return sentinel; + }); + + local_cluster_edge_ids_v = detail::remove_elements_from_vector( + local_cluster_edge_ids_v, + [sentinel = std::numeric_limits::max()] __device__(auto edge_id) { + return (edge_id != sentinel); + }, + stream_); + + rmm::device_vector cluster_increase_v(final_src_v.size()); + rmm::device_vector cluster_decrease_v(final_src_v.size()); + rmm::device_vector old_cluster_v(final_src_v.size()); + + // + // Then we can, on each gpu, do a local assignment for all of the + // vertices assigned to that gpu using the up_down logic + // + local_cluster_edge_ids_v = detail::remove_elements_from_vector( + local_cluster_edge_ids_v, + local_cluster_edge_ids_v.begin(), + local_cluster_edge_ids_v.end(), + [d_final_src = final_src_v.data().get(), + d_final_nbr_cluster = final_nbr_cluster_v.data().get(), + d_final_nbr_weights = final_nbr_weights_v.data().get(), + d_cluster_increase = cluster_increase_v.data().get(), + d_cluster_decrease = cluster_decrease_v.data().get(), + d_vertex_weights = src_vertex_weights_cache_v_.data().get(), + d_next_cluster = next_cluster_v.data().get(), + d_old_cluster = old_cluster_v.data().get(), + base_vertex_id = base_vertex_id_, + base_src_vertex_id = base_src_vertex_id_, + up_down] __device__(edge_t idx) { + vertex_t src = d_final_src[idx]; + vertex_t new_cluster = d_final_nbr_cluster[idx]; + vertex_t old_cluster = d_next_cluster[src - base_vertex_id]; + weight_t src_weight = d_vertex_weights[src - base_src_vertex_id]; + + if (d_final_nbr_weights[idx] <= weight_t{0}) 
return false; + if (new_cluster == old_cluster) return false; + if ((new_cluster > old_cluster) != up_down) return false; + + d_next_cluster[src - base_vertex_id] = new_cluster; + d_cluster_increase[idx] = src_weight; + d_cluster_decrease[idx] = src_weight; + d_old_cluster[idx] = old_cluster; + return true; + }, + stream_); + + cluster_increase_v = variable_shuffle( + handle_, + local_cluster_edge_ids_v.size(), + thrust::make_permutation_iterator(cluster_increase_v.begin(), + local_cluster_edge_ids_v.begin()), + thrust::make_transform_iterator( + thrust::make_permutation_iterator(final_nbr_cluster_v.begin(), + local_cluster_edge_ids_v.begin()), + [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { + return d_vertex_device_view(v); + })); + + final_nbr_cluster_v = variable_shuffle( + handle_, + local_cluster_edge_ids_v.size(), + thrust::make_permutation_iterator(final_nbr_cluster_v.begin(), + local_cluster_edge_ids_v.begin()), + thrust::make_transform_iterator( + thrust::make_permutation_iterator(final_nbr_cluster_v.begin(), + local_cluster_edge_ids_v.begin()), + [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { + return d_vertex_device_view(v); + })); + + cluster_decrease_v = variable_shuffle( + handle_, + local_cluster_edge_ids_v.size(), + thrust::make_permutation_iterator(cluster_decrease_v.begin(), + local_cluster_edge_ids_v.begin()), + thrust::make_transform_iterator( + thrust::make_permutation_iterator(old_cluster_v.begin(), local_cluster_edge_ids_v.begin()), + [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { + return d_vertex_device_view(v); + })); + + old_cluster_v = variable_shuffle( + handle_, + local_cluster_edge_ids_v.size(), + thrust::make_permutation_iterator(old_cluster_v.begin(), local_cluster_edge_ids_v.begin()), + thrust::make_transform_iterator( + thrust::make_permutation_iterator(old_cluster_v.begin(), 
local_cluster_edge_ids_v.begin()), + [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { + return d_vertex_device_view(v); + })); + + thrust::for_each(rmm::exec_policy(stream_)->on(stream_), + thrust::make_zip_iterator( + thrust::make_tuple(final_nbr_cluster_v.begin(), cluster_increase_v.begin())), + thrust::make_zip_iterator( + thrust::make_tuple(final_nbr_cluster_v.end(), cluster_increase_v.end())), + [d_cluster_weights = cluster_weights_v_.data().get(), + base_vertex_id = base_vertex_id_] __device__(auto p) { + vertex_t cluster_id = thrust::get<0>(p); + weight_t weight = thrust::get<1>(p); + + atomicAdd(d_cluster_weights + cluster_id - base_vertex_id, weight); + }); + + thrust::for_each( + rmm::exec_policy(stream_)->on(stream_), + thrust::make_zip_iterator( + thrust::make_tuple(old_cluster_v.begin(), cluster_decrease_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(old_cluster_v.end(), cluster_decrease_v.end())), + [d_cluster_weights = cluster_weights_v_.data().get(), + base_vertex_id = base_vertex_id_] __device__(auto p) { + vertex_t cluster_id = thrust::get<0>(p); + weight_t weight = thrust::get<1>(p); + + atomicAdd(d_cluster_weights + cluster_id - base_vertex_id, -weight); + }); + + cache_vertex_properties( + cluster_weights_v_, src_cluster_weights_cache_v_, dst_cluster_weights_cache_v_); + } + + template + std::pair, rmm::device_vector> + combine_local_src_nbr_cluster_weights(hash_t hasher, + compare_t compare, + skip_edge_t skip_edge, + weight_t const *d_weights, + count_t num_weights) + { + rmm::device_vector relevant_edges_v; + rmm::device_vector relevant_edge_weights_v; + + if (num_weights > 0) { + std::size_t capacity{static_cast(num_weights / 0.7)}; + + cuco::static_map hash_map( + capacity, std::numeric_limits::max(), count_t{0}); + detail::create_cuco_pair_t create_cuco_pair; + + CUDA_TRY(cudaStreamSynchronize(stream_)); + + 
hash_map.insert(thrust::make_transform_iterator(thrust::make_counting_iterator(0), + create_cuco_pair), + thrust::make_transform_iterator( + thrust::make_counting_iterator(num_weights), create_cuco_pair), + hasher, + compare); + + CUDA_TRY(cudaStreamSynchronize(stream_)); + + relevant_edges_v.resize(num_weights); + + relevant_edges_v = detail::remove_elements_from_vector( + relevant_edges_v, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_weights), + [d_hash_map = hash_map.get_device_view(), hasher, compare] __device__(count_t idx) { + auto pos = d_hash_map.find(idx, hasher, compare); + return (pos->first == idx); + }, + stream_); + + thrust::for_each_n( + rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + relevant_edges_v.size(), + [d_hash_map = hash_map.get_device_view(), + hasher, + compare, + d_relevant_edges = relevant_edges_v.data().get()] __device__(count_t idx) mutable { + count_t edge_id = d_relevant_edges[idx]; + auto pos = d_hash_map.find(edge_id, hasher, compare); + pos->second.store(idx); + }); + + relevant_edge_weights_v.resize(relevant_edges_v.size()); + thrust::fill(rmm::exec_policy(stream_)->on(stream_), + relevant_edge_weights_v.begin(), + relevant_edge_weights_v.end(), + weight_t{0}); + + thrust::for_each_n( + rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + num_weights, + [d_hash_map = hash_map.get_device_view(), + hasher, + compare, + skip_edge, + d_relevant_edge_weights = relevant_edge_weights_v.data().get(), + d_weights] __device__(count_t idx) { + if (!skip_edge(idx)) { + auto pos = d_hash_map.find(idx, hasher, compare); + if (pos != d_hash_map.end()) { + atomicAdd(d_relevant_edge_weights + pos->second.load(cuda::std::memory_order_relaxed), + d_weights[idx]); + } + } + }); + } + + return std::make_pair(relevant_edges_v, relevant_edge_weights_v); + } + + void shrink_graph(vertex_t *d_cluster_vec) + { + timer_start("shrinking graph"); + + std::size_t 
capacity{static_cast((local_num_rows_ + local_num_cols_) / 0.7)}; + + cuco::static_map hash_map( + capacity, std::numeric_limits::max(), std::numeric_limits::max()); + + // renumber the clusters to the range 0..(num_clusters-1) + vertex_t num_clusters = renumber_clusters(hash_map); + + renumber_result(hash_map, d_cluster_vec, num_clusters); + + // shrink our graph to represent the graph of supervertices + generate_supervertices_graph(hash_map, num_clusters); + + // assign each new vertex to its own cluster + // MNMG: This can be done locally with no communication required + thrust::sequence(rmm::exec_policy(stream_)->on(stream_), + cluster_v_.begin(), + cluster_v_.end(), + base_vertex_id_); + + timer_stop(stream_); + } + + vertex_t renumber_clusters(cuco::static_map &hash_map) + { + rmm::device_vector cluster_inverse_v(local_num_vertices_, vertex_t{0}); + + // + // FIXME: Faster to iterate from graph_.get_vertex_partition_first() + // to graph_.get_vertex_partition_last()? That would potentially + // result in adding a cluster that isn't used on this GPU, + // although I don't think it would break the result in any way. + // + // This would also eliminate this use of src_indices_v_. 
+ // + auto it_src = thrust::make_transform_iterator( + src_indices_v_.begin(), + [base_src_vertex_id = base_src_vertex_id_, + d_src_cluster_cache = src_cluster_cache_v_.data().get()] __device__(auto idx) { + return detail::create_cuco_pair_t()( + d_src_cluster_cache[idx - base_src_vertex_id]); + }); + + auto it_dst = thrust::make_transform_iterator( + current_graph_view_.indices(), + [base_dst_vertex_id = base_dst_vertex_id_, + d_dst_cluster_cache = dst_cluster_cache_v_.data().get()] __device__(auto idx) { + return detail::create_cuco_pair_t()( + d_dst_cluster_cache[idx - base_dst_vertex_id]); + }); + + hash_map.insert(it_src, it_src + local_num_edges_); + hash_map.insert(it_dst, it_dst + local_num_edges_); + + // Now I need to get the keys into an array and shuffle them + rmm::device_vector used_cluster_ids_v(hash_map.get_size()); + + auto transform_iter = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [d_hash_map = hash_map.get_device_view()] __device__(std::size_t idx) { + return d_hash_map.begin_slot()[idx].first.load(); + }); + + used_cluster_ids_v = detail::remove_elements_from_vector( + used_cluster_ids_v, + transform_iter, + transform_iter + hash_map.get_capacity(), + [vmax = std::numeric_limits::max()] __device__(vertex_t cluster) { + return cluster != vmax; + }, + stream_); + + auto partition_cluster_ids_iter = thrust::make_transform_iterator( + used_cluster_ids_v.begin(), + [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { + return d_vertex_device_view(v); + }); + + rmm::device_vector original_gpus_v; + rmm::device_vector my_cluster_ids_v = + variable_shuffle( + handle_, used_cluster_ids_v.size(), used_cluster_ids_v.begin(), partition_cluster_ids_iter); + + if (graph_view_t::is_multi_gpu) { + original_gpus_v = variable_shuffle( + handle_, + used_cluster_ids_v.size(), + thrust::make_constant_iterator(rank_), + partition_cluster_ids_iter); + } + + // + // Now my_cluster_ids contains the 
cluster ids that this gpu is + // responsible for. I'm going to set cluster_inverse_v to one + // for each cluster in this list. + // + thrust::for_each( + rmm::exec_policy(stream_)->on(stream_), + my_cluster_ids_v.begin(), + my_cluster_ids_v.end(), + [base_vertex_id = base_vertex_id_, + d_cluster_inverse = cluster_inverse_v.data().get()] __device__(vertex_t cluster) { + d_cluster_inverse[cluster - base_vertex_id] = 1; + }); + + rmm::device_vector my_cluster_ids_deduped_v = detail::remove_elements_from_vector( + my_cluster_ids_v, + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(cluster_inverse_v.size()), + [d_cluster_inverse = cluster_inverse_v.data().get()] __device__(auto idx) { + return d_cluster_inverse[idx] == 1; + }, + stream_); + + // + // Need to gather everything to be able to compute base addresses + // + vertex_t base_address{0}; + + if (graph_view_t::is_multi_gpu) { + int num_gpus{1}; + rmm::device_vector sizes_v(num_gpus + 1, my_cluster_ids_deduped_v.size()); + + handle_.get_comms().allgather( + sizes_v.data().get() + num_gpus, sizes_v.data().get(), num_gpus, stream_); + + base_address = thrust::reduce(rmm::exec_policy(stream_)->on(stream_), + sizes_v.begin(), + sizes_v.begin() + rank_, + vertex_t{0}); + } + + // + // Now let's update cluster_inverse_v to contain + // the mapping of old cluster id to new vertex id + // + thrust::fill( + cluster_inverse_v.begin(), cluster_inverse_v.end(), std::numeric_limits::max()); + + thrust::for_each_n(rmm::exec_policy(stream_)->on(stream_), + thrust::make_counting_iterator(0), + my_cluster_ids_deduped_v.size(), + [base_address, + d_my_cluster_ids_deduped = my_cluster_ids_deduped_v.data().get(), + d_cluster_inverse = cluster_inverse_v.data().get()] __device__(auto idx) { + d_cluster_inverse[d_my_cluster_ids_deduped[idx]] = idx + base_address; + }); + + // + // Now I need to shuffle back to original gpus the + // subset of my mapping that is required + // + rmm::device_vector new_vertex_ids_v = + 
variable_shuffle( + handle_, + my_cluster_ids_v.size(), + thrust::make_transform_iterator(my_cluster_ids_v.begin(), + [d_cluster_inverse = cluster_inverse_v.data().get(), + base_vertex_id = base_vertex_id_] __device__(auto v) { + return d_cluster_inverse[v - base_vertex_id]; + }), + original_gpus_v.begin()); + + if (graph_view_t::is_multi_gpu) { + my_cluster_ids_v = variable_shuffle( + handle_, my_cluster_ids_v.size(), my_cluster_ids_v.begin(), original_gpus_v.begin()); + } + + // + // Now update the hash map with the new vertex id + // + thrust::for_each_n(rmm::exec_policy(stream_)->on(stream_), + thrust::make_zip_iterator( + thrust::make_tuple(my_cluster_ids_v.begin(), new_vertex_ids_v.begin())), + my_cluster_ids_v.size(), + [d_hash_map = hash_map.get_device_view()] __device__(auto p) mutable { + auto pos = d_hash_map.find(thrust::get<0>(p)); + pos->second.store(thrust::get<1>(p)); + }); + + // + // At this point we have a renumbered COO that is + // improperly distributed around the cluster, which + // will be fixed by generate_supervertices_graph + // + if (graph_t::is_multi_gpu) { + return host_scalar_allreduce( + handle_.get_comms(), static_cast(my_cluster_ids_deduped_v.size()), stream_); + } else { + return static_cast(my_cluster_ids_deduped_v.size()); + } + } + + void renumber_result(cuco::static_map const &hash_map, + vertex_t *d_cluster_vec, + vertex_t num_clusters) + { + if (graph_view_t::is_multi_gpu) { + // + // FIXME: Perhaps there's a general purpose function hidden here... + // Given a set of vertex_t values, and a distributed set of + // vertex properties, go to the proper node and retrieve + // the vertex properties and return them to this gpu. 
+ // + std::size_t capacity{static_cast((local_num_vertices_) / 0.7)}; + cuco::static_map result_hash_map( + capacity, std::numeric_limits::max(), std::numeric_limits::max()); + + auto cluster_iter = thrust::make_transform_iterator(d_cluster_vec, [] __device__(vertex_t c) { + return detail::create_cuco_pair_t()(c); + }); + + result_hash_map.insert(cluster_iter, cluster_iter + local_num_vertices_); + + rmm::device_vector used_cluster_ids_v(result_hash_map.get_size()); + + auto transform_iter = thrust::make_transform_iterator( + thrust::make_counting_iterator(0), + [d_result_hash_map = result_hash_map.get_device_view()] __device__(std::size_t idx) { + return d_result_hash_map.begin_slot()[idx].first.load(); + }); + + used_cluster_ids_v = detail::remove_elements_from_vector( + used_cluster_ids_v, + transform_iter, + transform_iter + result_hash_map.get_capacity(), + [vmax = std::numeric_limits::max()] __device__(vertex_t cluster) { + return cluster != vmax; + }, + stream_); + + auto partition_cluster_ids_iter = thrust::make_transform_iterator( + used_cluster_ids_v.begin(), + [d_vertex_device_view = compute_partition_.vertex_device_view()] __device__(vertex_t v) { + return d_vertex_device_view(v); + }); + + rmm::device_vector old_cluster_ids_v = + variable_shuffle(handle_, + used_cluster_ids_v.size(), + used_cluster_ids_v.begin(), + partition_cluster_ids_iter); + + rmm::device_vector original_gpus_v = + variable_shuffle( + handle_, + used_cluster_ids_v.size(), + thrust::make_constant_iterator(rank_), + partition_cluster_ids_iter); + + // Now each GPU has old cluster ids, let's compute new cluster ids + rmm::device_vector new_cluster_ids_v(old_cluster_ids_v.size()); + + thrust::transform(rmm::exec_policy(stream_)->on(stream_), + old_cluster_ids_v.begin(), + old_cluster_ids_v.end(), + new_cluster_ids_v.begin(), + [base_vertex_id = base_vertex_id_, + d_cluster = cluster_v_.data().get(), + d_hash_map = hash_map.get_device_view()] __device__(vertex_t cluster_id) { + 
vertex_t c = d_cluster[cluster_id - base_vertex_id]; + auto pos = d_hash_map.find(c); + return pos->second.load(); + }); + + // Shuffle everything back + old_cluster_ids_v = variable_shuffle( + handle_, old_cluster_ids_v.size(), old_cluster_ids_v.begin(), original_gpus_v.begin()); + new_cluster_ids_v = variable_shuffle( + handle_, new_cluster_ids_v.size(), new_cluster_ids_v.begin(), original_gpus_v.begin()); + + // Update result_hash_map + thrust::for_each_n( + rmm::exec_policy(stream_)->on(stream_), + thrust::make_zip_iterator( + thrust::make_tuple(old_cluster_ids_v.begin(), new_cluster_ids_v.begin())), + old_cluster_ids_v.size(), + [d_result_hash_map = result_hash_map.get_device_view()] __device__(auto pair) mutable { + auto pos = d_result_hash_map.find(thrust::get<0>(pair)); + pos->second.store(thrust::get<1>(pair)); + }); + + thrust::transform( + rmm::exec_policy(stream_)->on(stream_), + d_cluster_vec, + d_cluster_vec + number_of_vertices_, + d_cluster_vec, + [d_result_hash_map = result_hash_map.get_device_view()] __device__(vertex_t c) { + auto pos = d_result_hash_map.find(c); + return pos->second.load(); + }); + + } else { + thrust::transform(rmm::exec_policy(stream_)->on(stream_), + d_cluster_vec, + d_cluster_vec + number_of_vertices_, + d_cluster_vec, + [d_hash_map = hash_map.get_device_view(), + d_dst_cluster = dst_cluster_cache_v_.data()] __device__(vertex_t v) { + vertex_t c = d_dst_cluster[v]; + auto pos = d_hash_map.find(c); + return pos->second.load(); + }); + } + } + + void generate_supervertices_graph(cuco::static_map const &hash_map, + vertex_t num_clusters) + { + rmm::device_vector new_src_v(local_num_edges_); + rmm::device_vector new_dst_v(local_num_edges_); + rmm::device_vector new_weight_v(current_graph_view_.weights(), + current_graph_view_.weights() + local_num_edges_); + + thrust::transform(rmm::exec_policy(stream_)->on(stream_), + src_indices_v_.begin(), + src_indices_v_.end(), + new_src_v.begin(), + [base_src_vertex_id = 
base_src_vertex_id_, + d_src_cluster = src_cluster_cache_v_.data().get(), + d_hash_map = hash_map.get_device_view()] __device__(vertex_t v) { + vertex_t c = d_src_cluster[v - base_src_vertex_id]; + auto pos = d_hash_map.find(c); + return pos->second.load(); + }); + + thrust::transform(rmm::exec_policy(stream_)->on(stream_), + current_graph_view_.indices(), + current_graph_view_.indices() + local_num_edges_, + new_dst_v.begin(), + [base_dst_vertex_id = base_dst_vertex_id_, + d_dst_cluster = dst_cluster_cache_v_.data().get(), + d_hash_map = hash_map.get_device_view()] __device__(vertex_t v) { + vertex_t c = d_dst_cluster[v - base_dst_vertex_id]; + auto pos = d_hash_map.find(c); + return pos->second.load(); + }); + + // Combine common edges on local gpu + std::tie(new_src_v, new_dst_v, new_weight_v) = + combine_local_edges(new_src_v, new_dst_v, new_weight_v); + + if (graph_view_t::is_multi_gpu) { + // + // Shuffle the data to the proper GPU + // FIXME: This needs some performance exploration. It is + // possible (likely?) that the shrunken graph is + // more dense than the original graph. Perhaps that + // changes the dynamic of partitioning efficiently. + // + // For now, we're going to keep the partitioning the same, + // but because we've renumbered to lower numbers, fewer + // partitions will actually have data. 
+ // + rmm::device_vector partition_v(new_src_v.size()); + + thrust::transform( + rmm::exec_policy(stream_)->on(stream_), + thrust::make_zip_iterator(thrust::make_tuple(new_src_v.begin(), new_dst_v.begin())), + thrust::make_zip_iterator(thrust::make_tuple(new_src_v.end(), new_dst_v.end())), + partition_v.begin(), + [d_edge_device_view = compute_partition_.edge_device_view()] __device__( + thrust::tuple tuple) { + return d_edge_device_view(thrust::get<0>(tuple), thrust::get<1>(tuple)); + }); + + new_src_v = variable_shuffle( + handle_, partition_v.size(), new_src_v.begin(), partition_v.begin()); + + new_dst_v = variable_shuffle( + handle_, partition_v.size(), new_dst_v.begin(), partition_v.begin()); + + new_weight_v = variable_shuffle( + handle_, partition_v.size(), new_weight_v.begin(), partition_v.begin()); + + // + // Now everything is on the correct node, again combine like edges + // + std::tie(new_src_v, new_dst_v, new_weight_v) = + combine_local_edges(new_src_v, new_dst_v, new_weight_v); + } + + // + // Now I have a COO of the new graph, distributed according to the + // original clustering (eventually this likely fits on one GPU and + // everything else is empty). 
+ // + current_graph_ = + detail::create_graph(handle_, + new_src_v, + new_dst_v, + new_weight_v, + num_clusters, + experimental::graph_properties_t{true, true}, + current_graph_view_); + + current_graph_view_ = current_graph_->view(); + + src_indices_v_.resize(new_src_v.size()); + + local_num_vertices_ = current_graph_view_.get_number_of_local_vertices(); + local_num_rows_ = current_graph_view_.get_number_of_local_adj_matrix_partition_rows(); + local_num_cols_ = current_graph_view_.get_number_of_local_adj_matrix_partition_cols(); + local_num_edges_ = new_src_v.size(); + + cugraph::detail::offsets_to_indices( + current_graph_view_.offsets(), local_num_rows_, src_indices_v_.data().get()); + } + + std:: + tuple, rmm::device_vector, rmm::device_vector> + combine_local_edges(rmm::device_vector &src_v, + rmm::device_vector &dst_v, + rmm::device_vector &weight_v) + { + thrust::stable_sort_by_key( + rmm::exec_policy(stream_)->on(stream_), + dst_v.begin(), + dst_v.end(), + thrust::make_zip_iterator(thrust::make_tuple(src_v.begin(), weight_v.begin()))); + thrust::stable_sort_by_key( + rmm::exec_policy(stream_)->on(stream_), + src_v.begin(), + src_v.end(), + thrust::make_zip_iterator(thrust::make_tuple(dst_v.begin(), weight_v.begin()))); + + rmm::device_vector combined_src_v(src_v.size()); + rmm::device_vector combined_dst_v(src_v.size()); + rmm::device_vector combined_weight_v(src_v.size()); + + // + // Now we reduce by key to combine the weights of duplicate + // edges. 
+ // + auto start = thrust::make_zip_iterator(thrust::make_tuple(src_v.begin(), dst_v.begin())); + auto new_start = + thrust::make_zip_iterator(thrust::make_tuple(combined_src_v.begin(), combined_dst_v.begin())); + auto new_end = thrust::reduce_by_key(rmm::exec_policy(stream_)->on(stream_), + start, + start + src_v.size(), + weight_v.begin(), + new_start, + combined_weight_v.begin(), + thrust::equal_to>(), + thrust::plus()); + + auto num_edges = thrust::distance(new_start, new_end.first); + + combined_src_v.resize(num_edges); + combined_dst_v.resize(num_edges); + combined_weight_v.resize(num_edges); + + return std::make_tuple(combined_src_v, combined_dst_v, combined_weight_v); } protected: raft::handle_t const &handle_; + cudaStream_t stream_; + + vertex_t number_of_vertices_; + vertex_t base_vertex_id_{0}; + vertex_t base_src_vertex_id_{0}; + vertex_t base_dst_vertex_id_{0}; + int rank_{0}; + + vertex_t local_num_vertices_; + vertex_t local_num_rows_; + vertex_t local_num_cols_; + edge_t local_num_edges_; + + // + // Copy of graph + // + std::unique_ptr current_graph_{}; graph_view_t current_graph_view_; -}; + + // + // For partitioning + // + detail::compute_partition_t compute_partition_; + + rmm::device_vector src_indices_v_; + + // + // Weights and clustering across iterations of algorithm + // + rmm::device_vector vertex_weights_v_; + rmm::device_vector src_vertex_weights_cache_v_{}; + rmm::device_vector dst_vertex_weights_cache_v_{}; + + rmm::device_vector cluster_weights_v_; + rmm::device_vector src_cluster_weights_cache_v_{}; + rmm::device_vector dst_cluster_weights_cache_v_{}; + + rmm::device_vector cluster_v_; + rmm::device_vector src_cluster_cache_v_{}; + rmm::device_vector dst_cluster_cache_v_{}; + + rmm::device_vector empty_cache_weight_v_{}; + +#ifdef TIMING + HighResTimer hr_timer_; +#endif +}; } // namespace experimental } // namespace cugraph diff --git a/cpp/src/experimental/shuffle.cuh b/cpp/src/experimental/shuffle.cuh
new file mode 100644 index 00000000000..40f3b510b10 --- /dev/null +++ b/cpp/src/experimental/shuffle.cuh @@ -0,0 +1,226 @@ +/* + * Copyright (c) 2020, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +namespace cugraph { +namespace experimental { + +namespace detail { + +// +// FIXME: This implementation of variable_shuffle stages the data for transfer +// in host memory. It would be more efficient, I believe, to stage the +// data in device memory, but it would require actually instantiating +// the data in device memory which is already precious in the Louvain +// implementation. We should explore if it's actually more efficient +// through device memory and whether the improvement is worth the extra +// memory required. 
+// +template +rmm::device_vector variable_shuffle(raft::handle_t const &handle, + std::size_t n_elements, + iterator_t data_iter, + partition_iter_t partition_iter) +{ + // + // We need to compute the size of data movement + // + raft::comms::comms_t const &comms = handle.get_comms(); + + cudaStream_t stream = handle.get_stream(); + int num_gpus = comms.get_size(); + int my_gpu = comms.get_rank(); + + rmm::device_vector local_sizes_v(num_gpus, size_t{0}); + + thrust::for_each(rmm::exec_policy(stream)->on(stream), + partition_iter, + partition_iter + n_elements, + [num_gpus, d_local_sizes = local_sizes_v.data().get()] __device__(auto p) { + atomicAdd(d_local_sizes + p, size_t{1}); + }); + + std::vector h_local_sizes_v(num_gpus); + std::vector h_global_sizes_v(num_gpus); + std::vector h_input_v(n_elements); + std::vector h_partitions_v(n_elements); + + thrust::copy(local_sizes_v.begin(), local_sizes_v.end(), h_local_sizes_v.begin()); + thrust::copy(partition_iter, partition_iter + n_elements, h_partitions_v.begin()); + + std::vector requests(2 * num_gpus); + + int request_pos = 0; + + for (int gpu = 0; gpu < num_gpus; ++gpu) { + if (gpu != my_gpu) { + comms.irecv(&h_global_sizes_v[gpu], 1, gpu, 0, &requests[request_pos]); + ++request_pos; + comms.isend(&h_local_sizes_v[gpu], 1, gpu, 0, &requests[request_pos]); + ++request_pos; + } else { + h_global_sizes_v[gpu] = h_local_sizes_v[gpu]; + } + } + + if (request_pos > 0) { comms.waitall(request_pos, requests.data()); } + + comms.barrier(); + + // + // Now global_sizes contains all of the counts, we need to + // allocate an array of the appropriate size + // + int64_t receive_size = + thrust::reduce(thrust::host, h_global_sizes_v.begin(), h_global_sizes_v.end()); + + std::vector temp_data; + + if (receive_size > 0) temp_data.resize(receive_size); + + rmm::device_vector input_v(n_elements); + + auto input_start = input_v.begin(); + + for (int gpu = 0; gpu < num_gpus; ++gpu) { + input_start = 
thrust::copy_if(rmm::exec_policy(stream)->on(stream), + data_iter, + data_iter + n_elements, + partition_iter, + input_start, + [gpu] __device__(int32_t p) { return p == gpu; }); + } + + thrust::copy(input_v.begin(), input_v.end(), h_input_v.begin()); + + std::vector temp_v(num_gpus + 1); + + thrust::exclusive_scan( + thrust::host, h_global_sizes_v.begin(), h_global_sizes_v.end(), temp_v.begin()); + + temp_v[num_gpus] = temp_v[num_gpus - 1] + h_global_sizes_v[num_gpus - 1]; + h_global_sizes_v = temp_v; + + thrust::exclusive_scan( + thrust::host, h_local_sizes_v.begin(), h_local_sizes_v.end(), temp_v.begin()); + + temp_v[num_gpus] = temp_v[num_gpus - 1] + h_local_sizes_v[num_gpus - 1]; + h_local_sizes_v = temp_v; + + CUDA_TRY(cudaStreamSynchronize(handle.get_stream())); + comms.barrier(); + + request_pos = 0; + + for (int gpu = 0; gpu < num_gpus; ++gpu) { + size_t to_receive = h_global_sizes_v[gpu + 1] - h_global_sizes_v[gpu]; + size_t to_send = h_local_sizes_v[gpu + 1] - h_local_sizes_v[gpu]; + + if (gpu != my_gpu) { + if (to_receive > 0) { + comms.irecv( + temp_data.data() + h_global_sizes_v[gpu], to_receive, gpu, 0, &requests[request_pos]); + ++request_pos; + } + + if (to_send > 0) { + comms.isend( + h_input_v.data() + h_local_sizes_v[gpu], to_send, gpu, 0, &requests[request_pos]); + ++request_pos; + } + } else if (to_receive > 0) { + std::copy(h_input_v.begin() + h_local_sizes_v[gpu], + h_input_v.begin() + h_local_sizes_v[gpu + 1], + temp_data.begin() + h_global_sizes_v[gpu]); + } + } + + comms.barrier(); + + if (request_pos > 0) { comms.waitall(request_pos, requests.data()); } + + comms.barrier(); + + return rmm::device_vector(temp_data); +} + +} // namespace detail + +/** + * @brief shuffle data to the desired partition + * + * MNMG algorithms require shuffling data between partitions + * to get the data to the right location for computation. 
+ * + * This function operates dynamically; there is no + * a priori knowledge about where the data will need + * to be transferred. + * + * This function will be executed on each GPU. Each gpu + * has a portion of the data (specified by data_iter and + * n_elements) and an iterator that identifies + * (for each corresponding element) which GPU the data + * should be shuffled to. + * + * The return value will be a device vector containing + * the data received by this GPU. + * + * Note that this function accepts iterators as input. + * `partition_iter` will be traversed multiple times. + * + * @tparam is_multi_gpu If true, multi-gpu - shuffle will occur + * If false, single GPU - simple copy will occur + * @tparam data_t Type of the data being shuffled + * @tparam iterator_t Iterator referencing data to be shuffled + * @tparam partition_iter_t Iterator identifying the destination partition + * + * @param handle Library handle (RAFT) + * @param n_elements Number of elements to transfer + * @param data_iter Iterator that returns the elements to be transferred + * @param partition_iter Iterator that returns the partition where elements + * should be transferred.
+ */ +template * = nullptr> +rmm::device_vector variable_shuffle(raft::handle_t const &handle, + std::size_t n_elements, + iterator_t data_iter, + partition_iter_t partition_iter) +{ + return detail::variable_shuffle(handle, n_elements, data_iter, partition_iter); +} + +template * = nullptr> +rmm::device_vector variable_shuffle(raft::handle_t const &handle, + std::size_t n_elements, + iterator_t data_iter, + partition_iter_t partition_iter) +{ + return rmm::device_vector(data_iter, data_iter + n_elements); +} + +} // namespace experimental +} // namespace cugraph diff --git a/cpp/src/utilities/cython.cu b/cpp/src/utilities/cython.cu index b0f24fa377a..215069302c1 100644 --- a/cpp/src/utilities/cython.cu +++ b/cpp/src/utilities/cython.cu @@ -66,7 +66,9 @@ create_graph(raft::handle_t const& handle, graph_container_t const& graph_contai static_cast(graph_container.num_global_vertices), static_cast(graph_container.num_global_edges), graph_container.graph_props, - graph_container.sorted_by_degree, + // FIXME: This currently fails if sorted_by_degree is true... 
+ // graph_container.sorted_by_degree, + false, graph_container.do_expensive_check); } @@ -269,30 +271,6 @@ void populate_graph_container_legacy(graph_container_t& graph_container, //////////////////////////////////////////////////////////////////////////////// -namespace detail { -template -std::pair call_louvain(raft::handle_t const& handle, - graph_view_t const& graph_view, - void* identifiers, - void* parts, - size_t max_level, - weight_t resolution) -{ - thrust::copy( // rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), - thrust::device, - thrust::make_counting_iterator(graph_view.get_local_vertex_first()), - thrust::make_counting_iterator(graph_view.get_local_vertex_last()), - reinterpret_cast(identifiers)); - - return louvain(handle, - graph_view, - reinterpret_cast(parts), - max_level, - static_cast(resolution)); -} - -} // namespace detail - namespace detail { // Final, fully-templatized call. @@ -365,10 +343,10 @@ return_t call_function(raft::handle_t const& handle, function_t function) { if (graph_container.weightType == numberTypeEnum::floatType) { - return call_function( + return call_function( handle, graph_container, function); } else if (graph_container.weightType == numberTypeEnum::doubleType) { - return call_function( + return call_function( handle, graph_container, function); } else { CUGRAPH_FAIL("weightType unsupported"); @@ -415,6 +393,11 @@ class louvain_functor { std::pair operator()(raft::handle_t const& handle, graph_view_t const& graph_view) { + thrust::copy(rmm::exec_policy(handle.get_stream())->on(handle.get_stream()), + thrust::make_counting_iterator(graph_view.get_local_vertex_first()), + thrust::make_counting_iterator(graph_view.get_local_vertex_last()), + reinterpret_cast(identifiers_)); + return cugraph::louvain(handle, graph_view, reinterpret_cast(parts_), diff --git a/cpp/src/utilities/graph_utils.cuh b/cpp/src/utilities/graph_utils.cuh index 6b7e8558e86..4bb1ccc2823 100644 --- a/cpp/src/utilities/graph_utils.cuh 
+++ b/cpp/src/utilities/graph_utils.cuh @@ -460,30 +460,29 @@ void remove_duplicate( } } -template -__global__ void offsets_to_indices_kernel(const IndexType *offsets, IndexType v, IndexType *indices) +template +__global__ void offsets_to_indices_kernel(const offsets_t *offsets, index_t v, index_t *indices) { - int tid, ctaStart; - tid = threadIdx.x; - ctaStart = blockIdx.x; + auto tid{threadIdx.x}; + auto ctaStart{blockIdx.x}; - for (int j = ctaStart; j < v; j += gridDim.x) { - IndexType colStart = offsets[j]; - IndexType colEnd = offsets[j + 1]; - IndexType rowNnz = colEnd - colStart; + for (index_t j = ctaStart; j < v; j += gridDim.x) { + offsets_t colStart = offsets[j]; + offsets_t colEnd = offsets[j + 1]; + offsets_t rowNnz = colEnd - colStart; - for (int i = 0; i < rowNnz; i += blockDim.x) { + for (offsets_t i = 0; i < rowNnz; i += blockDim.x) { if ((colStart + tid + i) < colEnd) { indices[colStart + tid + i] = j; } } } } -template -void offsets_to_indices(const IndexType *offsets, IndexType v, IndexType *indices) +template +void offsets_to_indices(const offsets_t *offsets, index_t v, index_t *indices) { cudaStream_t stream{nullptr}; - IndexType nthreads = min(v, (IndexType)CUDA_MAX_KERNEL_THREADS); - IndexType nblocks = min((v + nthreads - 1) / nthreads, (IndexType)CUDA_MAX_BLOCKS); + index_t nthreads = min(v, (index_t)CUDA_MAX_KERNEL_THREADS); + index_t nblocks = min((v + nthreads - 1) / nthreads, (index_t)CUDA_MAX_BLOCKS); offsets_to_indices_kernel<<>>(offsets, v, indices); CHECK_CUDA(stream); } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index ac3a27c7b77..40ae7933b65 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -31,6 +31,8 @@ function(ConfigureTest CMAKE_TEST_NAME CMAKE_TEST_SRC CMAKE_EXTRA_LIBS) PRIVATE "${CUB_INCLUDE_DIR}" "${THRUST_INCLUDE_DIR}" + "${CUCO_INCLUDE_DIR}" + "${LIBCUDACXX_INCLUDE_DIR}" "${CMAKE_CUDA_TOOLKIT_INCLUDE_DIRECTORIES}" "${GTEST_INCLUDE_DIR}" "${RMM_INCLUDE}" @@ -291,6 +293,15 @@ 
set(EXPERIMENTAL_PAGERANK_TEST_SRCS ConfigureTest(EXPERIMENTAL_PAGERANK_TEST "${EXPERIMENTAL_PAGERANK_TEST_SRCS}" "") +################################################################################################### +# - Experimental LOUVAIN tests ------------------------------------------------------------------- + +set(EXPERIMENTAL_LOUVAIN_TEST_SRCS + "${CMAKE_SOURCE_DIR}/../thirdparty/mmio/mmio.c" + "${CMAKE_CURRENT_SOURCE_DIR}/experimental/louvain_test.cu") + +ConfigureTest(EXPERIMENTAL_LOUVAIN_TEST "${EXPERIMENTAL_LOUVAIN_TEST_SRCS}" "") + ################################################################################################### # - Experimental KATZ_CENTRALITY tests ------------------------------------------------------------ diff --git a/cpp/tests/community/louvain_test.cu b/cpp/tests/community/louvain_test.cu index 20fa7b1d3d9..da89cc3c0c5 100644 --- a/cpp/tests/community/louvain_test.cu +++ b/cpp/tests/community/louvain_test.cu @@ -68,6 +68,68 @@ TEST(louvain, success) int min = *min_element(cluster_id.begin(), cluster_id.end()); + std::cout << "modularity = " << modularity << std::endl; + + ASSERT_GE(min, 0); + ASSERT_GE(modularity, 0.402777 * 0.95); +} + +TEST(louvain_renumbered, success) +{ + std::vector off_h = {0, 16, 25, 30, 34, 38, 42, 44, 46, 48, 50, 52, + 54, 56, 73, 85, 95, 101, 107, 112, 117, 121, 125, 129, + 132, 135, 138, 141, 144, 147, 149, 151, 153, 155, 156 + + }; + std::vector ind_h = { + 1, 3, 7, 11, 15, 16, 17, 18, 19, 20, 21, 23, 24, 25, 30, 33, 0, 5, 11, 15, 16, 19, 21, + 25, 30, 4, 13, 14, 22, 27, 0, 9, 20, 24, 2, 13, 15, 26, 1, 13, 14, 18, 13, 15, 0, 16, + 13, 14, 3, 20, 13, 14, 0, 1, 13, 22, 2, 4, 5, 6, 8, 10, 12, 14, 17, 18, 19, 22, 25, + 28, 29, 31, 32, 2, 5, 8, 10, 13, 15, 17, 18, 22, 29, 31, 32, 0, 1, 4, 6, 14, 16, 18, + 19, 21, 28, 0, 1, 7, 15, 19, 21, 0, 13, 14, 26, 27, 28, 0, 5, 13, 14, 15, 0, 1, 13, + 16, 16, 0, 3, 9, 23, 0, 1, 15, 16, 2, 12, 13, 14, 0, 20, 24, 0, 3, 23, 0, 1, 13, + 4, 17, 27, 2, 17, 26, 13, 15, 
17, 13, 14, 0, 1, 13, 14, 13, 14, 0}; + + std::vector w_h = { + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, + 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0}; + + int num_verts = off_h.size() - 1; + int num_edges = ind_h.size(); + + std::vector cluster_id(num_verts, -1); + + rmm::device_vector offsets_v(off_h); + rmm::device_vector indices_v(ind_h); + rmm::device_vector weights_v(w_h); + rmm::device_vector result_v(cluster_id); + + cugraph::GraphCSRView G( + offsets_v.data().get(), indices_v.data().get(), weights_v.data().get(), num_verts, num_edges); + + float modularity{0.0}; + size_t num_level = 40; + + raft::handle_t handle; + + std::tie(num_level, modularity) = cugraph::louvain(handle, G, result_v.data().get()); + + cudaMemcpy((void*)&(cluster_id[0]), + result_v.data().get(), + sizeof(int) * num_verts, + cudaMemcpyDeviceToHost); + + int min = *min_element(cluster_id.begin(), cluster_id.end()); + + std::cout << "modularity = " << modularity << std::endl; + ASSERT_GE(min, 0); ASSERT_GE(modularity, 0.402777 * 0.95); } diff --git a/cpp/tests/experimental/louvain_test.cu b/cpp/tests/experimental/louvain_test.cu new file mode 100644 index 00000000000..e38b2c020d9 --- /dev/null +++ b/cpp/tests/experimental/louvain_test.cu @@ -0,0 +1,115 @@ +/* + * Copyright (c) 2020, NVIDIA 
CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include + +#include + +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +typedef struct Louvain_Usecase_t { + std::string graph_file_full_path{}; + bool test_weighted{false}; + + Louvain_Usecase_t(std::string const& graph_file_path, bool test_weighted) + : test_weighted(test_weighted) + { + if ((graph_file_path.length() > 0) && (graph_file_path[0] != '/')) { + graph_file_full_path = cugraph::test::get_rapids_dataset_root_dir() + "/" + graph_file_path; + } else { + graph_file_full_path = graph_file_path; + } + }; +} Louvain_Usecase; + +class Tests_Louvain : public ::testing::TestWithParam { + public: + Tests_Louvain() {} + static void SetupTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(Louvain_Usecase const& configuration) + { + raft::handle_t handle{}; + + std::cout << "read graph file: " << configuration.graph_file_full_path << std::endl; + + auto graph = + cugraph::test::read_graph_from_matrix_market_file( + handle, configuration.graph_file_full_path, configuration.test_weighted); + + auto graph_view = graph.view(); + + louvain(graph_view); + } + + template + void louvain(graph_t const& graph_view) + { + using vertex_t = typename graph_t::vertex_type; + using weight_t = typename graph_t::weight_type; + + 
raft::handle_t handle{}; + + rmm::device_vector clustering_v(graph_view.get_number_of_local_vertices()); + size_t level; + weight_t modularity; + + std::tie(level, modularity) = + cugraph::louvain(handle, graph_view, clustering_v.data().get(), size_t{100}, weight_t{1}); + + CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + + std::cout << "level = " << level << std::endl; + std::cout << "modularity = " << modularity << std::endl; + } +}; + +// FIXME: add tests for type combinations +TEST_P(Tests_Louvain, CheckInt32Int32FloatFloat) +{ + run_current_test(GetParam()); +} + +INSTANTIATE_TEST_CASE_P(simple_test, + Tests_Louvain, + ::testing::Values(Louvain_Usecase("test/datasets/karate.mtx", true) +#if 0 + , + Louvain_Usecase("test/datasets/web-Google.mtx", true), + Louvain_Usecase("test/datasets/ljournal-2008.mtx", true), + Louvain_Usecase("test/datasets/webbase-1M.mtx", true) +#endif + )); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/test_utilities.hpp b/cpp/tests/utilities/test_utilities.hpp index c87c63c56fb..518e7c2860e 100644 --- a/cpp/tests/utilities/test_utilities.hpp +++ b/cpp/tests/utilities/test_utilities.hpp @@ -348,7 +348,6 @@ edgelist_from_market_matrix_file_t read_edgelist_from_matrix MM_typecode mc{}; vertex_t m{}; - vertex_t k{}; edge_t nnz{}; FILE* file = fopen(graph_file_full_path.c_str(), "r"); @@ -359,7 +358,6 @@ edgelist_from_market_matrix_file_t read_edgelist_from_matrix auto mm_ret = cugraph::test::mm_properties(file, 1, &mc, &tmp_m, &tmp_k, &nnz); CUGRAPH_EXPECTS(mm_ret == 0, "could not read Matrix Market file properties."); m = static_cast(tmp_m); - k = static_cast(tmp_k); CUGRAPH_EXPECTS(mm_is_matrix(mc) && mm_is_coordinate(mc) && !mm_is_complex(mc) && !mm_is_skew(mc), "invalid Matrix Market file properties."); diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py index 186bd63ddc8..fa42fb92f42 100644 --- a/python/cugraph/dask/community/louvain.py +++ 
b/python/cugraph/dask/community/louvain.py @@ -11,12 +11,15 @@ # See the License for the specific language governing permissions and # limitations under the License. +import operator as op + from dask.distributed import wait, default_client import cugraph.comms.comms as Comms from cugraph.dask.common.input_utils import get_distributed_data from cugraph.structure.shuffle import shuffle from cugraph.dask.community import louvain_wrapper as c_mg_louvain +import dask_cudf def call_louvain(sID, @@ -42,7 +45,7 @@ def call_louvain(sID, resolution) -def louvain(input_graph, max_iter=100, resolution=1.0, load_balance=True): +def louvain(input_graph, max_iter=100, resolution=1.0): """ Compute the modularity optimizing partition of the input graph using the Louvain method on multiple GPUs @@ -68,40 +71,50 @@ def louvain(input_graph, max_iter=100, resolution=1.0, load_balance=True): # symmetric DiGraphs. # if type(graph) is not Graph: # raise Exception("input graph must be undirected") - client = default_client() # Calling renumbering results in data that is sorted by degree input_graph.compute_renumber_edge_list(transposed=False) sorted_by_degree = True + (ddf, num_verts, partition_row_size, partition_col_size, vertex_partition_offsets) = shuffle(input_graph, transposed=False) + num_edges = len(ddf) data = get_distributed_data(ddf) - result = dict([(data.worker_info[wf[0]]["rank"], - client.submit( - call_louvain, - Comms.get_session_id(), - wf[1], - num_verts, - num_edges, - vertex_partition_offsets, - sorted_by_degree, - max_iter, - resolution, - workers=[wf[0]])) - for idx, wf in enumerate(data.worker_to_parts.items())]) - - wait(result) - - (parts, modularity_score) = result[0].result() + futures = [client.submit(call_louvain, + Comms.get_session_id(), + wf[1], + num_verts, + num_edges, + vertex_partition_offsets, + sorted_by_degree, + max_iter, + resolution, + workers=[wf[0]]) + for idx, wf in enumerate(data.worker_to_parts.items())] + + wait(futures) + + # futures is a 
list of Futures containing tuples of (DataFrame, mod_score), + # unpack using separate calls to client.submit with a callable to get + # individual items. + # FIXME: look into an alternate way (not returning tuples, accessing + # tuples differently, etc.) since multiple client.submit() calls may not be + # optimal. + df_futures = [client.submit(op.getitem, f, 0) for f in futures] + mod_score_futures = [client.submit(op.getitem, f, 1) for f in futures] + + ddf = dask_cudf.from_delayed(df_futures) + # Each worker should have computed the same mod_score + mod_score = mod_score_futures[0].result() if input_graph.renumbered: # MG renumbering is lazy, but it's safe to assume it's been called at # this point if renumbered=True - parts = input_graph.unrenumber(parts, "vertex") + ddf = input_graph.unrenumber(ddf, "vertex") - return parts, modularity_score + return (ddf, mod_score) diff --git a/python/cugraph/dask/community/louvain_wrapper.pyx b/python/cugraph/dask/community/louvain_wrapper.pyx index a1a1e629732..c2a12cf81f3 100644 --- a/python/cugraph/dask/community/louvain_wrapper.pyx +++ b/python/cugraph/dask/community/louvain_wrapper.pyx @@ -81,6 +81,8 @@ def louvain(input_df, vertex_partition_offsets_host = vertex_partition_offsets.values_host cdef uintptr_t c_vertex_partition_offsets = vertex_partition_offsets_host.__array_interface__['data'][0] + num_local_verts = vertex_partition_offsets_host[rank+1] - vertex_partition_offsets_host[rank] + cdef graph_container_t graph_container # FIXME: The excessive casting for the enum arg is needed to make cython @@ -98,10 +100,11 @@ def louvain(input_df, sorted_by_degree, False, True) # store_transposed, multi_gpu - # Create the output dataframe + # Create the output dataframe, column lengths must be equal to the number of + # vertices in the partition df = cudf.DataFrame() - df['vertex'] = cudf.Series(np.zeros(num_global_verts, dtype=vertex_t)) - df['partition'] = cudf.Series(np.zeros(num_global_verts, dtype=vertex_t)) + 
df['vertex'] = cudf.Series(np.zeros(num_local_verts, dtype=vertex_t)) + df['partition'] = cudf.Series(np.zeros(num_local_verts, dtype=vertex_t)) cdef uintptr_t c_identifiers = df['vertex'].__cuda_array_interface__['data'][0] cdef uintptr_t c_partition = df['partition'].__cuda_array_interface__['data'][0] From 715e3744b260d1fc5b37c73327806efcaae911ad Mon Sep 17 00:00:00 2001 From: Brad Rees <34135411+BradReesWork@users.noreply.github.com> Date: Wed, 14 Oct 2020 12:02:37 -0400 Subject: [PATCH 11/16] [REVIEW] DOC Adding Nx transition doc and preping for more (#1217) * Added new Medium entry * updated to match cuML * copied to match cuML * updated list of MG algorithms * converted to Markdown * updated reference to new markdown file * removed rst file * should really be a HTML file (next release) * copy pdf files over * updates * removed ref to cuml * changelog * addressing review issues * migrated to RST Co-authored-by: BradReesWork --- CHANGELOG.md | 2 +- README.md | 14 +- docs/Makefile | 2 + docs/source/_static/copybutton.css | 42 ++++++ docs/source/_static/example_mod.js | 61 +++++++++ docs/source/_static/references.css | 23 ++++ docs/source/conf.py | 44 ++++-- docs/source/cugraph_blogs.rst | 1 + docs/source/cugraph_intro.md | 22 +++ docs/source/cugraph_intro.rst | 13 -- docs/source/images/Nx_Cg_1.png | Bin 0 -> 69661 bytes docs/source/images/Nx_Cg_2.png | Bin 0 -> 49831 bytes docs/source/index.rst | 4 +- docs/source/nx_transition.rst | 198 +++++++++++++++++++++++++++ docs/source/sphinxext/github_link.py | 146 ++++++++++++++++++++ 15 files changed, 534 insertions(+), 38 deletions(-) create mode 100644 docs/source/_static/copybutton.css create mode 100644 docs/source/_static/example_mod.js create mode 100644 docs/source/_static/references.css create mode 100644 docs/source/cugraph_intro.md delete mode 100644 docs/source/cugraph_intro.rst create mode 100644 docs/source/images/Nx_Cg_1.png create mode 100644 docs/source/images/Nx_Cg_2.png create mode 100644 
docs/source/nx_transition.rst create mode 100644 docs/source/sphinxext/github_link.py diff --git a/CHANGELOG.md b/CHANGELOG.md index ebdb268d2b1..54df86f3a24 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,7 +33,7 @@ - PR #1165 updated remaining algorithms to be NetworkX compatible - PR #1176 Update ci/local/README.md - PR #1184 BLD getting latest tags - +- PR #1217 NetworkX Transition doc ## Bug Fixes - PR #1131 Show style checker errors with set +e diff --git a/README.md b/README.md index a51b9fb4e0c..52797f5e6e4 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,7 @@ for i in range(len(df_page)): | | Edge Betweenness Centrality | Single-GPU | | | Community | | | | | | Leiden | Single-GPU | | -| | Louvain | Single-GPU | | +| | Louvain | Multiple-GPU | | | | Ensemble Clustering for Graphs | Single-GPU | | | | Spectral-Clustering - Balanced Cut | Single-GPU | | | | Spectral-Clustering - Modularity | Single-GPU | | @@ -57,16 +57,16 @@ for i in range(len(df_page)): | Layout | | | | | | Force Atlas 2 | Single-GPU | | | Link Analysis| | | | -| | Pagerank | Multiple-GPU | limited to 2 billion vertices | -| | Personal Pagerank | Multiple-GPU | limited to 2 billion vertices | +| | Pagerank | Multiple-GPU | | +| | Personal Pagerank | Single-GPU | | | | HITS | Single-GPU | leverages Gunrock | | Link Prediction | | | | | | Jaccard Similarity | Single-GPU | | | | Weighted Jaccard Similarity | Single-GPU | | | | Overlap Similarity | Single-GPU | | | Traversal | | | | -| | Breadth First Search (BFS) | Multiple-GPU | limited to 2 billion vertices | -| | Single Source Shortest Path (SSSP) | Single-GPU | | +| | Breadth First Search (BFS) | Multiple-GPU | | +| | Single Source Shortest Path (SSSP) | Multiple-GPU | | | Structure | | | | | | Renumbering | Single-GPU | Also for multiple columns | | | Symmetrize | Single-GPU | | @@ -81,9 +81,7 @@ for i in range(len(df_page)): ## cuGraph Notice The current version of cuGraph has some limitations: -- Vertex IDs need to be 32-bit 
integers (that restriction is going away in 0.16) - Vertex IDs are expected to be contiguous integers starting from 0. --- If the starting index is not zero, cuGraph will add disconnected vertices to fill in the missing range. (Auto-) Renumbering fixes this issue cuGraph provides the renumber function to mitigate this problem, which is by default automatically called when data is addted to a graph. Input vertex IDs for the renumber function can be any type, can be non-contiguous, can be multiple columns, and can start from an arbitrary number. The renumber function maps the provided input vertex IDs to 32-bit contiguous integers starting from 0. cuGraph still requires the renumbered vertex IDs to be representable in 32-bit integers. These limitations are being addressed and will be fixed soon. @@ -96,7 +94,7 @@ The amount of memory required is dependent on the graph structure and the analyt | Size | Recommended GPU Memory | |-------------------|------------------------| -| 500 million edges | 32 GB | +| 500 million edges | 32 GB | | 250 million edges | 16 GB | The use of managed memory for oversubscription can also be used to exceed the above memory limitations. See the recent blog on _Tackling Large Graphs with RAPIDS cuGraph and CUDA Unified Memory on GPUs_: https://medium.com/rapids-ai/tackling-large-graphs-with-rapids-cugraph-and-unified-virtual-memory-b5b69a065d4 diff --git a/docs/Makefile b/docs/Makefile index e8838279733..9c35aa6fc8d 100644 --- a/docs/Makefile +++ b/docs/Makefile @@ -7,6 +7,7 @@ SPHINXBUILD = sphinx-build SPHINXPROJ = cuGraph SOURCEDIR = source BUILDDIR = build +IMGDIR = images # Put it first so that "make" without argument is like "make help". help: @@ -18,3 +19,4 @@ help: # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
%: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) + cp -r $(SOURCEDIR)/$(IMGDIR) $(BUILDDIR)/html diff --git a/docs/source/_static/copybutton.css b/docs/source/_static/copybutton.css new file mode 100644 index 00000000000..5eef6e366d0 --- /dev/null +++ b/docs/source/_static/copybutton.css @@ -0,0 +1,42 @@ +/* This contains code with copyright by the scikit-learn project, subject to +the license in /thirdparty/LICENSES/LICENSE.scikit_learn */ + +/* copybutton */ +/* Adds "Show/Hide Output" button to Examples */ + +.copybutton { + cursor: pointer; + position: absolute; + top: 0px; + right: 0px; + border: 1px solid rgb(221, 221, 221); + color: rgb(221, 221, 221); + font-family: monospace; + padding-left: 0.2rem; + padding-right: 0.2rem; +} + +div.highlight:hover span.copybutton::after { + background: #3F556B; + border-radius: 0.25rem; + color: white; + content: attr(title); + padding: 0.25rem; + position: absolute; + z-index: 98; + width: 100px; + font-size: 0.7rem; + top: 0; + right: 0; +} + +/* copy button */ +div.highlight:hover span.copybutton { + background-color: #3F556B; + color: white; +} + +div.highlight:hover span.copybutton:hover { + background-color: #20252B; +} + diff --git a/docs/source/_static/example_mod.js b/docs/source/_static/example_mod.js new file mode 100644 index 00000000000..77dc618a82d --- /dev/null +++ b/docs/source/_static/example_mod.js @@ -0,0 +1,61 @@ +// This contains code with copyright by the scikit-learn project, subject to +// the license in /thirdparty/LICENSES/LICENSE.scikit_learn + +$(document).ready(function () { + /* Add a [>>>] button on the top-right corner of code samples to hide + * the >>> and ... prompts and the output and thus make the code + * copyable. 
*/ + var div = $('.highlight-python .highlight,' + + '.highlight-python3 .highlight,' + + '.highlight-pycon .highlight,' + + '.highlight-default .highlight') + var pre = div.find('pre'); + + // get the styles from the current theme + pre.parent().parent().css('position', 'relative'); + var hide_text = 'Hide prompts and outputs'; + var show_text = 'Show prompts and outputs'; + + // create and add the button to all the code blocks that contain >>> + div.each(function (index) { + var jthis = $(this); + if (jthis.find('.gp').length > 0) { + var button = $('>>>'); + button.attr('title', hide_text); + button.data('hidden', 'false'); + jthis.prepend(button); + } + // tracebacks (.gt) contain bare text elements that need to be + // wrapped in a span to work with .nextUntil() (see later) + jthis.find('pre:has(.gt)').contents().filter(function () { + return ((this.nodeType == 3) && (this.data.trim().length > 0)); + }).wrap(''); + }); + + // define the behavior of the button when it's clicked + $('.copybutton').click(function (e) { + e.preventDefault(); + var button = $(this); + if (button.data('hidden') === 'false') { + // hide the code output + button.parent().find('.go, .gp, .gt').hide(); + button.next('pre') + .find('.gt') + .nextUntil('.gp, .go') + .css('visibility', 'hidden'); + button.css('text-decoration', 'line-through'); + button.attr('title', show_text); + button.data('hidden', 'true'); + } else { + // show the code output + button.parent().find('.go, .gp, .gt').show(); + button.next('pre') + .find('.gt') + .nextUntil('.gp, .go') + .css('visibility', 'visible'); + button.css('text-decoration', 'none'); + button.attr('title', hide_text); + button.data('hidden', 'false'); + } + }); +}); \ No newline at end of file diff --git a/docs/source/_static/references.css b/docs/source/_static/references.css new file mode 100644 index 00000000000..225cf13ba94 --- /dev/null +++ b/docs/source/_static/references.css @@ -0,0 +1,23 @@ + +/* Fix references to not look like parameters 
*/ +dl.citation > dt.label { + display: unset !important; + float: left !important; + border: unset !important; + background: unset !important; + padding: unset !important; + margin: unset !important; + font-size: unset !important; + line-height: unset !important; + padding-right: 0.5rem !important; +} + +/* Add opening bracket */ +dl.citation > dt.label > span::before { + content: "["; +} + +/* Add closing bracket */ +dl.citation > dt.label > span::after { + content: "]"; +} \ No newline at end of file diff --git a/docs/source/conf.py b/docs/source/conf.py index 0c8a0316278..a1b7d348395 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- # -# Copyright (c) 2018, NVIDIA CORPORATION. +# Copyright (c) 2018-2020 NVIDIA CORPORATION. # # pygdf documentation build configuration file, created by # sphinx-quickstart on Wed May 3 10:59:22 2017. @@ -21,8 +21,17 @@ # import os import sys + +# If extensions (or modules to document with autodoc) are in another +# directory, add these directories to sys.path here. If the directory +# is relative to the documentation root, use os.path.abspath to make it +# absolute, like shown here. +sys.path.insert(0, os.path.abspath('sphinxext')) +sys.path.insert(0, os.path.abspath('../../python')) sys.path.insert(0, os.path.abspath('../..')) +from github_link import make_linkcode_resolve # noqa + # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. @@ -33,14 +42,20 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. 
extensions = [ - 'sphinx.ext.intersphinx', + 'numpydoc', 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', - 'numpydoc', - 'IPython.sphinxext.ipython_console_highlighting', - 'IPython.sphinxext.ipython_directive', + 'sphinx.ext.doctest', + 'sphinx.ext.intersphinx', + 'sphinx.ext.linkcode', + "IPython.sphinxext.ipython_console_highlighting", + "IPython.sphinxext.ipython_directive", + "nbsphinx", + "recommonmark", + "sphinx_markdown_tables", ] + ipython_mplbackend = 'str' # Add any paths that contain templates here, relative to this directory. @@ -50,7 +65,7 @@ # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = {".rst": "restructuredtext", ".md": "markdown"} # The master toctree document. master_doc = 'index' @@ -151,7 +166,7 @@ # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'cugraph.tex', 'cugraph Documentation', - 'Continuum Analytics', 'manual'), + 'nvidia', 'manual'), ] @@ -187,12 +202,13 @@ def setup(app): - app.add_stylesheet('params.css') + app.add_css_file('copybutton.css') + app.add_css_file('params.css') + app.add_css_file('references.css') -from recommonmark.parser import CommonMarkParser - -source_parsers = { - '.md': CommonMarkParser, -} -source_suffix = ['.rst', '.md'] \ No newline at end of file +# The following is used by sphinx.ext.linkcode to provide links to github +linkcode_resolve = make_linkcode_resolve( + 'cugraph', 'https://github.com/rapidsai/' + 'cugraph/blob/{revision}/python/' + '{package}/{path}#L{lineno}') \ No newline at end of file diff --git a/docs/source/cugraph_blogs.rst b/docs/source/cugraph_blogs.rst index a9954aee5cb..84e31d40a19 100644 --- a/docs/source/cugraph_blogs.rst +++ b/docs/source/cugraph_blogs.rst @@ -22,6 +22,7 @@ BLOGS ------ * `Status of RAPIDS cuGraph — Refactoring Code And Rethinking Graphs `_ * `Tackling Large Graphs with RAPIDS cuGraph and CUDA Unified Memory on GPUs `_ + * `RAPIDS cuGraph adds 
NetworkX and DiGraph Compatibility `_ Media diff --git a/docs/source/cugraph_intro.md b/docs/source/cugraph_intro.md new file mode 100644 index 00000000000..5bf2b715462 --- /dev/null +++ b/docs/source/cugraph_intro.md @@ -0,0 +1,22 @@ + +# cuGraph Introduction + + +## Terminology + +cuGraph is a collection of GPU accelerated graph algorithms and graph utility +functions. The application of graph analysis covers a lot of areas. +For Example: +* [Network Science](https://en.wikipedia.org/wiki/Network_science) +* [Complex Network](https://en.wikipedia.org/wiki/Complex_network) +* [Graph Theory](https://en.wikipedia.org/wiki/Graph_theory) +* [Social Network Analysis](https://en.wikipedia.org/wiki/Social_network_analysis) + +cuGraph does not favor one field over another. Our developers span the +breadth of fields with the focus being to produce the best graph library +possible. However, each field has its own argot (jargon) for describing the +graph (or network). In our documentation, we try to be consistent. In Python +documentation we will mostly use the terms __Node__ and __Edge__ to better +match NetworkX preferred term use, as well as other Python-based tools. At +the CUDA/C layer, we favor the mathematical terms of __Vertex__ and __Edge__. + diff --git a/docs/source/cugraph_intro.rst b/docs/source/cugraph_intro.rst deleted file mode 100644 index cd2d750e35f..00000000000 --- a/docs/source/cugraph_intro.rst +++ /dev/null @@ -1,13 +0,0 @@ - -cuGraph Intro ------------------------------- - - - -Graph Type - - -Algorithms - - -Using diff --git a/docs/source/images/Nx_Cg_1.png b/docs/source/images/Nx_Cg_1.png new file mode 100644 index 0000000000000000000000000000000000000000..6d29f76ad4ab2ec93c784fe5afd9db2379ae7629 GIT binary patch literal 69661 zcmce;1ymeS*CmR(lK{bkh2ZW^aDux#!5xC9AwYr#cMI;py>WM!#)3ORJNPst`TlwT zy!F<4Yi3PWb-GerRekH;)93EJ&%XJpq#%WkLWlwd1%)mn{Z0i63bqgm3c3La0dhnd zX)p!y2JNCEB?eVKO1ua8$47H58H@Mtq39s{NKkOlgi!ELr$Amp&_w^)mxQK;f_**? 
z0|gag1qJu_90kbx)1PmU*V8qB-eI$#|DFvwmId?gY}mpq*nj)b4NsS?^9!&+-jE%o zwOyc~@Ti|&&@w7izo4K*pk&^Os(pk$Oh^2vu6}ie|5gM?I%wScB#VPyidDWORKL8yzQ`2kiS9wZ)_sy6iQ?IiGG&CSM^@XtT{_Np7JnMB2SxJeEY+Cr{cEX zuep$tD&PB2&%0Ew%kPA0=E)3UaMGWiHfuJ({cYH}$)Qh?#!i{TKJ8(=?u5+<0JO(TX{B7@lt}FaMeo>Ylg7uCDva@R@ zhZV7=S9}WJXo&)sgCqFG^~KNZv&|9T1GC$C+UH)2fLkYeH-z1)lBapwTU&WkT8#)e z<-TIQasM#(BC13{;(3IiV9dFca`wLp_^FaLf(MVCmp&~vYS1lyi^HO-8=*)B+_>m#B=16 z^`~b);?hhSMWCl8d2i zBX?5*9cX{r1M*p8o8yY~b4!ho;)bNa#UYl-D{bLNx7O6XgVJ-hJse)N&~XsGQ%tS)UI<_Ef>ua#BqF6A=S9){6tUQzB5W*!RJcXb~_ zTJEm--gY{c4kn@U$sOc7o!!K_vF-QcP1d?mE+iV}dY;7Kzp04V_eO;^L=PW452=mm zYNC-#z2F&`Xwh^Sjh0!pk25E_f}2bhxV)K8PKMzw>i(>iTt_R1vthcK1QePxi#s3Z z6dVy$^!*{rWs$(uU?)`JU#lRAW=$jL#xxGRGf8gsDQNk*mluq}*IOt^X z)CE~?jSe-*dO5>hgF*CW;uhuaI^kTBevvTfH0u)52LhI3-Y=7Vv@05^>%N+PJ!1Ad z%aw$YIiVna#BlVa^m36|v9)dRm*hHol>O^IumgqM>`{WHuz~8jA91(jT(S%iFYSHr ztI2A8Q|$(edV$A1!r1rUo7^7ntL#;TxnwhZJj}C549tHKKL;c?;P?iu%Y|s{TI$jh zvvRDh9%xW8wre5zxUPNOwR9vI5-IS#aV>D?Za%qF+COV(lxS9kuU4MrT2yM(pi*_^!6N4c-t|TO$W)nbo!Qna z!H5<*ojI|3Jy#Q(SpTq^;3_w%jA6rgyG4|HV);3-u}qin3PG~RDs5o$)|B|b;My^L zQ?iBduN#$x%I5Y%%-t9&&F~NOsF(Vi@H61(U7e*5ZIs~KzKvLo6rXf|mgU`w8;rQz z6WX@I^1ifyBbQfFRl_{3Az=w?7;}*!%Yd`qd52tz>X0 zKQW&AEDf9aoX`BT{E1AnY&5(EJP6-oKMF8TJvNl$ZLm$+%gK-K` z6SWSlocs_v<4x}4#g$P|UKwEr?ZKkd4uL7&;Li!78)*l|p|=fS0`x`K6Z$Y%Or6vM zcE)f%T043c9YptYcsgli_$~9SMV}=%n+o&w41Zwdu$hNzrds2{*>NvY^|iPY#!)hz zsi8iFFmwOe`I)|3!u@@k9khFEZR@$l5!=^aF#v(d8OLe_>+@-Zd{`*tEH!SsIs8PV zB)HqitleejmQhCF)}4ZiVp*Srjk`>pJC^GQY-VJ1)6KCRx0p5o&Q(|1cBE1=+&pFk z;7!vx??DnCB$6L!|^C|KZeNz;R%bRTISH`g4U$moU3q&K~rh< z9GW^eZ(A(vIAgd_gcs9KyE!&;V_bnHoe~jW265GmyqG4_yYWy+Aj(W4Qk(m3+fpyW z-QHejClO+|svtwt_F7P7T5Hw%N>Kxr^lm~~7yE<^fO6|$MD=jKpD|hX-qbYC@Ap)` z>{KmM!{y0=_raRzy$I(oUolMtcBLZ+#_3x{&s9vA__e>G!ednCsmCx@bhIDJ@iS+} z;`10GDT{)CN}ImW9jmza$s&QoC&V^RrSvTyA>S~cJD1=zwuaHX-%TQluC7}Z9+(b7(xN%M=BqvA@nsgnUPS%Y8d~w-TCiOIP;nm#9?)>d_ z)~ZhPR+7JK-~}V*CBLDR%*2qVd>ai;f#CO`;+AsZ>Ks;uo&DX_wiTUx!T3`)FEjq5 
zB3mlW^u^IQ;p4I`(V>T~w|4M@CSN440s$!EFM8j?q0|OCw@aTIsb_rC=Y{7ZY@#=z z*`|5SgyiJSWK2yW?xW#DAFE=7AMst6)LU0!%}Mz2d3~R!@oz1^0a>;%{*^8t3-S$JE(ds~|&YSRKE;&0|?sU*Z_?^i+f&gXDmQv*>6qLn< zn88~kz*&KD?-zEa$T4ec3xDL1wAYa8c%P`RI&pv)Oc=%z!W zD<|It4oUpyiQvc~qR~U6@TvEp8eFx^kzZzfy8I-HM2kzi0Nm*V^J0IvNsGg2Mw|N0 ze&w6A3TgeYXp^>aie4A{!p>w}Avku><3EB~tXPk2%s1mtBtR7zfUW=RnJ1y6q(misY z!%er*PzpO+dd(hAG|C#*t=1JqPnb+qqQPXt06>qi*(gfc4mUuRb-nQNOhRFa7JfthzxS^Rgejgis?rP;$ zeH9$5w2Hjc@{n`f|4rll`gA;qd-0P3a!rufQwFL{Znv~fi=4wykuOXBJ%TXxB0M-- z^;FJxd%e-7RjJgk>L{q3G>0;4oWDB3oCp?nE~{kCeENMsNtO#H%ssEe7L#_)m_=N@ zpG_n$CRnqe!41{bT;f0{j=LtYBKFGW=wmPdjU*TxI>P4*2jj7>)P#3JSxBFsMtsaI zl74gbt6TeN7O8I4pRZUrD zWbuwoAD`>4<&+QK!t9U@k?dST{iwm#pJ`hQ<|SW1N^MCyK?IizEm}B?$-8mFPr7%K zZzheB69xK2W(7En(yNprzFx%T=O^Os=yIOEmB0AJXWko=4(O`st%UBvdvj{!7FCFV zlW2?>+KYnbK+Dr0-to)!r~<#d&2vs9wX~Z{;-FT0XA=&$h1TeS(Ey7DAD1#2s{ya0 z2PJs#R@h#pYaFcdE364koWk4$=YXt%H7AEWNlWwa)<7GFMIjmwjV!Z9ETh>?WKAPH z)Rak;wquuDa_E%*lbmzc4RHL%2(g3Kn(XM)C;EvJ4^pO&p6y>4AP`Ze@B|M0{e0cB zjMD40Yz;o_&lsk+>v77LX>Fk0m_&~@*TeXNq%hO$n;H=GTa|anbdpb!6egq-MXid` zy|B8&9k)YVcFBW7zD=pW*y-BC%l-PUQ*dtLC(^5I1C8GeJ^IYxONMmACN)8C7L+gy z0#W&E#XaM z^_*3RHkLm}y7FfdYK%m(hb!Us~A$t&QH` zmHIi9vdfY;)82WL@I60G3mhnRBVWQ+fxsnOcnXc7hhKYwIEjoZMx3@`8D5lB!WU!f^FM zf8Fa+66|2VL+RjSQa4AY#OtNnK3aKLf|;_3;)woo%U=m5cPNP40`Ig$-9FJyB~@g& zDR0NXh<7Z-2%>%TLPlfY4km~s+A$n7gNHmKY_9xWz zpT819)mnhKOS-dsQaYql5PCtJ0w=5UX=WG-8ZFIaIc7Wz6#xkQ5~x3YDd}o9Ez3=O zORc-3DM^=9p^9ClegF^X1cJAy^XGW=fX7v+Y6p)%vk>Bva~)>>#t2EGNc9{p!(|7q zRNhj@<|cz3I8h6mb~0Kb>0J4g0+J+4Yx|4e)uQ_OmV43=YJ2^CpJ_bfchvN!nzO`n zK9GL3pu)~azkK`74g(|w;m*0{2+s~oVC8gpe@3~o?}Yb395W|5{L%zgCBX#R&xeRc zR!4e%g1zPJ%Cy339(aWFvM~n7jCG+WEJ>^ax9)mO?fQ*V1bYJtW(;pvm%j&w?^RA% zQiXM%%>i6>*cwlCsP3wSSEeQ(L4Fbj3??amYae~O;Ep<%p8MlR9sZ3@@5<5UAo3(q zzEtBZ3Ik}02k}w>VNp3Jx5TfV8Rl85>Bj_`-)pV@yG(P!TyNHD)wG-NR_XaNrf}v8 zofZ{LWpY!mn%XvyiVf*xv=8*?U^AW>HzFXka-8@T>G1J~qYgf-rJOgH8r#Ta>;1yW&p6AtYenK8+FXIJp& z1SYqcICM_3``!yy^57TS@?*X%|AEDz80eYLexH*!>o!0qpIbyr(~o*q8adCTf8v7O 
ze2k_~D1Cs*D-%euaLvcURYo+G>E=2|kHd0YdG4`>6Com}6o90(6(^@)Z6~(UC4m$j ziC;p5liQ9BJ((;t={voL$NBlRGw8~wh*(-UVw$IujN$EMOH$;MOtOyyEf7XVuq@6G zYZ{KxqQ)=r`brzUUc2rEq-RByfmpHNZqLBCl!Q1RaTyTcy& zN6gneuXC$h8CQ}eiyneHA~8Zd+Q2>i?af0SUU=~ZqxqG0;!l&3DAIHxw%Ty4sMan2 zg{a-maMs)zKYA0K=;lgnZ67An_YH?bn>a*C!m=>NS?Zu{Tm>%cAEZcnUskXCjJ_&M zaNI;MP;ZX~4|RExVBQY!#qh{VkxrbW)MulmSI1=Nt-jmV&vJ3Zjv>P-wuKW4`2OQq zLn#SsIIo*88>_+aem^NtS6N~!FUJK{%7;h z_5)$UqF3p}Eb{yZsOaD20<&FwmjmC1fu9r$5j#qjEV-(dk{itQEF=OrQAcs4Z9NJo z>~+NU5=$lUi)yaMBiDEBxo`I`aTLV%njOIi%|gm^aD7Xv!!Lc$wt-a&fn3H-$O&TN zpV~l~`bK;nUGWjh(=Kkw>`1dabuP}vxm`w|@AjiEKi!AU*7oD(w4dw|E4||rQYrXl z6d%ZbDF~wy-Gq1jlfSz~byxSDR_|gR*1MmKVLQo<1E+|Aa9f_jj%FSDPr!>-B1@05 zu&$o*$EBidWnbM3Z`*nvhn=!^$4@2@pe1joM4r}%L+#jC<3f@x(ksBe`VP0-jbR98 z;|0nVFcBXSOSkCC790f(et+jov)r&{MBLZ(FtY<2w)00P!>*x$ zwuky*me2Gr6Yc<@Bcb%Mue*a-;qGDpF@;G5SY}V1L_V!br>MrySwt+-N5uMf7)tmI z(4WsLxtPLLYj9DB%11i7>pJZ1_`Ji9ko4P01CWA{zGN53cN^=8>mrm?}|kbCX! zp1<_n<<+Tt(8rkx59%6Vwbdb)Ezuo(r}s|9;?FP)rPB8(%M%j(q!V~>PxP z(hI(@i3!IOxv)U@Lqwa9a{&{{xPs9_+@Pl<5P(fZgF7bvAw}>L_YGI?II3Prgv)cf*1bAH4`{JK(ln zFEZ*mUhR6N+MgEZ@S}S@{4yF*p~PV-G{9jXdq*yLd#IoD#uH z=0}x!pKO|B6TDowg^sA8k$jhQ4Ku=3%5t#j`XvePiJQOw2THHpBh=8_rZRe<)DGDG zyauEeVT6{41Rv#!)D9?XMeN(NM#A5VGASYK8+8W>q=BS?xDCI18~k$myB26HGnU>! z#O`ni@y3W(H% z1B?FP*ZX4-q)p6@3srgZUqCyEq^3FIL)y-=J>x! z0=nd*&Xz+4%C6TNYB7Av;*dXxeO1X^yg&zb*%NayP+{6guJEUq=c;f(?G#&gv%2}7 zP7U_Sn}~~hvyjNNU|mcBE&b)yuVu|Md$eyR!LYe`Pcin1T1iih95grf*6@X4rjzny zmJ3lfEz;oRfzOW+p<_`ho{Rk;+)X07bWM_FPl|r=1`*7TGk_Qai)qEN9(Dt*A={RS z($~qi-Ejx%Tn0Ex6Tck&e|YhnWAt?&H2(m5>$BT2T=l*^p2YhMe0 zC&m~Ze7o%Cx}y!5YVCS`CY2y*)^yzKA~3`Tvpf-ePdN{^9AdZ_*L|61)A2S2j61}x zrJvq}XJxm1KVSzmUr~_&0Zajxekm zaMIu;~3Wobji+r@0pQT+$H2@cGt@w6! 
zj%fd#?mW`-K|qt-Cnlg9xY>N@KX?1Plcj;31<)x0GHE3I{hI%rvCjleSor_{B7pO& z!gO8g{t4g7TqnJVFO_PHs|qbl)4uO2)b?h8PPK7kVt<}~p9^~b6*gNTsLiSFc>-@i zidztgD1LMH`@Qex3VrjC6Ku-IE$xl5Y;+l9Rve-neq$=F#ecH8BT_6pb1Wp5o>Epa z^0d1kuDrh&04braj{TQ4{n(R^4Tb!fM#%!U_G6p0X!e6LK&&Xj`Kpp}Yq?T<6G-p< zxyN;zn8=n(NHeIN_SaJ?eGPY)Y<6d!b#^+K za{;YIO#i`etBJ>PrTTimT7vyXO7Wf=F>MV~t3L)em1$ro`=h2d9iXY}`TWGv!nq^?RjQ>I;pvee#z1J&7qLP}lfx!DLaL%-b~wi(tfGNSwqe){}kO__v__(Bt<;&#O#*WT6E zt)wmN*T!e8M5R=K?CLPrAa;pnLL=fZGCn$^H&fY#m@lV7xw}t%ynpZWt0p|R z{fWBhG!H#`Q6gjFiqUy`mS9Wfv0n2(6+RJ8oc~!3jwSjq;~}S`;=4EVdcr5o&MIbF z1mk4Z&?wiq`(=O&i4K)?IHBp7UrTS=LAETxbwqPrJ#U@=Vlj4jRk^QgB&&sSnvj<` z*O@=*Su2Ru%=6|%9C#@-^Mpvq^$h|$vk5ZaeiTpJ5eF<>RP3FDxYSS z`%-#Vpmd&%tfeVY(7Y}YJt`4Z;RE%hDF)g?fcUDySLX#-cJrMK7*P;ub7hpP1Bl1b zAteDIaQjCNrN8w(fdw2az5Y`oY=%oIv=724p@sZNG{xt#rs^{YtyhN!IYgok)9F0) zSKga)#!c(q^Sv1$pGmD35M15OHS)x}DS&y+UjI<4pdWTayo_jfz%+3~Y673$Dej#= zIIo%5KZaU$X!SRn6$9F#M^r_F95fT>5Dg6J9`DWGlS>g42#>5X)eBXNY?obJDrA%j z)*GB%Uobk~o4RaL9u0hM|NWT_kj9nk+0K*B;}&@5w8YSO5F}A$>H$>dn2XrYLYbvv zJhj{(E|c6NKXGzkOa%MQo0T)`RLhmCZs2E1*kFX4Ev62|%&wT$#}?QnfM^(41gCfq zH(XDEV%OmAt+OwFS0%Lzyc&)5M(ZMsdEvmZ|bL_NGRW`!zw@&TUt+Vz1iEUIviP zAXHL^JzmU>oXzvot0^D8Y*^~-c{$l^8nd87X_w#hrsLA%epWBJ{J0ZWNXM?cUL(<9 zr|^x!r~y#5_uirLj>n<-x}Dm?yUtD>qf}oJA4(2=DHgC%wl2p&M+l zfo>aTUmDqXPeI}c;6b$D01{b*_wf2c{xju}4&*Ovu9|6noUF53PG)9LJc$3!tX!745w$%v)NJ2foC&_PEk)Gpr*qf93^6KXmlNh2? 
z>cz(hc4d_}9d`KXjK>!rNdaC!i1s$NpTFzfZILc6_xG1d3iHxBt0~nekeqkS zJKe>=<5*moFVEOFGuygr6l4=LdozP8!O4)I^y8*Ie(r5uA6`zct~>EZryo{WpF;mZ z1ACb0{a0Y$t9hoxU}YsA!db8<_m)jxv2N);o%^o7N6J1=WSppP$PQirRueUMg_eE> ziQvvLoB++AeBla1&Dbu>%;XAI)@4*-@Jp+R|VT4P{we0dq}uzek66R;J}Ru}JHJE5jy5Y^4E1Sln9WVCvFhBLLIwC5S|j8C%m zk>k?fbS}3NX`O0<+sI~D>*0or#8Z;~0b_+1#1j`S6@BP;Jn1RBTd4n}##xi$fLmN)JUjj|%zXBIktkXh(!r5s5mRx(`U zqmzaPIU{=-$@gtjS#ntKjs>rOxmKHv{3g1!CeIeopX1Od9U2YGEQ|O6+>=C)g9D(R z@;?$Vuof3e+I$v+(|4BpQFH8y=q;c;=hU+_^*fmL$?ssEj_|#m4>DDCAH)bKJKt8L zHVA<_#J2DMC7{_kcscu4qpiS1Bm<~jY5&}PM(T-A zyOUZO9c0FtMA^98r$-C-rA?3fM1QMequAP4H(gnxF)rr6gSt-&T|!n06hS=#E+g=3 zLR!QV8tVs`!?-{VS1nb4fDqiRSVNzoe)56eGW45)WZdwxtRYS*no3lwqoZ-U87cTk zKD+8tx{@sSRu%m99g|*L4j}vb{=Q>S*$9Xfm%OD`v%M&ps}_S^cVL@8svB5|&l_RXGTO5(m?RUhCjRZyqO&6REu@3wipU zBPmnumr@&jFM|fVPsk3BYeaJ9H{VHU-~4>vA$eSghc|au(u0CIBJ$`n1+sK?^4KB_MVEWUUpd0BXF4a)YdK!Okf-ASrnLR_{B z+g+^X-1N5bfsR~HYuKEziC3k?rEv`hy9#T0!pl~Nm6|#p4-kr^Y@tfR`(9~`y1T;2 zk?NMwQSNE}n-R}yC={uy$fVqIJ`KXw(NcS6g&*+3*(%OG;mfmC5n)T;TT4lk6v!P> ztSarb?C*}kiIP&FU#vl%uFlB(c!+t~fY!5SM(&#*qm+E>_@5dIM3$wK8-t=i8*Fm5-QsIJ^}&^z>+EpUzY|4}$;)B4Q%q3MpA`uRp|Yj}5E^GGBu8tjUjH!o z69807Z;;y6!DSWP(_ia;VE;MjmI$Tqb8zf2l< zP3yyODE%M3NQnuy)};iFw@ERDBeDcr^pnj?$NvySiq}}HungQ~z3+ zP7a}3oW&Xa8{C)C-W7*Tedr%l=wE03BX4eM7(raK6BUso6i<_k_*!^Q%Fw*<70XQ> z7X`1{1r{F;VPAC@kn2HG+Y*NykRp>cj9`807absWp98a{O0;!osyo~^hx`HSJ}6lvF!OG?9rqV&C*V5T{Vy2ZIbCycd7m3VdpClq_hGbW3dT0z<;5!}zapda>VK4dtz26}na$=8JN$g(LGk}vekr>^WzUBn8qQjCW>&GJZ+KI{ z`y;$;d!UFa+pY`|zSshTOg?tiHJbP&Ecz)tbD8f=KZ&R2c2dHfNt^Nu8h45?H3oX+ z9RizUM;iqlwB+`t**Y3d$(Ijb@^2kEaLF1gFMzO!^c9)-Dr>F8|I4w%2ys^#fQ}AW z`_d>`bZgw}-?|vpn+;5@Tp$|g()A1;4d>`ek6nGu7v%Mns8Z5yz1iX$SW0GoqKo}p zlASDS*YsS+=#JaZR*G*TTUfg*qXmduX4``~4izjxTFX~|Ii%#{b}`0p<74+6afryR z_X{wz2XQ3jXyTQS0_Jya&^WzZ)cY{Vl9*&7vPxVw!&dA1>#OUNU9S|Wn#Q*GX4Q48 z&cY+b4PF-6*m*|RWlMo1omZU7;Oqb$EqtaG?Y}Zx1wb5A+dJj{R83d?D@8Y>lY0Ao 
zzrC6rcuE325p}BEqbB+wRwK1ezk*|MeBYZg4!2FRrwN;98e*~&Im!!GZr(nf}fN6v-<(L2Gqswauk!>36qYtRYK2i~VCBn7j03lvaQ>>fa6{D`HUD>qVQR2OzYT>=`>to~EXANjm z`N1#XW@g4kcQ48c5SF;B+1~p_?*omRzK;vPL`qiAEl1bL&?JgH zhP_|b|E}(!BU24Amp_k_6dF6=+aXw+Os8} z44t&npqZCeol+(6y=$B~i>bN)<+v#)n2>yF5t9ZXm9rvl+h7QjHE;d$coL*P4&ThS{Lo zu3Bi>QfVmzvg8K6?egIS)esIRvPU%e z-wjy|%-4-wtVPaFXb?P6krQ|cLx z>+;$eCoEO8#{b&&y{C_sDpzC^EQT~O98?_iuvd{9(b>(aXOC?lESLwlI8HJ{QzLNr zmRyXrc7rOa&GO}>r8Kn$!dt^hJI!YY54ZTuA%>MyWf^b#WNvxs@WGRbOA+nW z*JPx>omnKk-B&T~Uv$wFW&<6k9D(*%d>RQKK4Ob9v+0!u*0QPu*FjaB9dZ7{10heY z5*_^UudBA-_^123Ym)gh4PT6_7oaI<>c^X16jmpU&zMf%TIRSriW#W*@@;oA_4WrP zDmPm=na}zfG<8POY6O>~&mpvUuiZnRG0&{(y4|1(6$y6_9^rd%+V${%d0BB2GFAKW zv7H|-pQ@t-S>T9YBDaZa3DVtUf4ZCYxw~yNMcaAcyGOKd!vTB_Gfbvw>q~ecr(ets z&QKg@-<;PZPQ68Si(?VYE284KXsoWYUT4Q+O`Lwqo^XgpoX;7_Xq;ujF(vq&P@34H zz&w^G7UtZd%*omT5FNfUuf~a#jsrvo3P|lDu?pEc?I6~hUOezoOMt!@>dd~lES#uQcj|{GUjIeX!u+n06fasNmo_-hS9W0 zSiqA}!HQPOF?l&Mlyi$AsTo($;%N8b$lp+;d8NfB%g*2{qiYeQ8#IH;-3SX2&w(bO z4#sB%75a%eg(i{RozN2_=nqC7R@a_?KTdv60h zhBn4i_#iLcIb)Y7Accbm5vBROz{n1v`s@v>;2-EdLi5bF(d*7@1ADzbkxS!T*m=4d z7UDHue&e_E97LT9F1rCjiB8{o8yc7_ZZUCBJ88Qyo;laCtLPIIW3)&xHuNN-&)4}Hggp`mlPp#i zRMQ#fi#mx^)8>0#t7-G>Zm-hK5}n9OBVP85m)QTYmQpF5BWHD80wx4G@ZN<_01`8n zdesP(S1K9wJY6$2O9J;^dn}&C;g_RpZDWu$j<3$8JBj$`*fI!CX>Le=j**5u=d=KQ|WKrf#50|=R4f6fNfslgK4|R~@ zJUyuD+?8ZptFl~x*{_c4`*l8%+k0N5C5Jx+dU!4!Sj>erh3)STqBVMOY+~I%O>a4) zMVD3OS>WEG1(#0LZsgwqXAVKa#-J?d2o^90&^TPc)5TD`p0lqsZ!ueS8f}&!%L&B5wc8(~M*XG;w0n2yTLc-OJXK ztsZD`ygA!s_b?X?3ntLpHilmkgA$5wtaG`F3&J7ZF`?g1sz!{O1O-S#k4J~{jax;Y zfHlRdZAZILVJC*_Z_CG(jZ0&L^NgEzrRnrau+XfQ@iI?scQ6_c+T>5Xug`1q`?y3S z^au(LX8M;aK|6l7Q;0UExoCm$4xoYOBmx1QIKR9wK$b1b-w~c9gR;ZQk-m&RycL0Q zeinsF4c-fsg!h#=MaR{ipsK_Bd4wtPQ4V+E$unCoTS(YKI6ZuN)dw+JQcK5xuXo7IQr~eT~ne_8e}-|KaM@=epAmQ zEL0?+#Kc-WZuD+F+X3Dty|aPA70&z<5gnSeC0}lc93#bnPJI%-J&}@s0BEk2Zro)O_gOU2R5@@6j(9Ma$-3qyaO$my9{&L@}I^6z&(Qj$#uVS}b zJkH_O7ZUt-!dEkA^V{A@B!;)Gl?LAO63f?%X12Ejgen6fH3{S!o6t~Pk1Lx<_m?$Y 
z@m+HbIPDYm*4|0&(H!0NTpHoXf`5{FkSt$+o)kZ>;lWTh@C1#kNhwSeh4H*RUX#w= zCo3us!Uz^(*7!yMa7Q^4o(I3Q^m|4=fF{}hS5*KC(R|NMsG^X=;Wm}Atme$vUn$-EVFWoGavJMDtC&g zV;5p0?8m)IR-ajsr7vKr$(~$e`I{H4-$2bR)B{(PjxKqUP^N>ph~;lRnV)6`VidUD zb#GdqAzRA-ML>Xo2naSEiQRO6u>XIl4(-_@pAh!5e=$}^{P(9&LPZ)EDT6n}DfzSf z=zn^=5Ew}1IHa?FnCrjaPlS;iqGI6r$dpn%H=;dTiX7Ns!25kpakAvTMZDtFEw?AI z`H?PD$R|L+^PJNySHc$I=~Oi#0Ab~@{1$gC^*_Dgh{CZ~`DExZ{UPmeKAi%82`qC| z+%n5mY!J;6xk}CJd8(o`pEWzvMnSjqL$sXZ%Sd9cQ!_mU|F-D!6bA<#nMI0Y$WA2l zr@ui+oA}e*>XDIj#dhl4mx8zRGLUsx%8+$f{uS|;mU(}w*2N)EQc5Ku{BnQr4msc( zoY38#z_st5(!gRo9-IFARs!{kc{Rr31j2dFc>06qb!2YoQ0UW559rl@qoFoN1neHR zxgR~&A3V%;whQt|;RynA2oelJFIT&d2CGqI~_l*6j2 z&VUY6-z99pe?L&y%+vyevo#;s)eKqwLB=k#$fawlz_>d89(0YU-B=Wcw#z2|z_Jdn z9X}zH;d8>}9TrY-k(|c2iJM%vOAB#fZ*BYN1rj&@?w*6c&f^$`l&`$N3jBiYq2bZRg89Y&HvP zF?+;PB)SZ9-Yc;gNBFWPK6Os<1`SP7EjL>q29_3v6U*FT((B`cc zNcPfGwFJP11#x2_XvxbyMqD3K+jl7Df`Vi)LH0fgCRHtk$}SdY{aNp-y}8Oou{d|nsR9Fa_{}$I52IY-S-0S6Rm%NT?XHnL zO7BN<4}HI=?%o#&F%)fn>6Sy}oMH~1H?J1jgy}wH1O*Af;&H#BX?MI|-i)365K;i} z8eCH}8xGolF*h!d&ZRXxWb7X-fxuOmpoO>xu$Mv49Vw& zZbDxaRa)k`O`$t)EJT6C&zCPw*i_4>Ty3~K(9%4P1)R>^CV%)Dtf>{7A)oCnS`@8- zt6rQ$agiWV@0YmQr2`G}M~TaOo!i=5-QRH0C}unieTePN=mLidRD)@(q}*zj zYI8XTWBh@v8X!E9-;5(VT+$ZB*}g}G1tTXP9P9%Q$5)A-#oIx-PcBMBa*^P_dMv~s z09l5TyTtVqnIyu-!5X3~R77Uod#^vnO3|zoA><}O33lE1sum}JX6$}vB$CGPXLt|8 z8H)wTDfo=!hDD4f5{>$F5)`D?)yA=o3B1)5K1<0GKtk)pMUZE`9TjfAq8aRRSYcG$ z-Ce*viHHXKHMHv))Z{0!78TD2=xUJ0n@QVx=y4QBGUbwM8D>!BD&wOMTmBPo= z%vso>Ig}@}Zu8vF|Kx;%fq%ater+E)R>;5*I$I~8zJFE3u9#3+vBI3GNJj1LLWH~z z<+rnPUZi*VOSj6{hR9`%)fY(b#;gEYzm%)@I3IR4p}3QDuq(pTGzQL_Jf2Tyz-;x5 z68NaQ#@Gdo4N$~%!u1^0+Ud5tnGB2&Gp zWdzbDs_DnW63l)*H|E;O)+3YZFj}DF6grLLYX$2#JEpd~YtAZLagS>MJ{Oi4%y4pq z=2cIi#G<5&mJ&e@Q)5ArvSc_4+1w-1*RM=5M+@)M24?IoSwx9g32n{KF4zytoo4RO zz*F)Z=HVjE@a1iaEeAv+e*1ytO$_5>!5$7<5BE#Bumc@;v&fIpZn}I)E>c$yZ`Y2A zq&F8EZ=&bq(mZcLa$9HE%*hHNHQjbqem4AenH4L)M^k7VIPGg6?>BpKz`U3&Dj3>T zz=D>?jUWsEz$QKan;~tLU4qjdc!HIv)^qMyg!8NkE`k|?1*wHcPycWMmK%4g8MB-w z$0*jl@mHr`wZ1vg88E 
z73g}yw6Vn*Dxu2;L#0A)}eMj=oF0oW${r7(XNaD@L1eqP%TBB)LJ`ixa5f(JK z*F?r;2>e2IV_l&g;FGG)UM(YF8n;ibuWH+PvA+r;C`3jF^Nijm>2AEh=d!agFXwh_ z_?6Hgim5{AkmwTStGg3A8eh{3nr`J))NeR{&&FRB-k7udc{lq!-mvvtEIFSt-C=ik zJ2YZ>X(8Ek$g86mNL`Nq;+jH|U(sZ=C_em%apa3ns~181s-=RWp-=9ez#{=~@mLgB zayWN7P0E+&+C8Eu1aOJAZ0IA`&Rl z!ANg0RPr0OdWBE-*-!>%+r>IyZs!?Vmz0h;ZPUxk=F(5&)=c4hKk$ftDiY2FJH(>p z{73?YM(^?Hk8$8wz0f-0sEO84dU11!3J51{{@L5EH<9ig69HnFRgIX9Hlinca0McN zbla1uNa1Z}!3(*%ld$)#B5&a3GLw%`9peg>KVc)!%AwpN=vN_VJEOo5@WR^{VhwZa*A~UvswS=km3Wu% z?=$i_oMDdUX51E(08p;&PL+5cR}?ofFj zzF}uW$5tI6uql>JzJeAVe0hUHM->|u-0Og_{aeC9WEXQ* zoRqBNp`TQFm19tK3@mdlxlEL}@{&<$!>mB$mv(u)G{CSwD2?xC&rBY}Q@;}0{a>LyD1e~U7 z2c4^UBR4T&td^Z0c+gF#b=vIDE)e~>>My4XC$w%t{)DtRc}>(n$L6s2e_XHDQ~+pw zi-G1x0xLa;TV$6?KM^;8w~S@q=#HdBSr^ewc&7B!Dx#j#T1nyhe)`AJV$#B5Zb!?k z#{;XZ$@DnMeLoRRd*rzVoXw4S$3#H8C?$fwy^v!?kHBOZe@b$$*3iqG7_HSyApHpk zv~QZ+{`{Qav}Rrib0z z#bYZAl9>hsAaCNRt2>!n%fXrVR%F--_hpagu-m=;=vOY8#yWap1g7nT<4J0xABT~u z<4%q)JS`=^8eUQwxu%iEZkC<&rn6J?=j`!9~a z{7vl*G=Io6Z^hU#h;iQ%d^9_|0I?n%dfNVcK*@pYwX%m;Kku$*2z@UPMPg24?PD?h z1QaVcInJ4l)3)~tH|whvAzlVAJ}le*ZMz{9vn+lsBHZ9NaJ`#;6Uy2oB$qM%_a`TW zmscMvuRDHqsqj=>FWZ3k^!gB28#JrHRIhSSu8on?#{4c{sV7k)^VY4_TZA>4XyY+t z2W%Ox-Jpn{pZ#P`J*We^+bHa376{*kArS1_V(4-$n0=oe1tY$o)9}!(H$o<~k>4Op zC~Uk;ek=@gDwVMYZ7}tX{&dT%zov{~1;+ZI`@oyx^zL~v+Vt74o-sCH9+aAVC60sm zX^hgoGij~t{15itDk`pR+X4*)4ek(vYj6ne4hbIIT?-BF4#6$BJHg%EgS%Uh!mDtX zSJ``?bML$7w)gk`ZfmrHTC0jRb&NU3=)KQ6(m~)zl&m&^5^oM9S@?*;gOhS2H6*MLq9I6xo&n#Ow2rCDFTygFuk(FnE- z+@F7D{Gzvo@lQ+7i9qu#6U5hjGM>q}wY_0n)0D6|ahnZ8+@J={}~ld_f@Q_ zp#`KWsH2`-Hm~nG-;}mW0JuHe7 zB_5azw1}!wy{f#-PdO0)Qq7K2jLvBP> zI&#cE)*&YZ-M;I9FN)F}=XP(14{cm9@>fQC(AWLbuwn9+g-=XfH^fzt76P#^dn&Ux z$Jp&odXIY}?~Js3h~kBuW zpZix&+tNy>SL;93eM#;nG%^5{@8!v0+=s?d*mGvf3XSv0h5Pd z+-=T{LPTcV%}|emY1&ouTQEj5i-1MtB!WXT~AqZ&ycoI`3TQN!e5f zEm+`S`QQ1E445yW1&(@qe-Ru1ac+<*{`I4R0SWM^!lVuSkL!@PLDa%;IoL_`{)vwK zl@t1leu!B@4)K&_2<#Dm10Daq`1dP<97H0>La_Z`+unaK-zgO(Ai6?o7#6sHL&N`D 
zkckpK4H8nB@^VZ37w+a?L<#Af@IRp`@+pRp|F}iU4k>x={TbaqHTka!2;=-=LP+Fj zBLC+t3rNYuE`O{3YYp@7TKUucLFI%C*Wv%?ttm*!Lx6o{|5cfPm0ndGQe7N>GuHpS zm5Vqfj4~do{%=qDcV|FeOTR;^O9wq}^zSSG{zDWi$NzmATGjuIwn3t1|JPA}_tpP* zM~&FzER@mAa5G67g#yYV z&lcoY$!c%0(dybM<=-?H3J0P}e{ln)`F8;kVL^NcEv~f)+nD=v;VIZw(UKhrxQ|_PfEbNS# z;sz$YKg$-QVyx->IVj_b9E(PbPj{L=q;B3oPM6-g5c-joObsHXMAf2XExv|YC_Ys; z)#UJ?C;8XpM5k?1onfoUC2Mw6@L%RsxQ^;xHg>Fqd-d=2eSIN=2&d$~*O5{C8%MKZRJDqNtYY4rlO1AmKd-A`)pl$MB;U6z1T@>b6bg`>e;mJ-8)6>SUOp(dz zYGe7TmSg>yqI1^JrZ@U0n>?SHxH}WCtQVK+Pfm=rhFQI53uFMB(z_o6(|`GNTud|( z3TUIcW{-w8#R+C&MzwuisD!>Z1aYQbJxd-=H`eF~)OJ8*oK~ARAxN9@FU=an5FIB7 zLSMa#EB-PXK$WRQCH=#yW1hfs4}t}Qg!WE^HHozQM^mMmVzW-M<};}!zg>iz{65mX zI7&XIbTse$u*|0_NwloP5(@t}d`#|XKaG^z%ON(5u}7;P;29gd{~XV#-QRp`hule`3Mo;N<=C!qZyJe>3sU|NJOPv9(b{ zRkPas5TXe6h0+7vQdGdP?@()w3o{dBtKR%={qWSG`X=eKYl)!ObD6Vb%#F%(gloX^ zsOty>Ow9qNYb>WEx_;TQJXMjR`SLbXsNG`RwkD)iF3^S&LPWJ*-+b`sKvpM0eNHa} za|)H9hB6Vli#uNS)ECYZ@Bkrvo=&OY_)a3M@wN|6{Jcj^2W|2qxnQFGKoT~UP&_zq z5_&ZO;={*Zkq``_y7S4I|6zOqXdG~@yZsMxEwPk%qlHCotezQR^()so3}gmpQbM4V zIB0bu?}Z1yz^i86Y53R7s|o1^y!oQ=TA0l`eBwOje^P4O$Wm<}dvg?PA$Y^vs3)pTI%QoL#TZeW6H(@_^}mq0xK_I*Kz|uQ zSWNsGih+Iv;iLl<=OaCQoWNF^WCRt92-IRg*PRVuOY>ksAnvHgesT?$_kvJ%F)u6b z$Z3_j4Mn}udtvp;1(5v=x{*xF?%1s0V?pDVK&kSzf~SLls(88F_bCcoO|p^dIc05x zqnLoho{v&h+O$2Iw%=-Cg7P4E`{w!zg5}msZFH;aIGS^zie9&t(bKSVd z=9%unHg91%ERvrcL!hn})&s|G(tIZu;^?zitJ2qa-fJMjF@QWlJM$N-@tKl6+qp1r zr$43uF2_R4#tZI=cqgwb>AeOI*Aka6s+y8MLzpT5f_>RTbgO=UgN%Nr`@7`v_!^sR z)-lOrX=3C8H7jecy#49?+mn=+qGpFQJdY2*>6{^`xO z2fGKP9a#48D%Y;H9W%!-*RpvX0Xal06QT`$nVYCYmOx(in5U_*&ySE-M7u_&N$=(kpJ$}=i>z8IpKj1W{(%wcBV1lgVs-c!w%2s z(z%V6W1lWFCDOcUm|-+E8hv8Ct{g)M{3Th!A0a}GPa6grp5AuZ3Y+940FTIrd@g)R zZoRZMiqYJqq*Cm^m-w(4cU=y3dmh&pJ$07?*KBE5>(nh*1q*CPCAdcfweyi{?onAu z5{r4I6Xuo$9`<&&O2g(LH=;0=CP-vBPD#~$k(Sv~D{}0fPbQ?cE+lAnVFNA_-J;vJ*pNTrVnr!C^}>QRzM%X?`z zgZzEekadV6Rg^Z3#~ZFvj0oeu2xs$E^NJZq;N?I7GdXs?D+Va1=Z8M!9vt0X&0DTq zhZWAU$Q;s)?RivB0jb2<3J4ytl~Iu5Zg^w$ndxVzdoPg|HH^n^5^v8I2DMBIK 
z`=_{-U5Y8O5E8Aa9@`n=@jC0zApiH#72`45e|8Qf7SL_Z51%P~DIMJUg!cy@_;40& z&WSt|3FH@f!45()5LuHx9n@m@^LCW$v0y0`7F})JOi(%fO&?^z02Xv)^(|h-`|wJP{83q$jV$*n8vLkJ zO@`IAOzK)v6Y5$EK{cAJeRp_)$hyr)U8i`P_wqDpct4Kzo)$S0&-!!PO@4al$g%fD zbTz$w#J`{$eMEISNGaY=cV%3AjPF#BY^jc5t^6GydB~^MzSJArXl=v$2Sa%z=+8!2 zk`fq>1{3m*cw(UtPr#jfFt9~YifnzRHmph=gQ8kqWDCBy+rIbh`WL{fH@DAarT>jn z7$cA&PaSR5F~;VnqfP>)+$~`VhSh$0E1adwT1=ptoLA*93MX+ClBR-Ee?nQ*BAwkZ z_&N@G2c4uRmY79I=-)Nr{W^L*Q9zs=f`_YNhE{5^BVc`IWb*x`K_;7+R9l5#8O{%z zDCLaR@=L+oC*YwgG6hyZ3!~bWN76phleI!BBenpgny;Ao@$GH!MYkPde9I^1*kqnu zUr28g=V#+v3g08_+3Ch^ec4TNkKV+qWSmpFh3Ikl-|KREb2oAl{e>@4jDwlVeJ3`- zMD^A#m%;<5bTQU$Ma=w^Wq^Y(s!!7e(j&Lc*@dRlfu`<2-eYx02e(gxlACnaz_!d2 ziBv-P&1x*yFmNB7B6Um0k5)BDQ49JWr?#EjWbjKMZm8IwSKU}p!@n{(M;Xsir$Tu1 zX8P^HjDgYhDGO2NlTvXPDWj3JuyyB+9;E9glCPg(-a@GX0qyYg2VM^EOMWk{JpMs( zmOR3YSm#8mHht!6LU*%2!Tw-eSxl7Zq$f57_dQeLpdHt0^DrdbPr~-sIDHaP8xqXD zP|+pLZ%JhTRVG3E-(i9*jPu@}TT68Kv_Xt~sQijH&@eC;@qvtsnfpCJ^&F-e`JN2? z?b<-`j3A;fzx|&;mYhLp^_q%jZ%Bu$Ma_Jx(jH7yPWTt@agcr*; z+zxigF9Bwz1)j=I_|w%^pk?*60$*#mmqCh=3ce%F@0s^MyKRcg_d-V6@-FLSFuF70 zIxbu$zLrjWbVfqv6JI&b6S&=GLC!oLAkRtEH5>N!b&ol<_y|llV#f!(8ZupS)qRRW za%=Wd5bEXHj=Hb;qi-dSA*h@G@Ur(bk<~syldkxP;!Hgr+&4K z#C~WB!P$1t9AxfY+Z41eq)JJgqMgIH_8UN*NHkxQi@pNh=Pq?J!_b$NgJ~R4y!EV8 zpy3X4@?X#+d6hTcWB(CN+7cmcM*A*hl<^?EA1$wc;`l0F&;+4sGYj4A%e<6_5JlJY ze@`YAMl%VTi0l5!8p~9HwHucHkm4r@3>qR~(1p&&a>QV%HS^qx#Wp;`U?PHU!a}HI zh6qu)^C&0V`nMHppY+wnY#^a2(oU+rw3MXu`Tq?`O~0?f54(=9{c-Hv^{6AU50A&v zz9mdXYJdUr?kc<0#9v$Ojv%9CwDbxzwWK3C?8QLvR%Ac$l!b+c?f?y%MPw=+nJfF& zcgX|@WgP7&iVm8{JAGDczI*OLC;5447>EdMjxu`>p~5y;6f1d=jXsYaq`du$ckXXR z?&pTO;0co%K2?$Zv*(@vi|Ctn{io^UbybF{N(n*8id8zhMX#0fD`b0v7fr#qc&u0D zh?7Z@)D=kTBML#A1E06aqv|6xaOHn8=|=Dt+^xGCQX0#Hhq_Dz1%K&E~x z{$<+qVMfm2Q6bIahDdoOsycBMWHdv>*?NVd_?wq_a2FWsQsI*Qx%a=gtrYXlf_Iu5b(B!Y{?@0jOM9mI-q>QA@mA|x^MT` z|7|8GgKwl4;Eoe^YP5$|T%U-f>P}iRdD)so0L(ICzx%9PX|}%b;Zt&BU}Z(PumA)F zo710G#<^Mh0{}*Bs5p3PJRH7uBGT-q-Ln)+N6Opoi5dk&zaziR@^Wi+h$8Ye)?ZG_ 
zEI$4<2$%P4)X{{l$CI0ebFqRP=>nM3s}@pUG;n?goY;6>!6eEQVksVSCwioxtCpRm z3f+PZ-GtZU6>YiAJN+Yr0D`b^{oek#h9H1j>VT|&0u3riP)(I`3g?G)BQp&ayW%Je z_+iA6SE92F$HtA&|BR)ndBSdEE&vW;SO;2Q=rPXnZBp3F)B^rDe8I#rO zLmR1#UnMklNq}RoNexsL&bTLECszG{J_g$(i$MF_^6`+#=xB4K@7r{ErTCwThn_fHB8#m4x@9Ai!W9O5wb zo2eb}J;>Ds5h(3YgDK2GPPZNH+y>pS^s7mhO(mh@gqawMih9ub1o05sEVEauWHP=M z70L6D+)QNH!KrpHH*LptAfO3Y@Zzo|c2c!;fb?emYoM+n_WFc+Ff2zfHM3?XHQu)H^b{Eoq0JitvnI|e>1%_6 zNgZb0h+6XL!$$cDn*vB}dOB^2*H@0?fac&jZO<-&M(qP|C?i7%#QFeDCK;?~Ba4UEKX48X|Uo>I)K8z^D``j}MR^9|MZ+)AIe5%l=yxC2GL|2L_)wA`V~aS*HknAOkA zlZmO$CDxlHg!$PMZv{%eH@HSE0*Dj$Xu&jN;a}Sq(LJxoDeF2<4khDobhkWum{r&9r2dC9zR^OOjNz+8b!v>*&!)I z-qP*<*t3EcUUBL32h=b}^sl_t$K(iR;t*J~v1xdi(f;P-aKzAeXeX!ec~S83o&;9@ z5idkzGGo}AZ#TVBhDx#>53%^WJ1z$9<#^c+s8|{o9~N=0TS3lDGv6|AA0mrq21#bb zVJ!YscDHUfGKV_EM3g`z&!hvJ~Q>D#`k zF04OnZg#FG*%@XnD}TT|-XXcWkO(imE$$6}<9}}?0q8w8#baW)#*$XVCx!JU&jved zV2+-f=rQKJzvomPN!3S(=)WKof2BoCDf|u@gscokjUS4B(%aSEFMZhRxY<-#KmejB z|1pR;ypYsLh5Q2ppYWSe99?#o5ah$SJkT2Qed*llw zKcgAO>RpK8$ERc}H4TyCubVlVjQl4nlf#KFOsVfhCxgDY1L1G0sNIYuLkbv;MXQdz z$vY7={=b}$0rw~Q1;rcjKaal|Sqd$@mWd%$KX?~`SDj)v>kJfcg4d z{;K0H`8$BGuF#zyd2gE2-R3o>PTxwy9>#9iBL#-*%G}`f9z;HJ)YtBm`7>g}qbBC( z$J7v*kuPtKb6hMi>=sG}AMzShM=p@d!a3DUz3G$}0}|U9^&Xn&QWk>QMn*tIj=Uqv zzK0MQs42QBucj;{oiniTV$!}ZMBa7cTlSAF##N0fRDk*_NHRr}1Qk#=@A46;%`w{YXNZM>h{{063&&X{ z!MxstP-UX#C5T5?ZR?qYGf|jLB&t7}5o}ky=wSb=d`xGXyw|=c&7Qs0s;+zAtn?EGR z*Z9o@%#OA9;dK)IDdxEmvPpj!XvGBhF2a2~A;`y?;{VH!q z^V*r)bfrdtGicdancYBN+)Zh{%Aoh+uDE2`h(uh1u(<2>j+_~F=?h59!n3V<>h@Le?H+jbq{k(IJlX+9`xbDYNH43%$k z+UY`#>K5P%MtF*1wLgbToMb_C^92SN5kJ(ztdyE#^1quPS{QMo>)~F9B$%=n2VS!~ zF8vbHxnEzF_B}uyw;Pj7OLK2>LpEeEwV+`zMJ@ba)6q$V-%$x87)X=CNL#>((p+t@ zRyN;mv`y}L-8_1kyWTj}*3W?SGR}ec%qzr=3OXqJ^Kbn3Ug^9E5k^4td0(f2-{zEU#$px9dM|KXP zAWe9TXMU*^x5dU_mw-=l3|(KsyR*W(Rn4zzv9x{q@;CKOvfS+)UJBT+;^+p4wYqiVXtXB}rMrR7x+aqcB4PmLLer3miaysd>8TS>^v&O9` zS#nq7Y$CU`DwRO&R%KZ)vKW~`FJ zCs!=fRYG6Vo!Y`V*_bf#HwVG|rj000|@zU&ys_JU0ld~6Q 
zTbVZ=pR7NbTh^RTBK8iW#gS~!-4l+zWgeGX9bz(q?L2SqENX&@#fu=Er_}6z_^rO5 zijt1SM6VJ0v&wgIu*hM?`D&Htcs$>3jEjDO8n?*BJColo(3#ov>qNj(6F=wp4dao= zq6j;nn0RjELyapyds3$*`_;Q<%!{Sdnk8(O1ap$*oaUuaFEef1wpM~g+oUrV>RoSe zkyh7M^C!*2C4>;>AhHSNmn-bz{7SI;xef9n01jQ;(2Xl)uarxV;@Zb7Yb}Wc8`Sfot6*aG%;-~#D6_z^QmXqh1bha zt{m98wKMMu)C~043ndHE+;D(Vfw9I>92F_nzafP|CD|Cc0?!O@oNo@`ylTdb^1U-$ z1AlVpnS|lxA}-u%n3IAeMxIEeSPuLtUpKeGx z6bK#tWM5XBO6(ZrM%CSfQI_Il0{2ZWi=q^_vX2 zI6Gz)pZLYrYU}nI9BS-uw>JkQcM$sM-Coeh zjN7vs%>7KXpLo0vTOX55{Un#!G(8b@GQcW$*2yDt_ssJ9cKrQ(Bkr$xLd;9LwUF04 z@L}$gAn39YEK~LXJZ}P~@7w^)QwR;@GAJ*bL-o^X^xg^Hnz$E@EhETTG7?K|@X$2v z##7Y1{=PFXJ>&d6`b5pdT~v1e+*S0oUX)f_PiE%|2dbgOJmKA8gjybg3O4kfc?heO z4Mi_$g3*CK=Iu6)ys?gL1Pgk`Z$#2t{U$MX1y>|xq|!Et1YR@Yroo#{Pu)>!XSq08 zGv=33?m8FaFgfy`e#qGcH!R7UeA7-p{1GHax_Oj>#%Uj>e`k*1_*-7%k!@}B4Qa}p z>r**ku);>$N9UlxXg&B{FqBuZq^?pDD9>G2+Mc~EpRcc}_jRz@FP8HNP}PfHhc<$m zO>!)=oK6eajzmWHVdkL7tz9v4b5v(rzj1c@y`OjaPR{Bqq@-bd-L8Cgw93P(v?Hpc z^7yNuq{Lh~>=2(BSJbfdBJ&09$8DbypasuJfrzXg6`UaLH!mC1)vP6pe{mW>67Vy> zaU84bZ6^gbRvG4%?V$IDAsFd4+#njT*|zzk<#2{6Nr9Pc&OzT79S-$eP}I>h07G2% zqLcYcmgs%O*<YiHbICO zx7&m7!945gFgMsR_?x8dS}%`EE<8jK^ApiukrbusbJ7`)zwqkavgXVE!`I!NF`Uxv zJK=3-YUe^^+-o+`;&`-Xx;VEc#S=G?MARz{@sj*t5!;KDZe$p3s8YXoippF_)T=|4 zt9Rs)!UrGM(q`jNSw+$s@yc1`Z%ju4pE{Bu&6COP81@#9gKIH!y(we#6$mmu%gbsq1rjd;o#499p-k^l1%fBQG@4bX$OFXt48 zaZ^yqxt9<1C3h9hkNfpP%vRv=E8oF#uHgv4ej+|dsXx(q6|euB2gH8Zu#Nbyzc)2O zuy=;BaolML>m=vCY}0BG%tb)3l8X5Vtr{l0JSJdPCi|XDI9fL@ z?T5#tGeF;>lg8nbp^8@P?2}x%wo<($yOoBhjErjvVB*MzLs>SYX6r?n-9W>`FheTJ z!sa`QFt@4wGzL5DO+cf={y-`an&FfJwcQ9s34*Fya}pIXtPaYg5T%QE>Ls6MUF472XGuaCD7 zX0GJnJF7VNI+KR3+aq_)g>6Hca7;T6QbLZXlFefcLMa8&f>ew(fPe@*aH=rQvLk`@ z92jR)&Vdhw-1MI7vx9TjC$kp+I8l}eqCwhjv3uUmPlwwCUGJL%V76(M#N@T7BWdbh}@C z53L$=pFPAsAa+&oNln(Kd8E)`1EHJcsbdr(Mo9yhL|H(O`c%f|C&4^kPvD3kzm?|d z#s-77&P#^w_#g~fvp^V?{oq(zqfD{ItFICUO~N@ctNq|x!t+S`ggue%U%!18LDY%| zvN+6Pq`b}241#&s@A9i_db9o5sIuI7fKo*`oI%WX$$s)e|!_Un&XQYi}P%%$x@=#Wok1QRji4O+3N+k 
zlYD5vf(?9Ef{L=P#sy7)RAOUtX1}Xv!Y;2K?pc_PTe)Pt?(;Mxgz@^MYH4JK-kVq3 zg%bhTn$M?By&uJ_v3W$%Np$LvzKUV!4MhRiR9N+&ag&QxIQlvb1vE1Iwo+0J%VOZ^ z)`wc0+h2+ccbwhS>@gX~D~#A~-%p9U2eTQ4+Il#p;gqjQr0Up#uU|f!#Ub=gb?If> zRid{m;dXmD@I{xRxw3pO;1cs-$EB_vd&d!$-{LLVa5_Lo2CE{q#ZD#5B zeeZd1Eagc;QzS5pEEb^pwh79(Lz!B5$_m2@LlNUeE4&!0IYAmqlng_1va@Rm9k1^( z>hUSXa;G=k%*m_MWM_aQOWX>gZa4UDQP$5VR@Ue0Xn$1n*im-&H?fwY0xk5WAq`j@#>di3ZT7c*8&3_N0Mcl`p4bdcI;b5&yBv*;w%RCtCgUZqb%7 zLOR6nADCTlla4P3BM?kk#Fx zs0))C6%i(!&gbp>CEEAX)rB|cZGTnME2YUu&UI>g3&{I=c|Pz=p5DBAG6F#j=-H8Y{R&i%oHF97Fc7C~Nl4G_i1cIi zOSo_+X>5((5ku128$^wbSnf|S^2tsJl7}#J)#+;rYx%rtr*fbBvt%N)|{ z`cs!3K@na-4P!Db2u4qCWDIfT=g#|+@S4X2Ln-2CF&uR{3=E0N{y&bdVSg(!{BKBx@CcA9d~er3HHu$hFf z<{V@(9Oq{AeO2myUHjF(|$M<+K;ymzOnRpC6bQ=$}hGexC z3T74FAe7H6SSAVZLY7o6jPOP3_Qk4N?gUp2j(%S({tXKg?73DYRZV(whD`0PMuzw6 zU5qPy3neMGHp0ibQ<#7fUdN7&)$C?t9zb{CP>eW6q+--oG~NOSnmikpS>neJ`41qW z?BkBRiN2j&x@uPnY)3}Oi62mPkf^IQF)L`PV$u!y+Wq0 z5-XalRC^m*2isz$_edL*emTNO(&CH@1!AB7_Dhkq;jYupXl?4H z#ABc;8*Qpmci~U5=6hBI)@3yEZRDA;zCb2KewX{70;5Dj5VJM;4gK7;mUu| z5;z=ZG9dwkZh%k2cWJV$7eZiLHj&`9OVmT7Ekm8~yNY-QI-+yWKG@U72zagX*~IbP z53=beeX{R0hNhK%9A|4MC#KL-Nf;l=|z3T7)oE)%81)4c-js zF;9kBZ_Li->!kGtghX{2i!a!arR$Ki+ncLG4NJ&2$I_+KAyW3}941^WRWG|zC^d4= zPI~)e{Nf1>Q+|*zD{2B1Cms~Xtmpa8&OYNyOaFxlEdr0Yt(%Dzj~$V(u0yAJpIv6d z4|h^H3^q~bgbb2h_MQiPiBu!;uOiGXZZ=iL@wZ+}f`~n&YZIvy2tm2xAF7ai2^jR; za730bw2vsP1*!Rk~q#kaoTxrJj+(W?`tz1nA^I0r8Rud^cJVlChEl; z@#nr~_1_{kNb5(f(}9Af6Xkqe{+iL4e&t?WaGmKImF>U9(cM${@* z{58sh=t56_K%ar@&VAUJuQ}qS+es8G5XSJSS-~c;XOcOG4PD>Wh2%JVOfbkk0HEp` zvzL5_xxLp-#x$;@fg6f>>j6_^?km*Wioa8f$YbM?@gTh{zxGk{4#-H)DG*=d+e}@T zW!;Efd|J{vYdGGZ@GB^CNjXWhMJnmG=QPT-BE4KfW+n(goMIAO=5)${IVu$NIPodK zn9=YJo^)PW35AHRi3%Tta*sxu38Y@Zxy# z6o8Wh*|~0K2>wYo7j_bAl#WB8nL{feFFC+?{)rmQmlnK&5)kA@C=2=(i3 zjx+wwa=obbm+#!2kn$0L3{+#HPBqG%@G7f=@u@>l7qWo!8I-?ccr{T6612>tDNgK( zrf$e6Y9G2GiOvJq1W9NdDX=fMI>cd^u_Qxa|im`fK8Y|D5rA=bzJ3%}_)yAcsf6_h?RZhI~qj$}_I?PcojzkjfRv|OWH 
zk6EO3^j$(^!_dK?)%etDj4qmCZFA3i0Up?QE3SXsilkqG0N&b7lrx?wL(nkpZUriY2Qls2mjLETK|_8fPHxPb#6OA zajDHvVFdnZWW21pfVvnTGr{nIxNK9WK^JK#%;fRN^CsEa2 zb6b`-Gvu_0Gp!>>?@Y20>p?6k2~vIZr2*NmP9p-7*qk=5T80TOHIKAjGxZp^n&p}Z zmZ^gUgSNDqu_ED611O{)iqNi2NrUd=hG}&PU}P#KmPER*S|hhAgB1N_5D_u#k<1Kk zk{q4~bi?Y`2DXnr6X4T`w%$q+&&9}Andbnr@aruC%YTWmnPj}6uP!QrQU_*hvOIn! zt#Jj`H@$9L9^UNKpAL~@mFupSw~V%hm_j9!RIMszycp}6&xjXwH-iuLI@0l*Qf@7| zyi$9kI@_O$y=@j>uCfm~aXMd#b-R`&4TIY5!4ro7gOvTvhbZrj_I#5PmoG5t`b!-% za9{*5b3fnHi5$g=V~4^I4oa|J!S``~qMLHdrjREdU_|%9LSq}~_piJYLGk zE2OBsd+8D@JovD0}XsLNPS$V*ZD3R6nxPgF={HXqJVyMKRf!uICAYVxYh*lpwZAdm}Sm%)bM z?bDMW(u;yU7dprqiErJ`lqU;(K7Hu&Jcc;;61R5l>o{7=;E&qMldIV&hi+bCP-NqNoFDeddg!+4D6 z>Js_3`e!#Nhm8U9NANVRv`>G}u(sg(C2E`c3hntpnS1DGreHon)&hB9iJrL1swd9K zV&b(sQiivOLCPaR12??6scd=|Bu#+6&;GalnH$fz9ed9D7)uEl!ZnSSGQD4wm$_l0 zFnsjV@rPU++Lvl9JeM81L|JCfNsw?}qn~ypmr@Q!q;#`C`))_>UDcARN&zL?1F~@RPN-D0Yr5G z7iY4JBrc-jf#HIbv4_rFAQmATy#yVKBimN`@hdBWj>{Loiz2Y?a~WSu(l(Vylv&I)dG{65zf0Bxj5K$g1a2^ z!huf2gA-r5ni&NiU$!%jW+HH)B!n@BU17E_a@SKAK~(R+keDpdJ?${gw>}DGhAMKn z>qsySc0Nc{RDUf#1f(HoqaN?@@D+5k>SXl?2(N6!bcQA z{H_@Wyq(~9@>m6g62CMB1&Wx5i=bu?aYi{&*ZkTV;56guOrk94JiBTVuji+~vjDj# zb=v42pm8ZO#AmTs_kFm#-x8jYt6EsPk_2X0$oHQ;;}&Ih6}`=;Yj;&Y9U_1#s8RRe z{@|2`L|uLbd7Ptxa6j@Kgk4tX)U#av&Zv6bbbi+k%A!a$lO5DCHb3)?s|FJRTWT_u zu&J7POx}>D_vcGE5(-^SQ%pZ4YxBYjAR(6@;(1Xc?u({hU-9BUXTO$`8hLi5wJykh z)+#K^ld}=S_#Pn~+@CXOBRl#L`mg8FLf?mUgRleB?3OHjvH!HCA`G^FxM=r4&KEjt zs=wd=bNS!Davm@gp&A~t{cryI_`k3J>m9lxl&b#Q|Lwc}?ZU#abM0=(D~SKwZ73YY zYa&Pi{^LJl3?sp)2#-e$PQ{Le4_smlL7@c)|h|DQoahv=^b%K;5Q`?l2+iQrWnOuE81(Bn zHd{6Jo(*I7e21d9ijEg5EgO?T?1g+TaCl2m(HqW(q_btlZyiUCXUFyv7EfjzgW4XP z-;E{N6c&ktmSqg)s`VJ_Y#veT8Yp2Hz_5w+zISJ8KpH7mR>t%>j@1GZUw4lXch{3I zST61Ht-14m6*Csse-_o&?oW!|-F57p<0g{hOwu}*Jm_<~i}(S|($JM>K0& zTmG)>)E4{vmfgy8f*w_krf%%--TM{H`S0j*?eiZRc3bdL9%h^9)vzQo~#oz z6ow{sl0_;^B^>t}=Gdz}jkSL}k}$CO`kGc+&X#9A6T7Ts&7nbT`w}C%BG#VmdCCZ{ zI|MQ#7pfK}(tKj!;@SL3gyXFd7pD0I_;M$#YBF$k_}H1?_{od0-IxMmMe2rnWqZ<1wTIY_xHL74)vr><8_y*N 
z&#yudm-8pdxWZTxifm&Uz19|WIMdh}#q~_uuNvh4Tp!4r(&53y(VT@nTHqVq{4LtY zE-sY`RAJap>b4DaT0UPP2Z8R?>2TRD@Lt{~@28D#+rGU%nCi};>qSiyU9-onSTY)% z&;exmxW+}D7L2KGH!dr!cyFJ=0uc_uZ)%nt1S%P0x;Qqw$L{tYcx1xIA$S=UWuVR0 z4EFW$+Ej*zJ^U&$a)etLo}eB%v^qZu<#}YkrjEVr|HIx}zQyr$U84yhK@;5F-Q7Ke z;32pL3nb`3a2RZG3+@^$xHEVlxVuYWaCeu}{N#P^=RNlya6X(*T+m!SQ(axV_Fil4 zRhp-_UW$s7MAb2+CHEaHSJ3;pWWgHDg_BJ>CoUoxEJ4Smx8qO=meAkw8LT{q;Hq|w z!B3Bk4mUIRx}+JOU$V8J?oW24QsFMzZWi;QH7A#wcC}&dJxP<2n6JF2k3(Cm8AHg2+}uLb*$GTn|m89^f<>zJ$cZj0di^W%e&&GeIj;4JMX`%u@7 zdl}vh0n`{-f27A&vUA*hD}9H@P=|rDoSG!(liG~e)zhmRXI!J-VaBL6ma4;d+u0Ai zui_ryNm^H;&W1;po$pH#pTcJ?B6~b0kyxgsiJE3tORodp%)%>)^G<^9`{~Y=8qgIK zhK5LT1X#r8{BVWMVK!7~Uk&IL9r<~k?T(EECYOC&@(Ex!^pMRmV!azpP8E|1neMg)(eZGXBT z8d>nT*#CN3Jzz_GztP>SxSQuVNTNKEIb8 zdwO#C0)cA2=Ps}2{HA`x$M$8_`9^aE(;nBK1h|*`p{Wfyzt%cKpqe+gosa)tCYYo!nK*@pS@dTut~Z1 zcfASrxndI|GkxX6YW}*0qX?Z%eFHZZM zqxof&RvV=JEx`>(jyYCUodr@E)Hd63d~Z583USnbBNFdT`6M)wj7-%+=@A?lTfSLJ zw(z6k`k2L}_+_{W0%~?z3-89bJ_>9h-^;;a{D)d6i(`!taf9aytaIZ`O7Oit3w-F? zvBwj*@<#T;l8BO>eC|v=7(@v2IU40Pda7K*8lbT1X%k7F*%?iLIP70{dOTVw+2?BW z#|a5FLkS+wdHv7~|Kk!9ZjFp}&-UiqSLy-x{Od@1giP0d-q7-fj?{0T-21OcPR-#? 
z(y6JCsf-Pt1Uw2(J2@do#lbivT-Mg~wAdSqmMm~zQHE=%&sIfzM%d8~=O+$MN53DQ zJg8jev6ylEUVYV00$#a?eNCz2@{KtlG|gbq9q=FQfad89h!P?Lj}f|d zj66e%jc3;B|7g`Yz3y174siXG3s|OAJLFT(J9vcqh0PxwQOe~xY#VmVMl^z40ePHG zu*x>#%7`|Z7=9gR=L>|sCUajzKlBirjaNrRbeXFRkjyt(>;_F=9rgsGQW{-0l@S}j z?OJx7Uqqz8s7l4+1-GcY$~|nMWRF3BwJ-(bur?n0&oX2QI@?bosN|e^;)G8p@iJCbiwFnt!xQNPhyCMY6kAW&;YyHWR_^3(tzuF zXaHquwD%U9J-E3;3z!Nahci)YmeZ>ic7NyScg5bLaTG9>?U?rpYU9+4zqi8#g!a2XEV#`EU4|GzQU>WjUq=Q)P;t4^diL z+fbtJP}Xl|4!AF9+y))2U2Pv^sGQX~7j z8FnouZ>&GZx|EQs2*_|XtpYpZ016okhHavfhHFUW1X4k@pIt~f+_Lwk{iDkec@Hr^ zR3?Xq`BxtVTJeXiZOSLP55?2hq)jeJQh(WkuNTjezJ?OVHAbgNsbdW6Hr+p6+ z?tB|$jIU#;N0~0WYhtp!$#EE_o&F)<2e~7+g3BFGv^V<7x4FC58+?!Sp)vXl9!cc{ zo2&giWQtlzQ>R6huKOv5EPOHWYrCN=HXBY|dymBljC6o#Zi8V_Dai`AV=`_~Z^MrOL=VL{bsrICo zKk}FKEtDyUIWk)*{PIZnJ2_FZp8G>J>%3B`tMukbsSbFCy%Kvu0?l}px{M)zx|iQ; zVT^{5evGvvstl(VTdkJy!pMBL_JReIPqT%K;7V^H#0mK`wZ44}UYR%0B7?O%hWx{M z_)tymeeCP_Y9O$U7sEYsro+$r@g(}1mxY={AJ=L%HADV{71jPwx8JwIs2q>d&Fj_? z?Q9>k=Y~EsG1|yOwf&8vOO*}A|vk#13EI6}?b)e`zhmJ^(R66L=M4eS65HoaDszsxV5k$~^Et%AfkY9VPf^U#B5C9f zlW!%rOa!+EH#(KpV(_D}G=ih8@uK^a!hy;AUKi4o6v{hW44vraWa@yqAQ|B^@EZd(cTW9Ki1pur|1V!3(9NCmR;Y3bOd0kg{B-R)1)KH zXD8TOTh>rtQ2bycv`D0#vR?Rs{xwTd+Z=nBk#zuF=hMx;o;O!pw3Z<)!FEts6L=-| zL~8^jWDkLR7V4;l%C(+{e7a7OwYQ#sUajqI7>iGc{SwOk_dal>Fa7IS*LH!zWC@!9 zOFt3rGJKahwwK?A?R(_6rg%=!HMGd~{06fq%xQV0P7AMsn|3zJMKbPKA^vQR@#KBr zng}t&67$_{@9_Bj=Hey2fqcE*Jmt_Ckfv!XGusn?>V+h#v3rTLdp`W=Tun|@iS#PLfoU&D|3GWSbNNQ2F*M&!N=x}ihP9;GsIS_Cet|)7oiVOljO@abO0WS(8 z2gmnNLxpLOF&Q}~p39d;^KaN?gW{A(qa?;zGPfb?R$jr&O`66a^m0A2@|^45UM8)h zcKTvPz0f9UV=CDWxtm9jGA+!KSW?AfE3Uqle&A-yvybafh($j~TQg0by%wh~S=a>9 zCi`HsS(I&0+#rs;uJ$hkNlqk;yR*}n4jesvemU-*1(cPGJxsCd{?oKId_bPt*2E@9 z=JFP?kb3^=_se^~5Cqcsa7gqYKPbgpVf4SXxh**d3 zyOnAMEItlLBYkh$ybZ~pjiS<-m!dDGSOtT0-%zdB81-y{fb9@K!fLctf}J5f?E(I8 zEUk$?_OGZ3iRg|v8B}BNs_QOn!0!R zUo!VX5VKrmuR31_Gkb-{%$}xthxBuQRMj#j5Iz5Mfr+5ozhGx$=+K)6`;ffoNo~+w zFMTh+`V%+w#Ai(%G1Z)Ou1)yjZTo(|Q_rP@*yGXR_DR!bh}~Q#v0!mcGu`9qrHSMM 
z?Yyg@ZAZC4^O+Tss$$8Dvem{pd&FFtOhdNu@1A=-An$}J-=Dt39r`f8l>jv zYcRoMv^fswTzacn#0(iQWEgCwmx;#4?zE-|t@X5YV<+dS_e_6=Uz>v0;#G*4glEji zBSOt)JY6H%V%cMDJMJ&9?y-tU6fkWo@nkB{F!J5Ddl#*(7%y*7w7Kq`a-WMuyXY~9;q7tu@wTwO|x!{{co?zlm>4!E3`w^Z?g1hUquky0L4ONMiH%%gL zqb*?*9(Op0#aw`tdjEG2@hP=@Y3LDG4-Aacgo=Q>mOSS(Vq6OC)u%sK`;~V(&Vhom!>RilSoa zWl&&Rt-KEiJ-5a~0}RdbO%R(qUL4P_kOcZhtWN|P6a%|M<4d@^3+>fVj<$Lu%(*s9 z4b}l)6@)8}Ce|?gj4VGITZshT^L-buJkt!>*{xi?u45n~@!GGzmPFs~4=WAfMPP~^ zg!5Tzwf^d|;f+S!O;W)x%Z-3q3$%>sW5ZLEeT&!J`6Yy8-xTjb++i0Xrz4*xU#a#= ztv0*DL;S>5LHBzik-kS0^9t1|X3!egbTqK%Q6l~k!SMZn2zuTy9tBU36M}(!YZBT5 zxc6D(T|{GKrpZC74FJk#?g0FAj?zUae$PF46vWQc7zJ{YUBf`zEequye4$62Og3(89z5aoC*yZxBGJ{prxn8)H}l)K3ODPx1r zEelUPyj-;CZbb*MR^lCPDT=R$;K%}y+65}=?qwYA?gpvU4!#~dFntvghgig8_|9<` z(cRC{&*nIbXsGUg5qVk;t%!dB{}kUp4%B`|#L^N$>d}u>EXrR#p91H?Fa#OgS|yo+ ztxZHekT$s$%u+WIDN~A>2}Q^r>&*4WN8aAzpWicR3&qZ;ozN^e%zolB#3aSEoiEYP zQiT6D_#$TPbLROpS(haPUR{YyG(D1WQiVBKtbHPS(;2|cu#m`Tjxmrw0Cw`A_JK%Z zFS@Kh=IO-5IN>z$dQ%jwk0F%1lNUnAI>t)AobZ%+6D&fPC0bfy#I0j339|js{Pe-v zFMr85z9`EDNcGe(>Ns7RyFltRqwYNB;%z7W-ke8 z&m48SrSFMI5H({vbThlT>6M#XXRL-QQ-~0Co-#{c`rr}M`*+MoiBjLMwgk-JNh|g zUZ<{DJB0$q4da(I)5r0g+JyVaoUjCAlcbK$NFv926PsJKdfBNWfAl@=OWf6FA0HuK zi56CQBnf&2UqmJ*mla?)Vh?PLD-2=8wS_t$oiehaEJ{pUnF-wWej7ls5enEZ)6|GJB1iWi@BwYyZr!&Q5m^h1;M?dodzSuGs0+#= zPb|$m3EWZFV2f(6?nssLDncdxSjIXhqWkSW!{=#olVAR`bk_zu-0?!ijjUNdLK2Ip z0Z>#SI21_YAm`8Tw9Z)?_GKL!#xFEGPy>m)MiImniAX7|;ApFvDUu z|FoK?KUY9;&R{ndsbgoZ4-ov+@~~HL#NT+LC*Rs$o=be3y|mfJV*nQu;422wX2qRz z7bT$;M!|)4zf9t_y9CI7eX}V0a*)E6C(IOh&>@D+*I>I?Z;MuqK|aunIb>N`z!s61^G8VJ%Qm zsluRj%(oy{7VA71%%!q2lMhZe@=W6Th1J|d3`T6N>U;FAho63HM9`(SSOXSCRW4Hh{wt~B z_Z5TI#QO^ti*%{`xIxQpsS!PnJ$}yN+MDYiHqFB#^%ksVa|M?>jAD_yme}0St+$>>u zRQ!LX1PkB*#;^qLd(D4k8htE)tKnVm(h{?06eS}y-P@ji>lKCjM^6*81wD~<=$ShwnOmzuOr zm+6-4E(SVjJS_*GLh;NGFDGbUf6aXbD})kw5g{5Npnf$XoGe=xi{o1^s){7hndExB zI#=P^>7#scYPD1lQ4eGtrM~njfq5z3MKh=spXuHv)`%{|k3uV?5 zjMS)FB<+Z*E_&Xr_bv`LC!ledgIoRlSNEudW)C+>ChHrrMlb%>j0nM3u72j}91D2% 
zLJgIY8#0|X-Yx7t{*LSBl1icKEmF9V%4`V#E7+HhS_YZfkLfqv@hu2D{$ARM#%gp`R=)yk8AdF(med0^4|8kD(Oa%!cCy=`lRxCHN+UB#yL(Ox zVI3FcMkSEVSHrnh5>I&)z*eD2lQTU(&pE8AYu^mtJfDAANp)#s{ouN9u#rh&CTV1b z*37=KX-oQL(M&JUX^%V4w9x~RqF94(U6;%#DDYsiul?i!io@WtMASHTK*p1;xRZGB z&-(v}1}D+vmtJl4p?iT@a>aJ|fa?7p!CHs7!JYP-$`aE+>sE5KL~GHDV-+UI6gN>w zT}oMgX^obrsB6t^Wyu7$jmo@E#{NHFRUD%nF_urL3qF^&Eku&XbJe;i&v9VV{GxJ` z*Ls%K=7+YrZQ1Y|)hx-jvg?J%Lq;n{hcmNw4Nc4rW3E-pImn`nHqQqtdbm(O?!nGV z5o-_I%WyC?AFr{_!nw*1c2xi8wa9csM}(1V_VG7ILMJJSr+)66bj3|fmbWw=9b=wO zR@Z#4aasAuw{Aj)M!eA8esbKQo`=0(KN4>(^u(^?GykE7h$WZzk^TjkKRsp>&Uo@N zvAReBJG*wlxkeMI)BMmIUE;(0L#rZ^RBrQT*WIG^&)HTLIU2P4eC725Hm$Wkp(z=m z?&Dp9^MezvM1S?%17u{?_E_GC@Y#+Y*O#GJL}U&t75anxpo**o3Mt*Bjd05pS9!j9 zPH4S-i(x{*g+WVdWtZ^D%^kMXiouPejfIj36gd9(xCC*>A>N9UeX~N&ndI?)!;HvF|SY^YvV%T<6_-%HTq7CqNSisXw25ls<3#8)u9Z$3E188dteqrr@;y*gz`V zHEzi`3;G$BE_0r1oA5T?c`UzGSY+I3rEuImIU?HbFt`pck?O+6_zWm(R^!n$)G%i~P7uvqX#m?eHH9+G5x`s)G)aIdhDd?$M z%f%>-Y<4SI1e@Bc#zVvc8IhDLTiMpXw)ApYDxm9OuJFqu_9ti_>f!hR*p?QQAF5%+ zp}8P53Z{FmtS31lLY2f%zLiXp4Qjfb)#gr<{O6m zr|e-O1r$&);Ydlv{pHknrJO!jv2v7;YeSeDF(9q*34i?;Lv!`qFF9p*=)e+L_?VJ? 
z$n%Dek55E!(u4C$O`T3-Mw#ax%Pf3WJ>{ocq%0y7#>>2%zjtGmb42Omi2P;USoQp; z&S`)Z^00dRxTE@*)2Tk8pLh>CbK&cbj6IDO!&&pt(xA}TCw1&4cRZ>zZyFN>l zLGHbt-Ec$qQ3#U%hCdXPof72%_U@3$6YOBAG(ac@RMoS%vXEq}pSrh_ry|M&g{|ph zV`&?<##!g%{Q&Q47^*w^LKA0Jtxl9XFn7ga^R;0nCmz`s)_@m%unD2MN+vw(<~G&R_VJ@`;5Axtsd(sXhCK?>~mHys2~ zYxjQJ%-&6IZs7S=S*^~=XRXx2$l%WOr+}NOE>mjzt*zJB0H4i=Po_jfU|)Nj1a0OD zgFfJc6&QCx`}$Q`!maxCHkLcMb2SA;a9@#MF9Ny%9}yCtFqQ)7QSp0Zo=#&>Y7M`8 z+l0?!Frt5{lJh|0B1PecXq2no8s%&sH$Y939j9x2p09-uE0a}>4y#GAj)NkZu-x6v zPi}m!fM8OdT%~j^faS1Ker1+g@QuO?<$>p8xQca|ODhJ&imgipit0p^>q~k~F-ps> z=eh%Wi@TYo;yK<;n|E_h`tG2Qmi+Zh)I-H$;iw4ugoeASg!| zK&NykgMlXaIgjD(mIgk#C?5g%>tP_8UK|qNSNdymD4S$d>_5A6WqGE?D+?mN1RZ$` zvk8)qn>*(;3@fpx@|w04a66FMT{TqGq>e^@V}Y~?mVUkLciUvr%=uN{9A_OR%vMQ@ z21Jc191qlYU%)?}r(pIyGaUK})t|QiCvkHPdDb7IP7|)oS|pirq*Gve8xd>Q=X|wK zK0BpU3Y*I^YyZ9ZPL8yrH6uAx$V%pe*~O^;YoMduq}g_$;x%aI<;HYaoRtO8PZ;uE{YF zdMMq9ds`NIySPkNdP){LFI)QCF+@uzfARd{H%XCU*)Mmk%fEJh6?i8H_=Ee&Pe{sg zh~5%)zD0XdWkV=}ai<(EuIO!%O%OgzY2zF-OQ)Kx=&ka~g6!L^rb5EtIPM@q9NJZ< zG;*SkZ=Wc5K+Tg+fi7&^CEcdIVvLK%^45cn@MARQ%TcEl{09i4#5!Z4n1mynazlG` z`F4P8-m3;;1pQ61cs5V!hd^r0jz2fS>%N(a#w`u?moL!EO?*(;jxU zLSFS#4ctD7f`l@Xk}5w{o}re?K@p`os=><9BITvC^3g}n`|3nlF*KRrKUT8c1c!yF zh!&IW9;Q685I|0%ESZ_@+I{|*8qMQ@vzSmGF%Z9iYq~(kd!EL@G;ju(Qm+M^qYS8M zQ(C7o@77Adi*YU+xY;|ewDJt>M-v&Rxo88~iT(_lL*VD|KJy*xq>r!s{2aXA{C?`y ze0tt=6o9Kv!#hkG3HS@K(_4(Zc+@7m1<|SRcAFK;s8y1weCeyfyO+3@3r?r;?8yj~ z69IYd^vt&7Pt2h)4c{i@sz=>7bx^lSG^>#yX zu_Jzqk>+n+ZRx-iqI*XW&1fA5kEkR*SSLE92kkxEp6FqvgCY2}NO9qPHx|jP%-&lnBATg76rB=&eV=U@!wUp4h&!CPT7>Uqb@=3=8Xs@I&-od+>x}2KimGNXKz1I>3{h)Az zpT?OYvt~ZV*4jWfAjuL~!!K=$lVJX+L@SIaUl{rs#F6~dnscbr)7cZWyciFIo=CDs zkn}>6RM`eOo_bG86GCXKTZ?-G?2vz7+eyyCEhu5_+(?UDPjw?j*OD3Q3sjFp%*pe@ zNR0^s%isRZpJgI`B!e?*`q6SEcvZMw(z_;k7JaB_s(->?+K{xQu4Sl)`ZFQ0!Lu%x zrS7n{)bnz2(l7KdF8e5rp3n2aksBJWE6AUws*@j6Px$J?TCrw6&W$4tCc;D#k$zp zyKMhm_8elub?(L|uGYxEjdVY#94|U)`H$8P>Fm_|)n?tj-tBcUO_75C912&A$9Ri2P9=kB*$OGdiEvHfV1((g; zSXWkdIT?#^Vtu=qVTyS=_Ih3PU)ztnAe_*9`GLZhFomQfE)?mY%Rgddh_Cln)z_=; 
zDIhj%ENuL@{wg;^H4@$9os*Rx`;pbaqt<~ha5Y=)cIJ8$L@PfNBhkMhnCm0{IKfGi zS&7!nXOoMZkNX$23qbyE7>KL3laoKnTdrI!&>wS$i|@DTh~xjrNl0J2%7uOzxh7wC zX*oYGDNC3?`jKZmOFC8+?5Iq-RF{OKR6*EEwcE`2k1jnHSm&X(v+E6YLR3d#9oOPO zHvp$at5Ld$5O+1J8ern!;@rZs|95r^1cvW;hy#W#vmTRtO9b-f}x&xgg|F*NCQ+&ElSLQ4r3+JDz<02W6F ze?TfS-t-_tRV5Bl-;zNpy8gPO!)3M<;IWf;A@f&B`>(=$avBL!iGPlw^sQ3=Zp7ac z|2O<&FXe42m(h#-BEn8rX|rLk@1M6q1mn5m@gi16!&24XNz?xVgIt3ExX1NMrCJ^s z-+#a&fBm)>3_vFhbfS*U=D>pgzagI!bO2>KJ<763!1nQw&XiBNiC(Xln9{c#g+^?T z|I8Z1PjjSk%lh}Ow@{;oEoD_2Jt@ z&l|e2TunLUqWUP2fJKQQq$VrQ>e;c0Ishxf@`c78W1vQvHn z6WIYB^78F^v7wkFURh-U~+5mOzopDbN;jNl!_tZDrH@VnR1N{va$i5dBBwM zX&pkS{ycgHCM;A9(Tgorr8~uk&4?<=;u(}|cX|J|6m_D(JuwBym+LGGPC4AF*?UeKbz#n>RwU`V1 zpU{Uv2ePe(%Rf1>hKQzS5`$&BGa~K|W0!L&=C*`iY=p~}4_sf&?S`Qj0GX7Osc8O@ zH-@0wzb1e-5(iof)+`uSXp2R0EDr!su+DnB&7nU)B1UHMD&%{JU4!D(n7*N&asM|P zH48B4gY&q4HFV@JC&0=j8^8&$L}QlpFDWp9i4yn}5@r-#^VVEb!>OM))y3_Vv$(M& z*4KI#(7RaTOjP%6l$nv}%cgZYAcbx*AUKw%E6~SI+cH+{&i_t9BLpbuwJp|CsH^SX z`$n&cAl{!Q4L=aqu;8?{RZh2GIZd}rbdY^0$SXv2nwCveZ7n)+&3N3jEiQ+ePR4&E ziz!QKvL*e=++~Ulg0Yy(}aBUWZTVB zJ|PGlOapS^^68S6f#m>z2=)XU5kfEU@~!}jzipzas=wADfE23>h-$X&TVP=zew;k* za^EWYVmbJ=LQAl(X8#33UAelaFer{ehAJ(mXlTo9uCCbj`fTeGFUfpK*L2Ey&So=_ z>XwI(j&x+GSWj~b&E;A^)`?et9#^OHoclQ9!w2{iB%f6H307l@`oC@dEb&57+Hz)h z2Y;6haS`z*)MKpbxLjbQ4P=srzb-{8nodXrbm-ha#I#K5Dsf(Rd6$L{_;)+;m~@8Q zrCEdn@q4T~UhbqRH)W9Pkd1p;!O+|U#wu2lU)?yq@Q=@+^#lC%3MpmG#?Mo}Lc6&rMZG#LpNo?C-P4|60AqyJhpEEk@!B>Qt||9SKDGuf34wWz{Z51ke0K`s~y1cj^Gt2od`nS zDeiVOqM^|<%Y2aY0VDXt{$+T7W zJWdj1hwQ#|`QTjpg>g!HX}Q+mXouTC@lZ3>2bknp?o7P5kOYH``KnaG8ETu>+@5RNncALf=dPdI zi_d=&ZvBpYb7+d&E7aLoqgj@Bu@HMrG-$8f*eYBfDAHoTsBs$F*p=XLuTOh;CuWe8 zP&mGSG!JkkJF^`mV^2k-G`Y9?!%u`7KH0s+xdkrfoTIi6C*MLvEjJms)VpmFU$uH!>jJ=b}@ql4y){=$Q#ZfG^__uMHr}~ zaF{nC!>KXMfybZBkl~J>BR$pbS5wnQS=B20{e zRj+UbN_7>-Zl&pcBl*S^ZD;c57u>Q1%F_~=jnH1pEhVcEF6FRxhJ)rnk;A?}-JXrq z8@U6+Du41UZ}I4yBG08)X1aWj0C`1ks&A&>^N5I}+Jm7}xCxA?!#=9J=CU{F3@uG>4$ z*kDWzy~8>S|KxgERn?~m(SOO3Ji+mzfv5KI0AF{GDVl7#(~H1-%7(c@0`_`mlCxE? 
zCb0f)DLBsrFg<(%M3C;JG0K0?@1$1*EiuUD+&C$r;!Xs9T2g)$nVqgUnCnGi`l&3m^S^yq88qzPVVdr0W7u=`GjVX`^Ig)VMXT#| z|5&D^tzho(CGrx-AN{aO)^(@(by5y@?Qn#X>L=)BsN_Mjw7@SRNEeR3x2CuV_|fTe zW>a8pFTU!W-$n>$8wT!&Bi{!a5X%GfqvlX+S{3y%>qL|m0T{2No&HIY?goEVa=&+F z^HcJO2Qr$>lkcu~J{I}qXK0KN9_hSHmv@Klj90{=q1-@nhHRs~Y`BYd*RbxQW~24A zad24Ra^KapVdMUoQ|VD8Q-9p{oKwV6;4W^7e*GhL4^|nsM5p3R{EHe7o+9&Ms z5=o2|BiBZ?-qNo{`WVJ9OlRd$iVP;s_gZj%dOe|c{6oV2uHu4VAzkcxXFbl56ZW|P z7q*HWGs?`aBs$;;{rhUNm>D3m$zZ(BwQQ8?cZ<4%ng<3jL{fLl|4gW-flN<$iM?O* zC`X_aWk+YvcW4sMhNf%47h(f}`J>xSq1OGrDVEsn%Qa~>|AuY4u zqla~R+lilw_jg}0u_+qx%p=%R;^X9nM}xcQ6D|fnGCiM>Y}6HceuzL7cU+76uNT0) z77G=Y=K`0Fr*VLQMzK!*JK`NvP;k>`eTN8#Rw@&L62Y46`v6n;j}SyY7j~#)OdEXv zVGtN8E8eBG_jD?Se;-UgcL7~*5^tgzzH}U&%Ak!`<{TrTA1-*<(w@rRW=VMC3}5<} zT3e019?)v@ia)|7QFCmaD3NC#h@o=~^mJGx)$b-HO<}q~wO+@U$U%$Vrd?jpS&=Co zzU=;1lP`C3s;TNWfoaxPpN&KtL1klje;&9Jd*Dj;{o8aF>#EFwmr=xIb}71MYHm^8 z+$D@g?pw7q_qdiM8@eS^YsDMN0>DYu0y~O&+Y-d#_NTDJ^iZRXq${9uW6Klskmf+l zbVH`cH1s77U?@wugq?+d(tyx5L0r#|Z|C?P%Q<3M^c)J+DlUom3p6ZzAypS54i9(d z^an>s+WuEI&`bn+I4W1t5DzCD_bn8K7Q~%%A`5nN93C#}<{6|J6Nr(O2j;PDQNcFL z%NrJ% zU%#?2?Fe?-GzmEfJcbQ-8e<1;<`88t#gKW?-mxk@-tm!knZJX;a=+d29|)6^wC>O6 zX5lE+^$YK|&vyrF4r`ox_NssXf-N^8%s~WBJy;qCBJ4)@_8eF@_DJi;_%SRZT9Hf= z>6eJ$&KPgok5&vczRe4vcPEnkOZ$Bs-KmV}DiNu48iF!$OhvjfnS~fVa^jxX;{YW1 zH3Uj;j3gCASwsiN$XYt`5d$t=`KzOyql*mf;QTtmM!s@KJ~*yhcFBvhi#X1|+NIyg z;k-cZ^Fw9FnrEv|(OzaS4U%y?c&2G_HpzWXl-(<3JF z4{R>f64&pUhB3U1a|ZNOFyX^o+v^t+84}1ob4FV?-k)}GR7O9pwqw*SLf7@&5%Ziw zG7&LkeZ1fQ5=)jS^Tzgfue*5tAtq}+J=I8G283t*U}Rk+{Yyvwm0&stgeToO*uB%N zQ+2{slXbB+@GMu({+@k7H$T*p?c2WJGGTQ+=yu1`F#Bo$FU{z1UOL<2E*i1xW^M8; zp0Cj@-ejJXd}Uo6X>iw%bLIg$#On<%?_$N2m~OUJ_-5BC>+`|{EG0~+m7Z5iZ}pyf zHp+qGEB!$yIu^RDwKQn8M_U3E*YbgO2)xSoxWDlFg572niq99Dr~yWc*`SWNEgL-q zl1P;RZxX}93{$UG;``~{(?OgQT`5)R4Haz5?XK#|4wcM{x~WqUk}h9DkQtgSlc6&F~N5M zN3h;?=E_T9=Qzg~wRm~g@46$AxPI*-zVCLx&uH)3>|5)mCx@M}VTr;rLsq*%o6one ztPze|d=k-6qtHvIO-rAV4a6=G(^;j@$Zsfs?Ckg7F1j~t?(Y;ZgHucQ`S0*;1%v7n 
zosk!7wI%J&yBb!*dxXd2f%MXHQ{W&`4PS^VNT^g-hMWu?aZ`!IZPPJWru5fKER>KG zz#b*xWzft=wm1kgTLsABXU&C6|K|qfup%s_`YDdt202Z+8`sONq>Z+sz`R1*8Db_i z;*bV&J_|AixldP@SgdK=Jsi^TybJUom@hg$Ix)IR#-jy@G2l>bv|SQ3AFey3!ARDT zD%jpP@JrE&A0o#lv$Di-&1yI3RZ4Ww%w4lwtZL2FgcjW)2;~;JNillG!XLk%6pDF} zi+H&`l+P$%1zVCYc2}~q@2oiM+-JfpuE%W@da&gy9Mgwg1od&71ZzDl#Kh4 zsPo+E8pYI+{$to{sLCop$R_ZbxI#~RB5|Y7ttmF)Dy+(jD!un7(3RrclvJlVkK&URO{66&NSHd;ddti3&qCL zV!5laUg|TKYyaSau>RXwKW*t~aD z*pXcq!K*cFml16*T+7v)GC5nW!dk$Grs)>em4I3ti<&lP%uC+sIpPOhU|7-|SE08Z zC!|r9_F@_aq3~#M$Dduf=};0#EI7g9tHN(YM`n)MmKvhgx=EPQs4(ANe>t5ZS*`qF zj74jWYq=Z=na!GmCfcNtO8~$}8RvgcP-ef%Xj1nPOp5ZmeB;CQ1{j zgU8mRW&ydHwHH5Z1gYSFu+I8pVo*0|V2Wh=DWmU@$|M&Umo^HDWBRIqQh2^(_&T0F z6oSLNCA{<&O>Rs|3};HQBdTnE;$zn^r>2*ez~d!}5K2xNkH1&!W^ZSUU~NKtIM;(< zc8mKps1(L;txhX8RO6L&L?9*>A%qsRTK6Qu*P}L~qxO_mp;;k&@7NTz8tv}-X3_-#BDgm-B}>P`GI-vX#bKXAL$W&MINyLY%kp00=WRPyz2n* z>pJ1zOHrxa@=#An=#fOnXcRm>30C#X7_tt+Kk-sw`sf(D-miEy8}263*9*yu=t0b? z?NoR&i(I}8r9ZXIFc^RIXy@w*6!%l-ljB}?M?(rL6gGl+T+UB(d6ic2>kqqk`U_0PU%Ix=w`6TvQ8e;uiyxLX)bbEt^n(xVsFPk)GB!NMA)JaJuMb7~;@ zX&x$3qK{3QbjDN2^62__dXeG09QZC?ceT!lds7b)fPiTry2P2ZX9(UQSq~x8J1aecVL`|ev<>@ljy&YLBddU(R+x#|39Q+TE_N8? 
zlD!$vWXc!wl*P)UQXygKa;G!3f58i;u(AFf%_--N$D=&3A<*@*_p#}BX%Q-zc)|%= z%t<%~qM{|D`awK~2J(vLc{~XFH!~Mj;1SrDH@Gequr;woj7_Sk+#;Cc+{AnrmW#!# zUHT$9VPMHEhni}K;!97K0SQrJ>x2DwMIBQ#7twb~UZW(iffY?p%&SK~OHp~I8J9xj zMG=K)%GdI=KG(3;eC@V~_EyYYYB)}5&g6MEf=|<+YCw#yda3YPCQL_iQ$AiZkr2daT15;#${w0opuR_u^?+Vt7Lm^xzH&2G9*(*iQq_D@ zA&=F$Z7I#Qu9WY-w2n;M`ezBnXT16)5~|X7N0~*TC}v!D*c=@Qt_!c5xPqKYPm(nY zt|H%n?~bMj%SKY^xr}zx8`5})33^PFswVF2+WS9P z<-I(^f_GMh%yad!0*0s z^rP#`T{&=}-FQ>e5AS9VcDJSgj?hOD(F)2F?bb?s)H+`YDvtV*z8968>m$U zJIB#GXHgr&;PsntV~kZ9(D#j6MtHE$&jLz@K(Dj$!iH6S`6h`*?|?s&48mq`CyfSm zg7mA73GRar^PhuhUEwO4ZGMs$Pg1{ekmi+O8%+Rw2y@f@g=8gBr?_YiQd{n~v!C;t z@EtZzGs4n}zuzIR-lx^@uP|=yj6a2V2o?KBz3mJf1D{IFb;31f8sPnKDM`xEuC9y; z%RuP4x~>SzD4Tzb-mj58PVg8d$>jTUT=^kTYm@e$Z9 z?}m^We&-SUOdPT*U_*=s6b>n5-TQ{b!dwhk=A_=;9|XaT&dZ;94`Mk_<8g?h(Ig?+ z=%QE&*&D&jJV3Re|Dx*K-3);jefI*Q6*=}*F(IhgMIx(1R8?c-kC&y~PxZHr!srGD z1C9d@6bZ23;88Rga%J?MKHnYAMo(@DW|rPHk%eJ}iqr=k2rB*yZh6f4Cs(5j*)`1U zPXxaugNQ6;qIIiAA76BMzFOt42!m2bkH*R*Q51heEc%|3Bh(+^puVBE;Qsa(phd?k z|5U=nq9I3r9wcRAMo-5UJTNL}=OY@(v< zUrF7{R&6QCrAUpUsO9Bi-0A8;M0j*=K)RcGR`{*-t?VDsb_Tz_z!O$ggo;REc{`J5 z8{mwX926+OY?||a$87ZE@rw%VhRau-FYaCT2Cpwp3R)U?7d#uSXO7D^_vbDRl|LJC zqW-X!Y4dvq6^|uht!ZK+`{uc$ce0qTJh8rHzTk@08X|2AvUV<&YPzKsIvs;|wWToD zDqrJYt-F&29X4E7{G94T;aYP(fT+>30N#t!E-@A51DM=WkfqEkL(RG;{k!;;(h1*y z&Y02{z(CPRt1nKI_Yab(@%qN{T?N%BScAUb-mTqW23oQLuLY1CflEJuMGeD{$&v zvtX*<4vOXcN?ZgPEMD2=^_t7i#t4YflUX8;ohvJP{rg0BP6KvBueF+vy8wrMGo_VS zWGuK7UzN9V7zIl3(Uhv%cp*yln@R0?`$|k!W2JkUwrHAKiPXoD@R3-=!)v08nCuEx zAO|z+8vcr}Ewm<{X5P}7AUICEUfc1B1f*-+B;g;fN}8{Xye%=9F_4=sV+KX4cuA66 zMr}aLLp8tMxwITHlI742Iy=u(ZjT#ur!W^VEHElXP6F%R2h)YwB%qAN{UCwxwxau> z(8Vj@-M$yuW_N_D4as+idUGUelw~Rs+{iSU{IsiIH#R$8@=85ja>~ANE#0U7%^v@4 zQxw)pt8d8F7VPGi?u-sDMf)>o&)m7%)oqKqhw-d=xYNs1D+3yTiD4FcY0!4d5Ny5f zdMC?C`zTM_VzA2h@Z=hJYnMUq^ez=cx~Xa+GXI%x^`&Q?uvTQen?>MxPzJWyll2%H zB6gXsamX|)fyhC9zfxJ>1FFDvX&Q?lPEzvA^-}?t+h2!7>u7fy!jyyM2l6sT3T!1?no4u$OXMeRQhE1cdQPvz6g%3PS$QR=5vZx>q){ 
z_tW30-=FVV;exfhNz3TLsW!yU`0(MwsYS>@Y3F5R;wEzh`*!;5KqQ20{4jtsuS|~i z1eR>YFeJ=Dq69X0C$><9k{k*za+oQ&a1TInnVZ-ehAQ*bKvm(zC_wf^8q|Eu0KpQn zV9&CiL9cD%T?Ny_!MSZot8(Y#CJlWH#*G5!H*C&H=JoDVC6p?YhLh-<+glVZ=aKxVP!D);#!B$Ob?eoG@epxY8hc3FdQV# zkCk1o?hwC~*aTU&S@Vau)E( z(hF@DYnnAakw=V#8mS(gkfPO{t^4X8JwJqrjbqaYj&XQ_>B_z&As|#9e?L>*N!z!1 z6Y%!~G)OYPZf_&7m~hP%=4;~iB4T5wqK>QPS@Eds-29a^z0EB8?!e`Hv3Q>ty2rRW zb+HbzJ^r4Xd=h#Kj|~@mF%%-p`Yu&L(cQjs#p%{^Cq`c~*>kp_<+tZ43N3>~2@E7n z>&>@X%I0hgDPjQNnfJT*Ru{An5fK`C8i$~szUm$SmVo~x-URQy^s&j5JdHx209tQK zk1D1#eGs2DQig2(9mkKfJumB;ll%8mFd+b09lee&E8?4fhtzw4}1+82*3S`|hu%wys?Ziim)sfC_?2 z2LYuQX%>(Yiu4ZBBoKOtbO99+>C&r|0HH}q=%6CK2?;fXND~rjXrXfhc+Pv?Grm9I zj(b0T+Jn6^_ndRh{j9m3HP_nbWpm}e!o8}wxc(|jYw$ik8_*zG%!h&%(yCrpFGq%2 zWtLhROREW+Y~@;(jep3#+rdNr<8&}Z9&Y7v)#Au!(pF%PBwMUmN^xGA78#7{Xu}1S z7pUs)m4u@+^HA??+^Z^U#K%84_(O{4!vx|-w&vVjCfGEqb`F#V_g@xN#oIm>_+Y=H zr!|zsy+&&gQl9_#uLFhJoXFB5SE!cDtSss`W~DChR&w5oXo^@qsRPJz9oI=TeV?>DXAKyL`xGOsQSohUWV@& z>5duc_?->IlO_8Ayy9!PR&3eh+tjig2GK_0_mg#QK{_Lv_QF> z3wdx#U;^p5mt@Dqi;HE~2F`H=4xJ7%Oi5?2t($S+0ntp;>c9^i zW!Im9e~<+joFy%P9^3Rj_>`qh#3q%ML4+iMKkvFpOH;2#!6lO;zy|A;_*6+OL z#1|{V4uNXOU#?rJD*pW|Gp@z%oWX@Hq*6h*Ntq+E27uPG=5-}+(YpPJ@NTkf2RDUj zWU|5!^4}*u6zimb$NvElB+WHFH(*9M6rGuZhFLxZ>L1uVS~V!e1X%L~fW7vyf+UeJc^EKECrT?NL= z(ra;J2RV+@=-o;PcD%&*R|>roj|)4y(HZi&+xE?S+jjNJIo&qDMi_}M{*B5t#4<_eolbp~`zZGzIn3-D^tMur*h1sSQ@M$>Y= zQ3)Mm>WBPvX9G@=-Tu->gLv9)WBm`i>P(d96R<_tgq8Uax3d zl+B2ZA&lPZebAX%8WSz~VCvQQa>J#R8BdkLT-t@NSYzI)U*k`U^Wm=w-tN{%g(9sg z4znyFE*L3c3Z?_6LNFZmoU?9w3#%eEAMG)4qdsV1s1R#~`N+F5t3NR5B13ljR6q)Q zWV=U~SFiGSH zx6MrO60sLR-2tA#)f+P;J1^rZZL2Le?O2AfV9|SX`a{DFJb59}wJWkJ26|DuRkGZCZe< zZ=njf{G{lpot6(a91hlI?(RYpEwBohu`3odu#0BzuO8`&Iu&qHDsRzHRFy%+MC;u; zi0+M%!BD0|C<2u4zzEfYVCSy)xpN>`6(KLhUF3wvm*>a4K`C#T+Vb4tAqjgMbXcc& z6d6@Rv!gkYzBn092JNWpfS2;~pY^V|Ae^L*&V1*%+z^!VF_%TEKs;Tg~7vAisMQnx;eNK9jQHxr`-$A=@4~wM#iz7 z0!u7cjUQbVy>t0B>4lBF`$ZakWtL5`S8ov5Wyw)J_r|gK@qE`Zx|idg+u@G8MLx1p 
zj~>i|>2|PJ+2vU2$EN6xV5RtP&SeEI8ng<#t#z^&7MW`i&bFA%ry3zePIMr^ctMtGjaQ7tG{~QiI3VV?|H*p)!8D0w*(*9?SN)bfIE%4DV*y zdAkEA8|c&`uZ9tKlR>MmWeq4n#=?ZY*}8rzV~};sy#%445q^o?Sa!7Fi~PzjMw=dTpl;?n284;fW!Us4euMb z2cB_A$v4l+AzsZSOSkRKt?0pa1c4g77qn3xc~76moKtQ{ z`gC(PfeimeesQd)2%#(7iewvk^C6BsZFZjZw^X;&%2bT^d9{KREvorf&cni-vTLx< z^??*rG??-*@9nJZC8=HTg%qukgWB}eX)8C8#<%a%^IUaIYzAl(!ucBmW@cBZgn=4E-3E0HU8`GlLrE5UGeMS^d-FX!nqn4k`tU8U{MrG- zM3kPC?!SBqPli!!D(n~Aa-74w7!W~h?22+X)9jG7#yUP0H+ZsRcqhx4 z*HWke6o1dKVL)z2&^yaOmlBp{@7`7-BN$y1t24daY?jVZAV-a2zsk2n~IuI#6OvMCLvLv!3;1f4C87MK%^Km?y48JTLo=fYbz=m|j9zkXS}ADGa=lBOPZy4(FUAFpnVAdj)` z)1gmAu`lENms5D|dDqs0=7Z;5Jsgwq6K;h7X0|YudBK|dt;je|<5cVQrgzZ}MI8uN zfXlCp7EaIgnL%2I1EY@C`mH4*a>`FBlwPsN$;5l1Am ztwYWe&DMfY4#i5iE~3lq!4jo+l#|J^IRJ09!crY^CQOq!pJ< zyz=~{{;;^t8s&+X%vmyT@DMZPXBiry&_F#Sp70yWjdH@K@^a)k~HiW*=Wa@~O;~XMUD( zYt%lx#=_f?4SX0lLMA!xQ(qN|k%h4lb1qM$K}#gB?_6t^Ck$NtutEK~XXduHwZFwM zo$ggp%+~OmAN;K|?Xr3uc?8*{X=O|2smvKOHv2+rDJ5Nl`0UQ*8?o+rT~+uxqM$&R z{1M7|s@*;~K#}>0r4Pv_q&%x6eQ!4glPmhIZ_^3~oBTScz$=_J`^lCM`N|-3*4EN2 zJfCA2nQ}RaH&Za0*V@=}^LeJy%>mwanLGV=702{0?7HKECc-(yU|nR21HOj@SK4<+ z7!NoT>#DJz`Ab?#1Pl>m%%N%qE(#Z>Qh6b9R z!so%nG(Lxg?u*Gk^TOAx3mwu3YZceRfO*z+@WGTC&24&BKIyDtCBLzVmI}X44)32e zHYLMvXrf6;)x&upJGl-9Je7Haj(x;Xc2G8yltb;+yg7;_xeVa|Rm9Y2`OfIVTK>#; zW+qW^aw0@!woJsm$*k3hO50gL%YY}L+apFDUd<;w)zAW^K70|C{8HM3?PjRNaNING zP6>=_psNyprociRf*!lfTNtlX9NR~;+gJIR*N9O@Qp*YbMNpskyH?h|k>|M<(JWTb zmH!A|7O&)e3WD)-(o!w>AN=+;k@hB-z9Q;5&P}AARn%;)I2fVy=;|D6o*%1aWgwsb zuv6s#g4Y2;9XBu%t&6nx^s?H+9sONjAj)V~DF)s5Up4G5$=?bJxw(lO>hCTmK&6rc zq8^J4wv-%92B4z3ISp zhL&!L@AoZzS4VH@4sbMDvvAPplx;rrQ$P0EG;07G`m++bNK6p}RX(8!crEwN^~Rmf zAp;ir80|O*v|;R_fxQT77|9Waua-S(^RlYmdQx`qQFMXMm zcNb+tVnYWDKo(O@sOW+YY5|y?Zc2^cF@OMtNWLp|iCKuQbVv7VI>q8&d2_@*)gxLy z%MwK`?}8%v@3{#VYx}&+8fYe7ELcfp>H_}?UY+@;aPv*jnW= zv@&j;6dL{g{6_NiUQ$WX{Ird>RE20gB=naS)e5*qNxV8=Nt)FwJVU4 z(K47sh_oNT9Kvp*DBiVROO9E5N;d{GHnoi%E34Mj^cwNV8M;~I;$3K=AW0wA3l}t9 zNp)QM$XY87a1>sLst38`!Cw1*bNJFgcd)Z_NpJAEk0$fXNU!_hj6}Tf5%80;sp#%z 
z$M(V~NBeRecI7MrbHDNBfwmr#=Uxa|iOi$s2 zU3Z{Mm{jh8Wc71}qC_ZZHx0{J&n?M3(5U=C!}P8B@KnI8w&(ADY-mT&(Qw*Eenx2% zq!=qjvPx8#WK|boLy9i0+-&g*zK=Kd()l6#00O~ z(~0K-eVHBS>ut0IG5(?@@}@_+!(!xeYPNK31KMhTUwG&=<->NiR*a^`tn<|-e(c_y z;KpRFn|<+ZV+zu^bCfb>klBo8N3pi#*yxY9GHHeUE{&`c1C|=Sq#Hk(4mq%E-aM>x>t{yn-rinws#a;M_!G0;~ofYzcTxD<3wbn3R%UapN#3k>Lpsc zWmE;*4&PnH{90hfJRH_XS!(1}GG`69$p;Hnk6wPK5MUo1Xd7DH5l0;BLqciou-6dv>!eA4KV@m8^8`()bXk-d% zt6qx}sZ4rZTqTCScxkFLw<$D-4w4K>5k;tsIgUcJzu3Kd;P z_j3ATI6aDEDesO{ZTxt$5f9Cuw=ck$&3RJ(*4VjL2&7XNUr+}D=LPbLLMH!STTvF#p%gVk(LD zEaJJ%HufqDkL7w69YUiDQCodU*9@@`}0J|Jf2a)=F*6@0%sqKIJNIw zY%0!@|I7iV9h-l=`SL2$u5%Y{r9qlA*95nRnq9=JgMKPC5k&8$^hMNjFZ}1!u^i4# zXTjg(hLw3F(vVD|PRzA*x39FeYUx%Inc9+=uI0x+k>#5B^$X2xb#H^gQe2X6!AGmI z)F5w$YUm&jOjlz>Q6O=l+)DtERzE6D7`#U$45_5P>lyPM`I=$de{k0DoT#3t_kIhl z*o(2Uj2s=h?<^lpC(CV)NaRyz;BVw|m{7dx%XA;_cup22F4dBIxck-!Qvx5hsu|&! zeSO~<-S8ObONb9un_Ft>)~9oCztvxvgEog(S4tm*r4D@q+hc!7ua3Hld5pr$eLnX4 zS@%qS5NTdo`3A!D6y2;LQHj;F*wn>_lmvXScawxK=8kuE%?iGzvHq3ZO5W08t9GT2 zj!Vtk^?gn5AkPf&!~pyzO2xB0-oGT+sNOLq`MxE57u{%EL%F;ILuKg_F>YN~bQrR& z|3DTk_YYeVO$VV8ucVVq6$$8ih`dR_TBJvpel$B-_f^bW_DC(Q1-;s*=ppTQLxwnx5yrS3JY=-EH0ZkK=HJIXnpf(ysw)-#$T48te% zV&ZBQ&62MS;}%DH>b)v2t=^P%gBw*noY{-BPRDLp1;lu1zFnDTrLR&ce57BWChVZt zWY1QH#b11wrx&H(bD|dTN`SIN-hQnJ1XIXP=uz?DeG6o86DRj@n&98wZ_C&y18ANV z`=S^smqj6>6xh!l;lal^`dNoxYZUR63Uk}-f&l#C%x!3Yu4~L`=Ke~Z%P*}JYC)mz zB9AToQQ|?Ocphgwrv_e& z06sC>ewn7Fml8w0bX`1^e0hAdUJT}5nG!q5vomXf42)}k%G)6-n$$J%Sr1y`h-5gy!jOTf5iBF}q&jkmjL3Yu+_`$9t4b zK_Ht$;9jQ(ki)cn^1ZY`r;K7rzHj%=eCwC+#F{cxWJ)uyXm$+50eP8R1Q2u#8t@>w zSSFO7Y#PgT)z8lvi~!apj@7jr_jqP9fFC>f{C8WzlFGok7}Zxp#n|a$FXzJKYIBpw z>93f$DprTP=Lg7CPyjonmzPD3*gYeAr-2>rK7B^nJydRAHYF)L2fNXi2!Pd`A8loE zul15xwad>TGkrF~S(9NLe0iEYCy*!yrj0bITMc*+u-^|p06w-WW*U{syBvoJMaaNj zZ1mW2j12$O4rzef0!`m#ujU7C)AZ)n>~}bJyt6+z2e0WVv*0T$Np!F+|2fW2BKZ>U zb;`7sloZ$d6}fhwzCs5U8`Ma#5p5ws&CT;~PN07APUQV=jcq4C_pXV(QA1&N!p2V& zJojNweve5_nfPJ~&%+@mWu?b%{FR)3_?_6&94iDQE!_U%-VyO7UOG=o(-ow=idl>p 
zlWn<^-uxB8x)g2vFxP?3>jiQfcRBfaTqWz>ZicCkMr_jdzj+H&jT5dgRk039(U-jj z72MopV`QE8ez@P+vD^HkzdqI-v{4iNgH!a<-R+Zb4dl>3(7}EcwNhE1Q&(3tb-?&^ z@+Q4jx@%j0_pYT+C+V#1kaU!^evEkjrZ|7YTZD@Q16xG8`Vpr@3MvhqGk)#YB|eSk z7Xs7p;AwAWkbuBQd-udn_Bg-eGL!$FxsmSgvgM6SfmsektGf<->uh%8Y$7&;dJj2s z?ob~~xfnx=5^VtVG97q1_DL<{(+FE{?9i_rpCSGD+aVlrZKw{?ZbbWPjF0J|IW0x1 zcM0G11Kb=0hou_EixnfvNwY;#AHD@-C{yF# zBJOp47Nx6uf&o`Je6)%#0_&!h_0ey~tuLU2=hPNzS?vH?As^E@BHw?IwIz{Cfl91( zgu2bb21h5Mq-gSB06c$}K|!4tUUK6td#ph%TwCEd=^;H~hAkt}!KwC3Bi}78fiIr2 z4{Pk5p$>gX8v{lbN$nmzMoD#`z?Jq(2q=&CP$bc48!b zSsuGR0nQvUXE||YvRw1d_ZI!OpT-amGe|d$7rBCBu!zO?+Nh_smbq5!;;kIY(RnI_ zHL>M+eBxuT35;2+>qik2KX;&4SP-bMs6lGB%4F;(t7PpJghmyIjkr}r(bZ>p=%{6{ z?7f+nhj;cI^f7dg$LjApn9OgpdQM;@23|F5 z4=N$FI)AE?=%K+|PR#e}>G_^;>^@vy>G{S)ql|7IwDrevE~d-H8fxfJ8afmux@xl@rDS^ho(?sg;VfC11RfWH3l22g-5Tt> z6TLF_vW`Q0Yk3$YC<>&|S;F_%y6d>y?=0yFlT5oPyV>&5Tvu+9i68y|!`)x| zCF>?M|0GI~E`(G?KMC=K@q=*AMwj%DM)ddivU1FH-&)FL;Y7X#qzrF~Uk%&;WXUKZ zqhtqvXJqNdHH02@f=PrcM}Z+gi_i#rFCT0-E^yi1&31Uy*>Ki<4v-#~v@UPrk4B4+ z&=$mQU;?tXW8wbq;5DX4ObPCw@t7&G7*jQvb-Q#NKdv!fN9oqaMrYPcuW$FOtCzIX zE_rrH8;!c(n#QB}q|<{0g5us+A1Lkz@|VtHwsVlQw%|e;{c<$v!|glS{O!=4AQyBJ zH)!AM_o?Jml%Rm7QdugX3(@$hvmxM?|JS%g#?6VkId1-TnRw;MI~v#qD$_4M$)YfZ zd%0K50y&!Oo*ZYfa;GnIjTkS)b=}0lP=P$uhPLiId+?B+{P51xB=+Njo-ssgsNR=k)Gz5gbp()WIeW<9^|K~j?3)K+a+72XSE6~ z(%^=8oYe--+HWx=&c11LmmwTSNaP_IY$4JO(aDR4!?8i#tFW z5WPMoRc=xnM}>KlI3Q)@WSt1iPY{!_ORI{rvtBg@nZ%d)yQhR%U{aW;3=@6C$&Y*S zwub4Mq#=1rIgvLU3{S6~2abm*P~oentUo>|dNyK)DE^)f&|hll)z^taK08T^&pjcf zEqD?G)L}=KbPs2loFitGxunscBWmOsO>=@+8TZcI3}0PGmv<_cHARFaMProC+TPQj z%|2mqP^M?j4K-J8$|x@}ov88mWZv&8iKFo zS#J4Dqf=z7$s`KHPgYj>1nvK(N;1qZkiK};l{KyKPbTE3Kg3A7?)mwBV*ZzL-!6ZZ z=Sxs*5BppFlgGC?k#ybO-{X@0OUXZQ5_pG1ZQPHDW;`B(|D$UpmZa<2E~z5%U&ft# zOrn6ync2n8{HO7eCAS-oXer`a*81P-pD-&I-;s>__E6dJ4=3=K*>eBe5&!UD!2gvU z5#U9T8Y#=kw(4H>WE?Cu)?kuV_$%9bN!GP605MV)U4Fl7C<$4(IqNZ6##-Dh^5?y( zern#Srke&ui|09Z{$?DD0}!3K+lwZj-~2QG*|;)nXR$GpdS{j|nP#NSE_kc?#>FSX 
z{~e(JaMAyFmm1t>z`S63P7;4P@H8XGdhbxxC^0<0I5bk=4+sX5eNm?)Q&=s${A-z? z_xc)djMuh~v9gg0#U;dU!*};0To`l>kmXu>F8qG|n7iG&9P?6Txge=_388Myby!Yc za)?P!=n2iK9{Mh=Dj;ZbT}t8dF{G5KoZ8h+nhh^ghv+}3dHZza1zH5klGt!<32nn@ z=kOpRNmN9qd4(M6Qr&#lgNikF*;Oljx#hlTaw}jc(KJ(ziy+8XzSRd^PE@;u{2|t= z|6?0kuR4^BH>64E5ia5$Y3Fx`;R2xENgsOeAt#{fiN%6NO5D8=O+Z|*{Sjq*;exR) z+L`6lDH>{}C-T~sje?i@8_6H!_@_}vc(Oj5T|hdDFNtN#Q3*8>PPnANcOl=tElQF!32#E8arWjib%E_z$h7vYz(cwv9OuM zN16Ru1Xj{Dyr1v=8+d_ujv0u@b}-$@Vb|4^F;U@ZY>#qz`?|fBjv#MJzJmwXe1EsA z4;mG(L%q$URjM=QfIJTs;hx#%hqbRd^BqO{=>YqMp~ib~X||S}GHQIEMFXT|6&>|$ zweG!?!D+q34)aSCl<1!bE!F1P`&uU;2}?hyhK6BP32j`CD_#brE6X=R>TI7^p7))2Ch5-To zFTKif)liITJrht$kiSDu0k4Yu40T=Szc1Y)%p=WryD1Z-O8Vkq=grrR;lbCD$V_{lb>3HDQ@JU^9%*S3lUIdqAb)!se|}ZC41fV zbv;{-$_K5T1}HEL<4{wXpBb{A-E zG#hM#U1l&T_7p=&c;k{#P-7_W*ZrjjXoZWPB2=Am$SGrB=>vSUy920v)GL(bNNIaMBJ# zDe&UV?M54zaBo73{YCW)5!*h+mG@7Q77;}f7)WpC3sS=t8obmtgrza=i2Q{trh%%q z8?mevZm;C9*Obw`mv#ZsG1~?AiyEZsj)H3qr;fN~A}~6Si@UEz5J;wefJvJ(Qs}`c zYx%OaOE3Xp-q_ZPI%=ShiqVUjpTayl)cGuZbCCCkrkb9u$;|d%#dfS(zU5<%JZV$_ zb-`F6$zD{a79z`SgQ-|((PGCg9o#rH-qmNijOR)mi16``D*JKY#Ae~UPK`*{>e%^)$Y3KoG(D7VTKvfsAR{z9=+t z+Dj^H=_sXh)Sf;_PV@XgiyZdIM5vLX!0+3NcV0) z|8lGzUxqu(DspSy$8uMbKCxjVFlMXW=?r+acI#6^9PNq#`#=?3+5D%#M=dck2~X4A zHw3hIRKpA0y6c`=!oi&Lu5;A}!Ee{~y6o*S{LIZ*7y1Ni9>t-?;eA5mR_gGkFy&#J zw!nJ1s#tQ1wQT50Pua)5Z35Y!+8PB4^YrIcyq4+HA7TBvUamZh#L#CcHB)iyu4$+m zZln6~ije#iR8<>PISl{`G}9qd*(=rN+CaC7?(o0rUw@9whjJ!_3l|k`ImK^BU0TO{ zzPM)6RKzrsR^w#G#FfNt({v7y#K~zP6o%R7J z;6~qj=f+S_uTXuD_I8C>?0Z}7@v!p_hxPZF4t1|O#eVDjT&i z%#J(oe1D~#_+`P+&`!h9;dbGJ7jg2+(e3oyXKUke;}jI@joyvR$7S5^>{m0DPD4QoVI+C>qGOKm9;>M*ftNmrxJ@; zY@3Akyp498(Jyi2W5A62hKMh+f+zDK+f}XP=Z(OSfy!=kWHU5Kb)!Ct?N(D;sS-J> z%Z@IW%Cx)F{Q)s`O|}UqheL+Xcr=$%bb}>-=^G^A5D4=n@GcWW8hqIjQ&v%Z+VjPm zl@KQxY4pRp8UChZXSSco2>|MU_`Mjt<8fA~X{7|TTa>_TGw}*w+209(=gf4M~b%y4eUNbZfzww7Q?ULPOiT^Guxs#S;avL~*0bzJs>+9=3}8&WhR zjF5nM1^1Lil^$hKe>KI-3oq#d%salWm-1RU0p`Nrv>$rbt&r8|K&G(@GyS8gR4v3@#kSE_g 
zo*90zykMU0+9V@=P3Ay_)&IP0@UGbCW?n)X6Eh>wNFr+B4$Q%Cv?3Viuj*^P#fjgDN~=Ky5SRT@`*}MAA72- zwFnS1EflJ~bG0c*X`3T8{*#(ZzoKgT{rwWs7&Ie0S`nYm;G4^3N{x|*LK3{tB^@9M zAEnGRtxi>NaRbCrNatno>a8tICs0I??@H5yR4DWZpx^)3=tdhE?pRS=5=1nNI(J zIdrJtVu`NEO{k`wL%2T;q4wT| z*ZqeQs*&8-t!*V7$2(7MS>I4qL0`=JEVOU=GUa(SbPU4861M(aqMs(&E4atws#b^fT;k+rv2$0;-X0C^F=W$BqP$aq z;Kc-CrJMRzX2>O|OOUx{K*bj`2|w4B$;>4V!rC=mACz5Kdakaq$b#UfdriWo=ti%e zi>A_!DWO%z4-*P`ws9$Q`xUuP?20QCUepGOVy9_R# h`hV_{#N!>ZG|e`}eOYXQpCbJ!Jym;B{K!1;{{VTJxXu6o literal 0 HcmV?d00001 diff --git a/docs/source/images/Nx_Cg_2.png b/docs/source/images/Nx_Cg_2.png new file mode 100644 index 0000000000000000000000000000000000000000..f8f685386683663b3041217e45bf977fa017caf3 GIT binary patch literal 49831 zcmeFZRa9I}w>AodAi)Xlfh4#Fch}$!X*9S?s5!&$8kFDGj@qjqq>jXNB3%{{0SX2lltUc+>;C zpgPLxy1>B^ym|V-%c;FSf`b!n38NsNPwv*9Zr6Y@`&$lMVJBTtK3Ui&F`^OSCtZaLY_Nn_Yv=?{_rTZEGXfoodR zmCl`z+&o)Nu{t$-9OZ@0a&Ek)ZZXY2vSmM@YG`YAh?YGD+I=M=Ay$SD=;U^mC-_fS zdEmEAi3;KS-QVQ{|Lxm<8i=uV9MfN7KRf63>-Z`7Z*~8vK~((} z6_T~Or<$7b|9aNpq$G+ywQ&cpaQfd`qWaro#Snj@Ld8uvIiY{W{I8kn=L_H`E?ipR zDR_tUT1BI0tz?P}(h@VYg>i^46Pd{$V$f3k=rkv6_4R35v*~fe(FG(6&h^Fb`OB>$ zrjz*J1+29uR{07AokOF$B>oCyi+i-tg>ET*wzqPlD(eU*B;p{ztdnYYsufME50|#d z9^a|)K~KsIe;Pxt($v#I83O|YH9NZlE&~1h7P49-D@XWe2?fx+X0z|1JxgLQS2Zvu z16&^k(lC{+v0|YVegveAHF19i+Uay_XX7lgbAJ1JZi5eup4 zZpTG;)?m2l0>1v8Gu=N^-|;ShA0q|uc$k1kfh+5MIw+X57(T{nW3DqpG-7MX2di|u zZt(u^6l5PDZaq5lF)z3>Y!bYkYK=4XFsn9S%p1YQRso{hys9VoE97_SY$c@KZ)gIl zVKX>qH-Vh;bGo7+eyNGyP6IXOP%#FR`Z?7gJ(Uj{DEssJ@87k{7D&8F;PkYm-JN5P zz{J=7Nm=B1ww1J_p=(B0?)^BGFi#sg*}U>`{Fl1^%dLjQL``*hJ$S<`>1(M3C2)YC zt@mbO9cQ6FI?xio6D*NX^R6U$>n=_%?HwlHX%WZuojc>DQ;OW4Ii6&@!?60P`Ij%$ z8p4YfHzS~_htn+Eg5mr8QRsOL3uLL>eWFp~WnwY_Mu3uYQKry-0!>0wlTtftA z#etl;6^mVliTl}$Y%Qmpv#BtBtIG%6s%}YzKPV zgsNSJEY}?7&dyIj3|PDX{wj=Ul3#(ijf!uEjn3e=Wl&xwG$FtkOv$n)xz{Fu^KP5XE#Crz8^%I{iE zt38DpYG88H)?!ZNRjWtb&r7#lsiD4~mnEaXSL3z&<}+5+x&?M-OS+;d$nz7sWfI%n zhhVdN^3s!wotl{QNA#IzAf0-=3j1q`#?yll8!2Yf=u4l=i=tASLzU~S$KMn)wd5ZT 
z@~}cL?xI=v>?nY?X4_wwo4tP`N0#T=r3mRjSPP)6$#Mdht~n6l%v@GuB*XC#TEEd7 z5F#X_@F$IUq-EEfg9{*0-ByFeFGy0|UaW;*TZK$wavAjXJiPO`IRU~^em(puc8E9n zd}%3SE!yJ-emQ;r@dRMN&$NC`qiIp946{t zH@_K8(cZ79^xfHUu{>LKz$0YKI_=J~>fKrcU;mca|%=i%yV!{zIq>y znzNi9>Srz&j+sO3-pe3CBgi@aW5}w3+n@Q1Hkq{I7^bwRvJ!Wc6hK;lTTgZC_hV@e zLtPBp`t|V2jhX{DZtW_?{!1E5$rVJe-}@Rjb`oIz{2|WSqUWIkn{76WRu_}&V{*X>d)H*hHbw1?vI@=#P8@eIu zx-UDp@7}#vas$m3>eaW3;ybO;m(kO|dJibTn3_4xaeYhb68CFMtcJFq;-&H!9ST8i zS~^$5n8f=CHnUnQ)(HAHO#Q`G1&U6k0?QyNXUm4aXen|)K|UG!MWwKb z!zMwT?L$-`QZ68RIjo~>Q%~AA%tu>W({kRmQoWP-2+p|{IaGzP%X{gGyF z(5R6?`H}c1P2xdwaY?cJE?B(DZ%dlrqcCuJ6~_UEAGr_RLCXEZj1+N!(G^9kk3OpO zD31&Aq{uJvB;g|gMIVzlW_$0X4UNYol=e+)E=$b3cUo=}e<~`Mts87EEt-O^EbFF) z1ckm_wvRX_9$y}3SXqwg@LR~{GOS03>=Y|%ZOWzoIl+stb*>h8n;`iR`e+&XS15hz z8n^??T#xIQ2lso?lTpt|605;CYZY(Fe?XL`dA?VJy+-+(Jf^qZHQQD@oXy^M@{S0O zCmv)TQ!nw}3sFds6^wQlcFSfrGY*G z{MHPQKppS*yee{IKS|uReE4&0+ye&f>u^OOGzQR z3HgDw&w7sug%s%H%*I_bTZ{l(o{q!0#vKc2RbfWITGydgdp2w4P^K36SbcO^30rRj ze)S)|D*;z=>`Piu+|GNv)vrl+*eT2~*Cyty`MrN=AM_4XK8?j{Fs;f$t2P$hs=fEh z5b4(i`Up}UYB^$EqG_MIjFCYpyuM+7eN(#ByzNlu*G4XQOV9UqZ|+*ii{HN6I{c>Y zsexyo1rq2loT!_eqY5tll`iAJJfDBDmWX!w(+@BiG+&FZ-M1(_x%f*{ACo~_ha%lY zUMYaxE73E%r!I{)YQX|xuWk05;9F6XF&64I|j+@qjQ`$miEfH0h$=3h(NXdirs7|DeP_$Mj>J<_xfR(u+2 zQfz$=9|@hJw#AFdvA(}-26Wr;fgDilSJ*<&fthv7nR0z?xNg()Vv}#r-$wlqW*;ER z=nrR`r>1@~eEJfs0N`MrrZPr0LksZJwjSQH;G0eOOp<$g)idtzTYX~2vb zd1Z8noxl&Tr;wVB z72hQ3)8Me5(YQWw!A*->ukS~#;z{0KKDM5gk=Xc!pr#T>apQ%e;`RaCp28nOTw|$Zve>>HYS{r^ZpovYEVE zS?>eWyf5&#)KAV;z;}70{fl{bCpUZ2>8OZ7qXMUh;8xGFBFZw;?%{8id_wivA(OmP zZC^9`$TV9Z?C!PE-CXq*yn|Nh)RZ4Lw~PlxLz*jeIWFtn*_B2hKKu1+jhWa{SN5xX z>E=a+9Q^M_td=ia(4t~&qaI=?`N9O`SWT2OvZmkMV%2FNm0aJHKQ#R4g-KZ|K?aLcR!90h!%$JO!N2PX;cn+4( z8(>(Sf+`Ll-k*6^@I;EVi<|mEFI(F9{FX$!~v8a)aL1tV_Ki0RJEw3(YPx<^#1c7UyfG{m9v=VH3EK$RPzXd z<8MP0bTY)ZyL}Vx1Hbv_tDjK{yv~AX&*Mk=&V=o4c!?Fy$#?xQ&B<$99&~Hx3A|x~ z;Izj-(;MfqEq&$xa%;v!=>UV!RiiV5Z4AFEHSY~{yVEVDMv3i1TQLB^Kjs*@@31%hK!WSf2KYcI~;{ktuu6x5u-J3aXwiu 
zhzK);Y}4(rQ|pD4piGBi`0|*#y7fw)(0qQe3)|a$dCQ7R)vo(%X&*RAcxle1ZtB&O zXa;g50NLyS6AGePqWaBUG3MHOS|wIfwP^HfzxG`+{7cZ*+L);lK``!5B zwe_f%jWD$AQO67d7YRO}08c#cok+ZXqx#oA_jy@Ji8gWxvfXUh$yFLfsU>uzqL7RU zq52g!f55&dZ0y=`P|8bLN=R|NCqYx7g&_ zOBXeVwRjn(+uVFY(y{yIDFz0I%Nd+-Q}WOoYlA@~h0)UAeD&Cp9U&d$Izx3{Cb>i1 zKFcG-(pX0$8~ixCSbS+gdRFzOQKi;Ay8^#fCF1B9C-#4g(yJeIg*QZ*SqyWm?YT{r z0lvwL-|`$UYRgLH-^aTF0~RQ!cEF5LvOSh#QOeO+6%POt4_$-}(;xe|e{JP-Y(#7^ zxA|}0jqi@AVlfFSkcXJ2?GfJviq*XgDRZ$7J) z;QA};L|($;4eh=Wz&l*XU!HGl?a+-W>>28BD&_EO((zJx;_;VbX>#gd-}f0qz+rMJ z$t88q2XiCQHvsR0w7rPi1pe|z%Vz`!qb2p3l35Qv&&Q;M!k*!rxV~~Tym(VdW=8aR z-zXj%B+4wXvD1{s;+4{_PjI?JBL3QBcq7DiVF_Tj8T({mh@xC6XRAhMn+)KVs(@u<;at9`-9C)Ww|Hb z2S=xX<+Hj9m>My5GhU`NAtJmo*&)KrL$4=T?ZsGUYa|VkM9UJXi_KbKF~(v7*(z$4 ztvUs=2b)&~`z5f*l%71jg!L^QT41de<%XG)L7R6rNi8TY=&ue8}uKtflh*Jm*Wk;)2c?~RP0eBd9EZYHiok8bAp$1-y7Y7ucp z^DRhO41bkJei#X5r4|h$418m^*<=Km1@EU$emp=4VEnKR_!S0 zH?=QBdqPpUy+E3KBSR- zO@v8A&WZNnz44Xh+-E0QVi~qnsuX1Tor9#AYG=+ULLy!^C7h_8wR&zo4Bg+;ms9%R ztOC{wg3vqZq<6lHDQ76!ou7M^DDRrKf>M&E11iHkQ{#=r79&Z(QR+nHBFDG_dM!)M zaY<&KFvMz53ef~*E6tjPe$Oq^%&_K{Ss;!WSu$t&`#B`8$g^2ij&Vf`^#w`G$&-oa z`3?(&25WWk?T=;Z#Ts|@lGk!dCGy`hRb zd|4dTMTY5dHp;_O$dtcH7Jc7Z^wp29&no?ZZ@jPZ3?uhkTpIIA%uaPS#c)xab)-E* z_|{C&T~@k;qg?2c8E7!8?Vw5hDE4~UE(Ix#oB5(*x!AmwOEd$#MK^Fzm6ntpAW)ge z-12@D#40kD(c1U*jk3=fnZe~P)cGUh(;3Sfk4moR(`5mu--K^YI z@eX;U^9NBBn%(<#6Eo2hL1h==Z{7I>G#T?tz%5YF)Q`EBE!?!yIP904wC(9e6`r_@ ztigG3-K!lZZw^c}WiACBu|g|94@tk?lF|EA;EWwWv_I)0HZt4i(qqs~_BysRUJ$12 z{#|@OnRQGIiV3Ek_3=hY+E?XYnV6hMxr%8)#-*9~f*=+!G4aPH?iPV5UT74mIAX;? 
z@Yh}92pYa1j2PpoycQqswvqiAv*dcWOwlMR*}Ni=5m@FU zfOm%%jHI8t{Ex?GUe{2{!Uuql35S6w5IAv1?P(;eMLV2?S)pX?8QhRc^|fT}2#!^T zNR*m4wu{qKh!w{dLP8!(5{P-bp*xh$MosT$Of~Q9?F&ZJ4)vQ2B^PXF@S7T7x4hX~+a$jJ!_9Q*!5Tx@PoQ`F5H)8U1x&J~*UvI`3^8@WuEPr;C zbJRhg(3MF^R`>8Nc zh=v9=@WKj>6ty7673PpSt_hJi>sI2i(Unaw4Pk=vsgL*!>2^YisrGYK$r$J;6v2huVX4^udwB#|APK7 z;LH~xnUW7$LaHgb&}X>NH0`()Bj!4tmv!tluP_(liPe0fRb}JU@?yBg9+?`exfy+S zU)Uw_{pw+2>({0sqWCg>dgp|{C$bKXB_7f)PYcrb@V_^n%=|Ny4z7!P2M1m+9?REn zy7S!zQmbRF?b>vOw+a(kL(t%1!YF-22n+ir5N{>B@V_}9@zrD;7|WELlSKNb@V`C( zGbq9SDKNIkoaC>8{{Nuz`lDda`q5h#b^abQus1Ey5ygWPJj-Tx`&96Yg8s!SXrFP> zRS>f-k`JT#%qnxXynLX)f?pj}wX=HI=-#PQ5V&=kK90w4ujj+U%J+=L*pQl#h>ew& zWBk(@3KYpuay6*?1+=A40#mL|fr>5r^Lf9{3?*U3!w)wDx8({}jT^#8;k34N;RtE0 zhj~vWx4&Yn{=zO)$6N7!m`WHGrjIAMOO+b)tJ?TAb%%05(qZoT{v#WHl9sBj)Z{LE zbOcK7k0UCuUk1ezj*7rV(t2aBR7hb&Fw$(1!{#p@aqB-bULj|+7_Pj{YHE2_1KaECpt1O8JXj!Efh!%tx#>TG_qkh@@f* z&G(?P$XLBReGKv$e`|Ri;Pp=B(5!ytqKtYw$l={~i}9za5>vc{MD0ms8?12wLyK{# zg1YaOcjbJ|1>Y?QUFKQl5L%2W=?96&~v2BL6E1PSs zcBFFJ>2rxqHJ@HLw3t)S#olu5Y1qg|kP@hchvIcCo%5Gft{e#zO5wUHkxrhilBZ|)oa#CYwd z8f7L$a>r~ZiQ4H07hN~bHJa5N)!W{9T?<<@rcO35`qoq{7g#dTm*;YQc!V)n8n;06 z;nP;9_cFU9cBp8m-=ck#W7e~kZ-FbAi=m$&?%DSHsnuxeaQg%F`LS&)itP@ZA2L6@ z*nB3i-wAh*iki#=V1I-$FKd>( zfZt*P#8CbX;4 zW?`h%#-bX$G}u(<5401Qsg=-~H5h8**4f;jv?7Pj6PMcDR)Cp}0JmYzkfpTQ2w+*x zZBxCn8NfSi@bQlJvI4xn>P!h@tARMU4p^NQnwDfRSPdOyo^`H6E%nW@NT%%g0DhhC z=Nk*qgf5TqK&bDo0$BvFU1rQ^cn>8l__sa>D)(zym5S(9Kk!a^VhUUu-o{xIUNqM( zCPkjPR0WxO5HQK!-Z=Bp*}fVSjfu`5#x`Ft-&?UhmXUgGHpSg^g~559RT zGs!inb+nlJA_B6pY&*W5rOUSBXf&&nNhn_9k#GDhW}9>@A|LrV@^oSt^cEPC^GaxA zdz5gdG7VEzJPMeyDF~^ti*y90ZHF6?42Lqlj)@jye5P3s9T*6GRl;$a%ZU%rT5^R_cUH*^ zVB^|F%`VKJ1l;GJsU3nr-@}LZaIkg5G{jVnVhl(t8jOgc`v_=O+wbQZtL9W;C2I3O zXmf5F%zLbfSQ$FJ$8bH*JEf10222UO{*f$Fd|G3Z(&@?lb9r%w|5IOda{tYSE`)tI zTF{($5yVG2B~8osQ?-}9^5r7g85vG<#b%T-=lQ_j$*3MN3L9vLqd?$^t9Nc$>KzlGA0tM zJn)=$_0xVeQ6Gr0%*+Yd6Fy{)1mslh_)xqGg`W+G9-O@CW!yN$u6H;Ei>E#+5Rifz zORoa2pXCzPDobV}76e$c+(AVyern%g<#iTZD5?l%uJN0kISn6aDQ%o#Q0OkD!#u4U 
z39p`%NGgHL{fV5$bsb$zMINEr)C)?c0@WaWkLWJjf<^V)o{}b?T3c43ckZOdUkUQM z<{HCdC7U(oomM|VequDbj_~bkf;lPd{NkTE!T9@88q^GrGoJBcI=Pn!FvUrCL?n3N zjsfn^4H7}8yb3s!BIT)dXO}yh>WG)zr(c%Z4C&V2bu?W#6AB}ryquE#=qW`9otH^C z_Jq+Uu#IAjf^)OfCQG}|YAc`)cF@2Mw?yDR*-UK~ql6QTh1D={$$2g{0ppQB0`$9J zyAN<5cc#`wXSU85aEl0?H@B{t@xi$07LSfhYYu;)h?Vq1xI-8Cbm%weSP@{&3+jI7p1!c zwbGsqhL0>Iy0hjL6nZ-*+2Z7aCl?!~Hpl{$Vp+fOYO!eCEwr>Pnr{E_89{W0Z^qV} zI`Rom{K|z&qRfCsr^?PhF)f}tdlP0-nFy`%_7j+)bmvnT7_=H9UuT|jveHeSz2Rg1 zO#avYw!}s_x6kg18_$*b$-q#O(MLaOENm>YoVw&1pQ*31#;N2~x+H1*(d59IG;;x! zo(b$qQos=KSi0z=LR{$hoC|HyZKSX0I$h+J4RfR_020)-NpMiD$EM~r8kaa2!kd~D z(AlCIjyd@4)VMhmVr>miTZ40bc0&IN24t1gkxk7_-OX!o&#Ye95*S`8ZpJTxhRKl5 z$&=jIC^p>A_*iky29&)EyVxyeKV&XnYOq(1`-kY4<3M~)2{M12ym163SGHHeKS!h9IjTl|Dhr42mPWbe7y8Q3=NxYes< zFNOwVA2$9l4MZu)&GUV0srou(o19AVSXIuaY-ZVAfL}~0w+qcg1zRp{7YjAN%BnQ9 zc{Z-AuJ{WRA{ewY47~fiSmTpo0=|BQu@d8UHEr^0|3o>l;}p6-n_lF-psW4Lz$zVo z9Vj}biBEKD)6)cYZ%4qx={X?W$ zz+O!}ndCXeGGpfVmeh6k0{;}+UpY%{^I#~0{-1IA^gg?|sV;A7I45HEqJ}2mJ6-b= z4vrF6ajQZm(|ouCsY;we~9@Od~sVkEFGt@i-bdGP77 zZala@anhX;iEj zacD00{^9v|)$^u{O6tTbROI}g={v(m{KU!>V+>f32jEdA3>_NLh~}{5%QS1$qM7~# z2O~8jaXYu$`}yX|i{q^(%R+ygBxp((IR1;;(_#7qcPNGP+kyrDp*pb<6oW+bI8@d$ z1?<#V1HElek3_jRkfTt{H8k{kT_ht=Xuc(vT+b?e=A(i|A%vUR-MDdYC4k>G5_x+e z60HTlrj=qK1PFvONRy5I-qfHXbcCZB4uc`bcEcsTpV{K}(m6D>iL`XwzJmkn#4Cao zWH7OVvBFA5txE#2Q-*3>3?<+9h0{eneuzmGj0JeVN+yl+(=sVoM75~jIJygkzb8fJ zp6>zo#6`$MIYusz`(6gbAEi&QJ1u!iSGY?H1n4!D3^TvjiF!jIV5GXA4N zLNFx%Yb@q0mJTK|E|E^}#F62;WtZllf2POs`&8$1)7=mD#x3*I>qkW(zjZ3V1lRpW zhm)Gra*d{kpbptW0oN@cq_uN^Yj5pK(R|72kADzX5*~PMk!fia?cwtD%WcP`)M0=L zB5GGtZuoelY$XkHO?FO8>3jO}SH;hEJ1VZ-LfHAY)%+L4-Xbpqbdc+@vIsMOg5sC#;*H{XLKzGn>a?X!qEXOt zpQzE?b7`5TnDty+m|}t|7RL-d6gAnpZYtw~1m)Al@-ns7OFlFht~iRxoMZL5!ueqs zrMyC!X*_?k>|w&ZUv;>UOs-v}nYapKP}Lg&jQJ`kR?;33kKNx1PZ15M z+tHYzY_EE_5}&4I84|O-i}(vth)z=dfwyYX4*%uw`2QFhvMWP_P4mQ0HxWW;kpZ5Pm)ke2gmPCk98J~sNB zGf3+9=3QHsg(WiGSwUK7G}c?^l_C0K%pc)D+7>vXCp1c*zj&q;yOj#*-M!f^Yx^WT 
zv%qY7^l*F6Y)woaMbynJMUK6QlDl99X+xoUZc2jWsgKF|yqj2hQ^Rej*N2AjSd(QJ zRomDx6v~_!08be;YI03vxE={P!>YrkCfX1#jy^c7}t! zimv=nw`c00#>^*nr484y0PbMRK5PKxp6NjHW>I&tfSIWItl52h@MNh-lrp6Q zP@DK)Hi_&d!XDxV!!uTIUnNP~pZGZJeUXdjrA~;Zm|{Ni9%pK#ypHuDmB`%_Ohng# z@y2equI&`i&%z!6Wi$UJHc-NghG);PhB~%SM$C3W4LM!W$Tm6FL5f-31ffJxZ;5!c zaJ#`hY}Or*O__dZs=&nKnI}w4}U=x2j zkiJ|CcYS&5dRWz(G`i_5_J$r&kEZ=BKgSR0zvv!GoG4K9`R2#cSF`nvBQH9L znB3k;V$cjv^GZ2sg$7TbZ|Or-Ib2ew+gNFamUP@$pTQ^Gp%w>ocT3Hr8O6W?b(|d9 zX_eSk8K-o>ZB=g2(e7eE+dzP%$!{mQmt0-}z# zD5DJRxFTG}cUol;pmizUx+n$rB_T4EfdoDq@ zGg;f|Vt{t(-c_6Ii3vq?n5vzrOz$c>Q!{M9?(bslBwJx&v~C1>gKpRFkA2EcssD-Q z3qYb(p(R{FD$ie3`46o5U%+RKHv*7EtjvKSc?^c?y7G5`tD_aS9R1%m^nE|z*h=EA zyWy(6K^gmg(eteh)@d?U#JZ1XFg}+Cs<)yLK+bBH==SfXMKex^t|18lJuWOoe_rNk z87+{#$HC4#?^lz@ee&L}B;df6hko8cB1y6af2{*-1Pmq74?d)1jsMr)mBMlftfi#> zf=Ew82LJDC;@O?3usnm+ADf~7^ZuVgMpB~a8jK8|-y;5j7XC*GScB+ONHQ!ue*!`O zTa8a|G9s8{>#4?b{->G$YSYpY_Kcg?xiRNIU^Uf$s@H)*&eIVQ-%$Unga5uM8U#yv zsO^j&`X?Iy-}`^kHaa3;kh7M?YsLRA+CL>o(qYdqVm{da8=wE*-_%0v5v|Row)!8U z{rxe%3VUYf_}f|H{~|>NH5F1ZKK@^K`hN-!TPd&%j3AmM(ii_*3V(cfc9pV1**~F+ z|GguyHaU4=&;I`@{QqtWRV3 zsa}34g=H|?ZOZqMespGlWk!w$ASKqbS-n-aS0D1`oVS5^>N#a^3|}+_B6Zv5@f3~!}9gm z+}_L#(W|?mk7oV9Y!Ji z^z6nhGA%(dvwdhe@E`J2M*hOGCrSRxZuk7T)z_+Of7gszTOnB(`N3(ef+in@k$=|O z%Ne%NU~(!plq-D}P20d0f=KMJ)ecB~FZD*Vc)z-BezWB}3<+;~Og&hmmaN8S5^+hw`Z$)VTY>c9Npm82kSkrR^<0Eo7j7G_WPLz}a-fEqOL=+& z#t!XzRp{2(Qi9s8+r1O68n5jSjR~CsGax(A%Qa|sO zTvu}gdrjuQ`!pFBf;#v>dH|DWnSed#?Dy|wyn(!;xbam{;9b_-0!m+rZq4$^H=F%! 
zyyioR_r{I!=fcZr#*2;3>}4G&^2Xq}USH*h(1w(DWVb`c99UShCSQ%PR}zr)OpcP( zOOVL0QSgJ!2j2-hdMU%soM6!zKbL z#nv|3H?+ho+lA1xwA(@f!N&o&a{(Lx6pn=ywq%)5uk5nxY>1U$Kr$a%x8TcDb}u($ zY+X5o<~Qf&`N4kXsOvULrg^6UHr^^!q^5d$l?z7$tsC1D^*ocTP!|}1L6|a(RH@5W zj@Rc*PwFlL9#Jjmi?&qsqg?4tVa|l?TL#UiY#SqA^PeD#4e4};460|Snm@9zyF&eD z$4B?pyX9?$(vhm+!TB=Yv)s@#7#lX;Y>gJgABr!GM}i28H+pELY(|+ih~?fOLfSUW z(jNjFGL#Ha62+>ZY^jR7I2Jl+6Cr_MX*k{^V~7W)ZaP5$)2EX6NN&-Z8R(JPO(aEa z^U$9t{Q*mmos!WjAz0k**wTzLy%GSK&e|2l)vWuRz<1OUp70NQz=;>|O2|ZW4-q6% z4VEI|_Y8W^uMRo=R-bLNzl$euDAA+hu@>_nSZwqDy7T_!hTg8V$CCJ%sGsC1PcT7Q zY-YMG(L4Sn0PC)+?f$NwPxIaR=h0DxC13-9WfbAP0G86;+6nm-b*StZ*61L;CLeHN zH`hkhzil}~QQ%=&Jmn8J^j>~Y+gowk{a_xH+2+dJ)_deL@d`t1539_3L#I3!V|#d< zx=HIxTp8>DNfj+KAEy_tL-QN^4qWj38XLr^aXRGKWy&~(UtNHyxge)p{(bMmM#x?n z-Ee-&@ebyTjZB$_D=IdesL9%elbwOfdX+G(HO}i?rKwtgOVdLj=AfvMhMHWOJN`+b zHNkxwyLRgtMSBq;X%Eg|N3Q~oYDoFogtROPX$-4xl|%yYv_Yx3e@;KcCu4|_sUS&Y zK`vp>Jj6(4yHnU(R>A8&@~oQ$P{Em&c=}^K(j7e;JI3~IanY_OUM2?318)ZKlz;;2 zePr|Uv0gzWnG*@fG-e^{Y(DxuGe}gn?k#-2MxZxDQvktI{stX8Ec``P&OJP`8M6j8 zO1B@S`*v%%BAel|ZsqWM7Fl1*b!x9*#0{f6qG+y8&XtK>+Xal~fw72g z3=qB+7||mjc5TSjMBYeR*wCl%Jz{3ElJ&GZ91kz<8)Tshzvljq{}a+~K0&eEAI`xq z`S}a=w2TVUR|#^VZPF zY^lu}s)=^NFHg!C;$>KTcUxzs2rMm<9*fakqKTztZx_|iwTy?{J}J^$f7Q8cLEt`! 
zxyE@&>;~F9;JTHasL@HG;NIG>e2+U52}`V|LtimQ^BwtqI;Vi%XM%k2OIq^dr31Xf zzEG)Jy3wWQ&ClAx{x64i#;jE{u5-k8sO|Yh4tCV*N(LS}bJupd)5=eVJ+BiAOz*X- z!z?Sh?eOiTu9CKi?1FFUqQngYjj&={G6q9gnh`3g{NpH=f=ZNGNCU4f=}B`h0n>_& zJJcr!xn|zeA^_n<%j|CsAL~Dy5lI-mH$ucZ8J5wvngmPOPHj#e+`l=SVg_k?0Un-+ zlUCOJah9-ng!!sAe_sw^4Uuz6)_BAN;hcp^k1!#5wi&ePFk9 z&6KM#2CZUcP{XttCqYZCPUQ@wV=I%Gey~ttK5Y-z`!FjC61wddvbX-NSKAvX+@=iWiPXUQ8Ald=&Y$t5h7k)v)+K&&LqtGA*rN-dcD~$ zqTU(;bQ9n98-EU!<1$2=2__ks>W>EI>z+L&*8CtobuV^6@)0@TC)+q-UM*?e;@<%^ zk4+|lr98KPkI=Ji5n52tIbAl|!4 zW}*T;wyZPR*GYyo^Sn}gy0&{_;)?bR44q>lHXc1XXwsR-VgYsLo{P?K|NJk2?R>|1 z-e2kcNc0blk=%VtljnCy?;0TY5Fnp5i>*&hWwdUWZ!eAQ9}Q=N$H@8V8;y@k#U*hE7HCF6JaT9u<243UBmA@xUIgv zjP*&oZiQ;VFnlyKX(i~`t=JTFdQZmM^&42iB7_TwL>PxRO2iOE#}j+{^WBJ3=|+LgP8JuKjv99wHtvp3OxrgnN>cS!$pK)Bv9U^;L@ z$-VPV?Tn0utz$ol*hPUIAv{S0ovNA&Ln!6SoijhT_+coFx?AM*E45u=F0+=c7A)6* zaW2c@Y~BPB*JjH~%z;H(6ZDCECUdZZ14Ym1n-^}$MH2+JNGaqpaDK11w;-`>?TdDx zF2kcFzW$qeWQlcWIy8 z-E>~W4`l+R`@W|`u1!Z@XNtAmN9uyCU%Tp*^Ku1H;yh#bvgQ(plvzsVhZreAX{yx# z(u>2hYi)66Y0lGkNDU$Y4`;@v(+KdWE!*dbFP}PawgYA+FgD2T+ef1?T1>Tk9~MdC z-Eo|-Ro&Sir@`olR)|&e-C!<9;yz?V>NoADBjE15dtE&1u-Z1JKyQ2YpM^`Iz$v*X z#55UYps!!ugP^kCZWMoV)_~{*y1&zla(`rVdsk_Thj56Nsc_k!Xt(TFs75R_TirAy zg0364k?%A(-6UPZe0nMov1dCBEA!NqHskdK4!Z>nkjYuV17sT=O#NyrsVmSe{_FYr z?%rZ_k7%)MQ+5nQezc*f&Lu)d^ebGybAmwh6e;Cc@tRc{e3-JX@TX;c+McHY$vd}V z^RlB%VFKoDSrPE|N8T;+wQr!5p?HEJ87ztes{L$a_wle)1TI$G&?)A{XI2ye&|Vg$ z`)0|Kb?hCCz{al)$ml9e0;U90&j-o+CV&v)2(lNhB*-ZI1i8_5Z37b@Xq}s)=F5gK z`(-%;U%9$BxUJ1^tp!+nX3^E-d<591cV`eH@qdWHmPMcx93fZ0+xE$hiZFmvlw_O30PbvU&Zvx>=2(~vT zk0|^gQ1q_F_bAK8?+Cw7{={2ZW;+1Z=Bb}~kImjXS|qN~b6Fdq#o*(BT^BViYBd#l z8dfeXDfL@t?;8uqS+q8Hthm`!)+Ww&#&idHg*pu0>uh!;Sn)c2|31kA<=gOE5eFwX%tMg z&a#y$U15BEw}05`XWmJ78RTH= zj4J0K91EfoFf1LJHENq^kqJxtqR}Il6wFU{bt1P^_j&7VD=~9$k6a zjjBCCC9N?9(6)Bqn050}$zMo@)Le4}8c3b^o@g>1)3uZV{SfJ9uPo}Q@)~qqFTHl& z7-aSS?XXp;RZTFS3#W7pho3*PMuAC9&e*lGvDxo`j1DClAr@h7;UL_sykB&2Jgovb z$9LNZVh>*Ur(`J^|^X8C@Q zt2>QDC{6`{%r#I331obJ$#IPsyP-Lu@J==Hq;t(S@^-*P=Bpb#q~%waHoE#9@(o`u 
zcFA8aAZt{3IP(|JwvPw&{w{#!dh}soEL>ZkdwR-xxNh*<2d#*uU`cykD!GDyBryqQ z2L7$^u;dppNhbCm$wTw0>&aI9c8{dR1IYLdE1-sF~Wve%KUY?{E376(DOLpF>2uleraC)y_Q6!Jr# z^Pt<}uezz^>xh9tdK|K(_1BTtt(fjHIa>l$^4t1crv#gy(3<=@;|hy*wQWfO=Ax4_ zNn25;zaKw2p|3Q;3HIzh(Xe5EIv4)^c)uE%*Wat>jjSm_4$O(Bl|kYpPp8lXCd8Vn z<$E3g?UL|U|Bd&E#RD@Mup0Cq+z7vU1V*HP!^@moNo*|F46ULa<9sVZ^Mc6fG&cd) z6Fx87{KgZZ*3*2o8&5h@7QCrWh&OTIb@yjP0o~$`$4{@Waqq=j@uDmey`M5Z&Ri}O z{#h;Mbf?R&MUkaO^&RyJ%^<2u!S6*U%csf}!VCrx+*VM5FkzR84QA-PLO=gTasH*t zg-G_u=;Mlmi1PM5KBLleByc7IB3Y}EqD1_(^Ra$R>vAn3C(G6(KDGasCxBKNgLyx}fzqVRn341z{LKVDMOpaU zR>O+ixaTF~lwQikcTSG(K(OYy6i6WPGxOagwkX=_$Nu(;EhYb|ejf49LzD}jnDRC~ z03k&B-D+Ydr*1lo?Tv=M%I8N@h6b3eNEC8!0=e%ji5JCqwyLewdfkDPYDiF!n+`P_ zil!H&0$PoL9{+uEp!WN{1(?`y?DiAo1K|1flBZ7f`r>Fw&CIz1?jt%8FZsE;Xrbb-u^>3*~)7gwhw@%kc!}cME_sNxve?6YROv*&Yqk@;GThH=0vJHWW zJD()6girc5t$`f+6@N*F^lT&|FNRMix^ZjVoawN~pyMq2#{F|-Hbc>*S7u|l;qVn9 zc~@=QN3DIxscL5YbF_bGi|p4#+PzZLm0qssc$@2IOQbjg7ELu>#F(4g@*Avw)|fV> zUjdVdLi5`C5?ucNOH?e9Gl)7Wo>0rE+JAjV!`A6Dciz6}TNj?>^1rF_yk00y#pN-t zKv2r7!DC;jQoj (?U^ewVGJ>J#$@WsLmc{9j5}pPxOj3eRGsSpK(l|1S$*n%(F4 z|DHcsiobRmJphA8#Z%}5_A;O7yJ%$;?X-FUIc|k8eCr83QQh`#Qm*GEx3MMxQJ~vT zf1B|L>+A8=kT%YV$B!?T8aElLZlZj}Qu9C3pGk?$2{?{;Ppw^bcJqvvlITUr~et7t4ps@_nhfNT1G&Cf{-8P~pL!%5>UgBQv zZ^}yVAgtk{eg0a_J=*UEKA*PLh}XQ)y9B&X1gn5(xO}oLhT1%yOJQ(R3Cq}N0l`+a z8+x`I)2iFQ{#cQ*o}BWjmle%@E#V`hJl9qj=HTs+;e9Gb+l#mlCEF*L)X?CgPVf|> z5mlEcrSSmgq(6ZFZvwoi#&DN@WFI0`U^Iry+IE1O^@Y#HR^pv?hisi>Lj)>_3W~z$ zE7S;k-cF?AzKH$MC%Bld3Lh1G z2W7Oz5se(t(I>0E+LKoPf5-)h!24H)QlWAoSLcdJ-SVhN$*D37ogwjJ3xkqZ)5s~S z5`%VgO{X79g-r0*)EZpp10xgdAv+$yQ#sF?e{vCb^iwl#B6UUTOaXgxGQLxuH(U1b z4d*{6y#=BVN+RtM4OcYCyzI)pPPw_*z5~&@@O+G1#-J^A_VdDeAvxWj`hRP~i?mS0 z=H!ZF&c}{fKyGs%ADDvhkA{LR-{fKDWdBjpgTb=mGOY zXswMvqEguMjZw{K3Mz1Ou-ty1nx?8E(!`9Vl&$3~{E{s1SNri$Pb!#-wvrn;S{mug z)SG}~W`__BwqHC8xXB>;4WsxVN4)B+b++VC-4};TLn=`r=7mwRkAq(n#J9E8=zZI+ z^~nP(^D=f=Yzo6hE;BUzvZ%P_h9@_sg)W5mJr=bL3XorAxi9SU5sluvBC_t`9epF4 zWK2#!bE?2JBJ;kwig7X;(*bf;QasXXPR~mTgtXK_PbgBF_ 
zm$wHJ7vCWZ2yo{Tb>7k>FaAP^s5H6_CX`NRrhmGEUoVCeWEyP*JWajp)J%FxmpT)K zEG8ZsuWkIq-qQ0$=-1+_+HR^{_LWV3q_>*4ERKJFK_v}NoH7zof9`>b zcAn}7psPm~G0ba-wK|`ki7aFpRmfVnBYG+4RzB>C8tF&C_;A$+8cF}c$`hCY0S0DR zhz~iNOM3wq_{r?3murP$C2@m8EoXN~bUle5;{EKv8!Y#xDIF6!qfR&Jnn*<97FHp4 zptue5#KRmE=@4cJfJIr2E-+~UFf9phcT$TdSj5u|uTJo3-|duF;9wQ~EOcxRg57I~ z-rqa#^YSk6nRq}2H%O+^b}kK!Ai`;|+k)5{q`n?PRzm^dKPyw( zKt8Uy3up0_F@nhnXDzbnn^msIw%%prY0yi<4ak20l6F<|mXc~JXnWWzzB%V?&zt^_ z7@Aiy(26`pyRDitYU)Vf8>tVmKEMHUt^-D2$@V_4a}h5Co&Q=9#`xQ@kgcuFajE?4 zYd&F>4H&y(LGBNhQ bfA^-qmqytB3tFkY4vh7k1$-Yh&!;}#p5ARu_%s3{wc!`u zj6V+Vm2lOSd6E1jweLfDUhn^~6FMy(nL#t9X7LDek@8s;ssU_C99#`*s?N{)c+QA^@Z;Di)g*hUAt#t*(bqx?@YJ9B>k-06=^>{-v#p+E`BG}cJNy<~(+yVT z9A*{nkHkdF4Htd~U1dUz{6*1-QE=d+gS_}(@_QWg*iEJv|Cotv^-qxAI2kqNbiz?L z8`yAHDFNxH2Q1MPE6sVeQV}9rj_op$rz@Va2x@a>)AP#}uSLHmDicHYjk>VE-o*CD zY)6-P?iYiMN}SSlJ}stn?f1J|{N&F){B1hDpV4xVPXLa-B*F!X}FM9{F^ESpF1;067daQnfaF!g3&liCUIKE3Z3 zJP!qpQ4}#xq$C;%=Rwk`khz@JvaSeB?uyo}fpgms40y!D=Sp7@kh$NNfeZiA+c^0% zLby0LyE{i>zN$?hY}$SW>2Qb4sOh$l7s1UjLyxgn)39kszh}Ipi8gDoKRWdtPu!_E z49f+jnP+@dTzC2rxI>NyF z-$-gCNciyg0#YmD2ur^_=kS!y`!YCLdH4|RY{Tx6sDL|_{Tft=W^r2Ihr3DKeL^G%9Id|t&nKX`$W$;drKm#NgAWzpz!ABu>T-KrB)~X(ZdAjqJ%;to z`g5Yr2;+23fK``#)}DER=KybXikpg7`W&;NeftW2kky3j(}pu(5T+_Jl`l-dZlImOlY$Re)Gz z>zIGsR*c%pmpvR$fcIk5p^g`3YRabo5AdReJJ)D;emI|g7{>?*YpRc%^*Pe(s zx=6i!U95K{toL|&Dr1)cwHy6M4~IOp-V?(+%$dcpIAgT>s-`K8X%-lgrm*`(+5c-` z$wFLdFD?6-9c+lJPm+4q<9Xhu``LL)30CBWxKOh|jO!J@`iYp9z0mIvxTaMMqAm@W zjETHl;tiQ2~Xe=fcD1jm(|t28FC2qhS}OI)wJ)Ms7FdtaGuF^THx%?H>6%f088 zX!|wBjwejYYAZFa$tiqYju*>nwMtjVjqw?7My#@23b%ZT|5W>Ev>kX{ijo&$DY$Y|)kqu*UR+HD>zn<*9UHJk+xeqRFzy%uW>W~ZwQz7ZnfNjy` zUZst@Psn1Me!^?Xes2QY(H3?+G`N%3%K6S}Z3(=fhm~z;#PEBroR;2|Lx#+YwU3(- z-Da}mPspEGj&eY+$^`lZDxZhOn!D??p3=XoB04v615P>cdM=3IoGYWrbIbcq6P*XI z)sCVCZ?TgfwIF(cHg|pRGqtc~PG7p&cIn6ZgF#YkTY>pzOKv#p1qtN(Ez1lTqpi_c z!X~81)n{Bf&#{xs-t+m9pV9vCkbZ?Hj9m-Cceg+9*|x!*M-2eiEjx%aU;o7{{ITf2 zQ=#SQI4V%=f}aVa<&_Y&9O!QL@VgcSpoFqX#K^d*{qIqC&`S~wDol$ 
z0+fy9cP*~)*jBI%$@Y52z&@>|&=0RX>Ve{(xe9WiPs(Pf_7i;@<8)~v(GFXW?Rsg6 zRAKeO$CJ|&?lh;pc(?=Pv?;uDyi@1Dd?J`VqBq~DicF@@Jh11e8WMUZO{Q(TucDPA zGBvRnC~O1d-=G0>oKw?^ej#1IwI#d<;_V4Qkt@}28L4JG^afPodM;rZ4hsWv^(-soXj%vuQUSoMtXUvA3E`K0AqPpd=d0zehDgfEa>c~#`%2sEN{el3C)0pAfGOU%9AH?>&uj(%z z6)kB$oiiwGe1gm2(!^=#Zs-jp@!BhJUJ3zW>#GknZ)(NdvnL;?$>rx9e2bNcJeDHF zLUAZ2Y-*Gw6s8VT+0}MHcc@4D{hH{+D$EN0gw6X~de=oAq6Oyti!{-(Z_zBb_rt!1 zMh0O+V8lL^!B#n;uC<_faODFYhO`xRp;p&Xd(tcZ$aE7AAgA<{Dwy_%~!cC{m z;9k6q?dLPN1>L*^rF(h}*A=ImP^{EVMdo%8{a{T~U72};-~I@P(8{%CJ*?G$Ma*y2 zmwMB<%f9->V~e_dWvIJvK4nrTKZ>#`8F+?2H9J{W3$w^*g;#<1 zpNueSo1$|uj0o*0j@8OfU@-_ka#*(bJOFX$z<)8*E?)=I-=T5@ZGZNm%NJ@hAiTg* zJ$ZgH<#7Wsw6Sh^DAuxAiF75;NE3w9|I8hOvq2oK@G$)Y;Y=e->=na|20L%w4osUi zvZY4Qh?SiRwuv3$g*X!UwhB~$lz_8yzHJ~o)*Q@Eu+{^Z6;1Q_z|kSX>v{4+u4PFf zJ`n~HFDBwO(dSKVY*}EnCMlBef|HRNhWI>?W=sNsB-gg*_@Bf=_SqY*K+dA3VO8rE zyC~N}a_DC-!+wTcetUI(Ie(2vk1>r7*OSSZ{T^r{h-CQqf7os!z_*og@zv#w*lGP| zh0z;3qI<~1jQsr9LVfF)eRxDsqt#zBovQ+H2vA<#1T)@BQ#>zvx<((v-_1t;qo!8n zHHU%Gz-dptZi59ShCs9k7Y|MIl!M+#!1s=TFyksSdw%j}O|kP7>aRE`=dk7&Ts1nP zn08yAN6#0jw9+d}XFS`m^if7L7&uoN532F8G^f3OKfDGgb4i=$c z;`G+n%&)XvK5CIaJGDQUWVla9BE@dzlPZKMVR8Wfg*;gq#!D(Q3=RR*#WIng>eP-0 zdBgh=)c~N&(H_bu&BRQoNTp{=QcpDbQ$ftmgEGuvlVJRZ;dg6yRAJw^mvzV0w5>#D zY_Z)!O3`O-F-A_0-EGe=w_W%oIEj!ZaA}69C{E+s;fYE#xph$Iqaa*)Fr1b-vdTt^ z_Wh*lS>nuK=c56He52E^mTN;LRbcmRW7YE$M|`@hVF;PmN0amw%7OfV1p~YEpN^S+V;NFJ*nq{g znjA1EN+L`!61|nH7Y*^wsfl4}-G-nOf9QX+y@rV;1_R!dYBm{6Ix6 zd1_PtSSYhLV>4oh9(3!}S(}QmUHj4g*Pg=AmR<)&EBy{1^gQWeo}awNm(O`1MGxpR zH;m5>B5cc`p6@F;W;16q3ffLz>?EU2wRY}qeS#buS?($c2ewm$l+ODck0+GH$rii5 zh(}P5bIg#7eTPYBR95hd%Qx)q{}H}^J~hi#R%;vY4DvSeo|;XfytIiV#}OPM3M?&& z6hCjZGtSK;grVg}*NeB-->wh4kL@0~m7epv@`B5dR!{4$ZV0~CP+EF(DgFuX=*UWh z>Z2O$qXw|EkrW)7O~s`f-ie~nEH%$*@Wi3xGY>fDP_ba0jFyXyg|A@MxXv|0uY;)^ zP+jm!HmiQdI%4sFD*h6Q2=WPj@nx0A5_3o@E!-?TZ3OhQ(qEBZyJd15*PAGFEvg<2 zVf|&aKMSJR$u{?O^k-%f6Q#Mnd#mtc=Lu3ykoVg{e^{(ASg;xz&o$b{e-WUO`c4(7 zlwYQXy_vX6U5o?4-b2t#jsjnkZ!M<$8yx;{0{Li5_6Mq8@|L)$q9i*#l5edsqOzHV 
zv#e?(kre!F(I`^WTG-#$F``-AN#m5s>!;QxK!EV*n4hZfw-vd3!LopFwKdKv-u*)5 zq)s+#HME%2hrS7>mxWN5d-bY`{_<(`o1RXhGm)rwr_>&=$xA_7!Nsi!))vXzH?ZTP z?W~KQXb>KpGH+^oe6#9wf)qd-`D-PMRQX&coB7;W8R_R-(SQ)=XX&kCMy%=41ZXG6 z+wd?D+yDA&k7!bK?|_Q#BpNg!Oo1>-&hQ4IZr~-S$bl~WPssP6&=oKg17fc0W@VKb zUP5^1=R^zIZIVN0e_hJ=Hg>eB!EKGe+7cDNm+<_&HFo=Ix-3+Gm|V^cNV31ab&VD4 zdg>yjc|KxIF4mKpqbmxervm+fJ575oHeNsvPj+LYPvJzkMExU8R7KFa+ygT?0!EH) zlltfinhMR+#<5vwW^LnUY0^?>>e)W529)s^&@sWHjt!XO)QE3Meqv1fC%&{LEsphk zv>Tg@-jTzmA7|%#SpAgW=BVvkLh3QbeNNPR{CE7#@~(xC7m5b~u4(!*Kf90{9WORY zasf)-W81g*+oYv;Tj8BMt4LLgiFEOdm-7WOOK1JU#07O1Kaj%sRRN%Wtp+b4iOo<2 zi|jKbYYgtAT$#@?WHD3@&JJp|o!{u=yIE!O-Y!;ujjUA+06qPDjIo{0l){oMwaq>( zMYxS!$Yt2bRsIFKewo*Xme+9+X8r0Cxz4sqCC)&RgaQ+-RNcOW3bvlxQgiY9!nvd5 z+b@AgelL5AXer=j{q7Tny0fj5`@IZw52TPIr0@r*U|)Js?^O@`{-@n8bawV&6td9k zc6bkmOV0bF6?U>h59dOq;+B?z590;*=NYwtn%P|;E9KOm9}s@SJhO|l$Uj)-t^jO6 zfai3V5bG?Pz4HZ-m{UnYM_J|4doEnV6#uca{4$fbi_%AJ-L4M-f{t>)CY;TSDH*o% zYsZ6Yr=eIXu6v9|2g9g6gO`g&Xe`_TE{@q(%!kET96!wXGw;m}Q@quqi*FzE9BZiO z*m^!YKMwDc8)idcP~k;Mca4&&StY!t2p|9F5%3nxJo_a>p2vS)Mt&;b6B9jZy}I*`B?CRmzm9KMzayAnZ z@);$vI4mms*Kp7iAsg}S_bbEyy7T`J)c=my|K~tu|HiOc{|K3=G?+A3@4^51!(Cr2 z@3+oN7PH`R=IJR#o78AFn-mNncf))w!8UXx6721cIJEF>(4%lQ`)AIY*=SapZcD+0 zi&e{OCfoP5=TDx*lNj{>kZ}4ySL*0~kdks21jl~(@XX^UYqnrsy8W3}y;K|wcTT$7 zx|xN02rl4|U2#x@8ORTdHga(AboiLsoZxpn$5+zPRAHxCIm~vO7P?SvRCu*f);*t+ zp*zJr$y4%6ZNR8u6nx)pia2Y;8JsFVf|11Tr zcMamY=2KNS=>gNCPWn^JZ7!>l^DMqcRnC@u;M4i~rcIhzXnK{S{E2k0S02EnF*!=e zvjH=qwOA%*-x#8xXK%^IGEWk_hpj5`upk0xzCZF-vQBq+hqFG1*Ia-Nx}%(~w9J~= zg4Nv2jMZCQ?iJZRENm72C(mXj?V&{=-`t)zfZYZi%UzFe?)v<60`2OVY@b=ihpj}g z4Xl+HDihcqd&?jA0#h#mL&9;P(6N_zB`KZWb)2ojQ#=U<0gYxlK-<5g6)?uMMCm^Z zNNuqwwZ=D)Z|w;gI5y4}o@6r61GQ5vDn2ZvKAwO&yotMv9lf9u?-tNm^RPro>uif7 z<9M?Mjkgc~g8J*qC|%>mP6^j@p}p@71xWu-_Ov7=K?P(KqvrsuYn(z-^?;s?kMAcG<8&M`j=y^TIt+I zvY6gX-KmXLHnU4ailA+|r`sBJt?TK_TB}M09`eo7&ekNC(Q2B3kSM)fv8-EtjVEhYc4D6N4cb~fde`e#*V6Bl6|qleRik3a z+^+&9q8oG$b@QHt38(N6+N5y!8QDY!9`!kQNGR%B(0bX12)V>;SPny@6iOj{c5g(e 
z2*hukt1s84^N&P#PZe!b$;2~4chb_>?YEQdPNtO7Tc<=%JMC&MyEi;^TTq93@A89% zj3tT52POy8nRGUQ;CWa<8GEvl+&_DF&&Q?IIFU5`LEBkTs_9CHed@9930X&$U>PY7 zpM%oz^7+cBliLpkwvE~7ys_-f2d7)HC-guI?u@vP(=|n^uAqb1U5AoujPaILSD?M} zguv5DxtTBNWtpAcXWJ^%>F(;7i1{^*hJB~Qjsi_1wb1Ar&*Dp$6K<`#iw2F=)D4hY z%NKdETIX?69=AD4#6)Oq+?3b~nXTNzOmL$|v8F0=6L~v%fh7sd2xpBbalvR$nIbPCaw`iyZXCK@6JQeU0p>1c6@G}rPCW|xtxs1 z#%arwH+Cmg5#DCha(CoPO1y7nYKd{DaqdK>>ZC6*I}U+k^@?|`EZvr^_jIjM5)~$k zHfPCZZ#lk-IORHL7b7LjNaHQZWq#GDW(GO$3Md6xFFH3i02M{lwQnNf`@jJs-PGrw zSr2jgheE#Ey#iJC_FOdW98iyqXHC>WM9eZ}_GxUjJ)OJGeV4ho7*m~njC|cn976)v z%eckQ{)h!N{9SppPj<)Z1D4$vz8Rf!M@Kbj_zI8a%jet5-GR~Yx|TsNN@x}G0YzEF zTe^unB}(qYYYoNwJFXe>!}hAK_Kfh!x2Lf!p=Td;hqJNsD{xyf4{D7HF&biT5ilIe zVk_#oDI>da{HW1W)zIeK;2Mcaxq!f(zh=21R&owwJ2d&3^$l*_w#knWQJ-Mt=2HQ^ zuUd|jpBvNbOpf|?M@(rN-EnNwk3n+K3=2I{_pPasbYJbx7;cUY?X+dbVO~B?&yGG| z^TyS9i_WU+!U^BTP*Oo^!=O#-bGe2_$X-ell%gm?(>(1%?R3g{n~^!GX;y0_?}Bh$ zk$se!lHkc3U0%Ajy2|1ZiqOzju0;jum_6MSYT9Z&fNi8h0M+DTr2^mT2*3-TU9d4A zGe3L1yI82YFoGj)Tyj062jcShW_?f}iTJt`yY}MAbzb8b$J!~nt{y;OlPX&DUdT(e z{Tvqos^=bvZ{8Wn17nGmyctE!FmVuR6rseinGKgr9@}W~xxen&7%@oGMQvn@Up8#z z!4X?60j3S)c^IX2u4fgxHn5#z?vMQvy_D*N8u+Kw|KmGU*`p0z4Sxpoe9KtMpCC7s z#j(4%4zM2>^{(g4>f+h>;L-ZE`LZC-R!6#>{aXp@;XhU+plS{Q9T`<1mUd3uzVVwhNS-xz8Bup{!)48KGRjp)B&5Qufm^ z6_?F!oS|){q?i)fmk|qt8bAGOdtRZ9@v9(~*8651UsDrzk32JDdY#6<$&{g5<-e&t zyl$@W@q``sbsvhI#Fx?Eh9V$rH9?ABgTsm;&%sl)?!iz)QVuEcI}1bb7A{5>KGr$yEM_WT_F z;&d4LJdxMMCW|KB@pQzp&6Vx?3+}6K&C1cwD8D6Tjr`fazIXE3{-v|CqMBD&5ke$Z zE*ZnF*OO5eI?w%vbCY@$TE!&z>fpZWx9G)`0+rZJhK?rD59iA0QylUl9r$G1cI-FH z3M=Dr`<*WtX)77MP&rN`ntwdUX9NLwvwAC6ZtVIpS~=B;JY{MdY=hW*N}lWq6&94t zQU)&9&0@R7>R#WkUZxD?NF^YRao)Ir&?5h&uYvHro1UwEm3WuW_~`jWjuw8_XQ~0u zy>W5}*GZV^X(EWcua5nVzjl-!Nx+t7MTY$;ck39OiDc^cpzGf;m?3)d(>~k9?&YGv zYhD)XD#0Wub>!%pu%R8Fg`=K|uBqJ%i%4_QL^OoK}|85!{BV)h1*7og1y^z=lLrEB?BsX89{I zgY&>v*E*H`8zGt$9Yb+>g=2#>&jHe@>=rJA0(|M=m%Y_a8*-c5yOx+|+%;)Wc+4Q} z5!a|@Drx+7fk%p$`=g?bF7^%!g6l`QB-J0PORg7+C^(4zy40`4QS4OK!MGLYF)-lv z{1Ew58573my@ero^-dsG!S) 
z#%zZNUf|X6pkGbye4JSmzAqf@5 zyD4ktA5?!;qIKe$TsOS>(*Vv4*Nz}eK~>fc9sTUb_O`A)1GgTqSf zBP;M>D}p+9z$d!1--h=SKQEewra8qEiAleHGos)yNHDW1xrGVf>?QYCotQY^j+CbF z`=$&BuFtxavEjjMIj^~TN5}F@Uv;@lUtSI;Yz&bV&iO>ol&3Va{xV43qBLDWZx5iMqSZ`ESfozEI63p>Q zA#_*!w;8l&c3aRS&u4`didwMgK&_{(O*2jk&b8dNRueF98|Ceoe_ULq?=U_JA}r=* zof?*nieq$)3qbRaS*Trwgo3_F3^9dHEm@D3hR~w0)nvYFK+Bl@rfXwA=mkqxdJh#I zGy95mB+f|gu4BM1K3S2t$YgDj3&aq6SUIg7x)0!fLgvT_mQd0;*;W|_jeQCjIPvlo zvL@_b|EV;{;^t$wh$i-f{r=TC(9=a;08T_VRITc|>2(tBmrT_B7((5ntF6;ew9;2a z&fc^(eA9#`$sj~Vtgts*(K2)(^0j&_&`& zOi3;{`Y9LmqkVVougf0(o_>#3UD}0sja4d!F<7lhZRR*t0eaz;laZ6S(}dUS!eh&+ zR!KE|xpzV0wpdxUQ=DF$yfyZA)y=pO!wvkp*WSgx=^MGS=jGuO&Whv3@T?X$t9juq zQXztkerKlBISXTy{DzI6sJbb1IdCCHhTg>q=}TJhPXdUcr4C~aqvfLwyP#=N{?W)z zkh#={&9+WkiLSTAzFY-KJm)TdDA@12rd(Lm+o{x@(j7SIC=;VN{}AK396z5<&6Edn zvc(9EP6D+XLOY+pU|P^M6?P@Pf8v2X8TumIH2%oJ-l}pC|Liz8JahJy7iY=Plkxh- z?tbQQC4x6r6W17(Y~lt~t(nVx9TpZ)ki~bAIz6SOSO08BSj^wu2v};J&4h(s$KEHi z1^zLI1md}Yaq*NWja@HuxXxDT-jv&NdVhFr0-4D>804<>hEPS<2=dN^w z^ja=tU^7Os|F)~PprkBJ-}-K*8gwZhlyV|yAQp_1i5N;3zr(hLSI9Y*SBq7;Nn^Wy zc2U?~Uj0tI>@b)7;AEc0@zKSK)DVrfuJ#=_5flgS2e%PtSZ0vf_ej zsp6g_fdMPffcb)O_sx2eztLZUW&> zq#sJdo`ANTp3+y~Y`6nVD(Uc;DXYGH<$*WX-pyX>dnJZKtV#6^tUGGTcrOBX!niV9 z+x1-_6!Yu^LwEg#vY=D;-Wi?Eg0-Pg`#h+?t}dC}OIG58)~6QdwI-D8eM5jwE9dDr zJm%7sWGe18fYXkLI1uPRj?W_jJz9B&K>XBjyUZl^5a^w z3%z(4@_=I{dfP#UxO%TqDl-^p?bn!3Vq5QGd%bF!Ux7U^`&*9 zYfc^XRhjf1f$XTzA6K@6)oC5kXa?2&G}bJ2P~vpRmSmC@=t^9t(mmH*H6E1kChMQr zBJ;_!KRu125>y-KbtTiN<5vKG7 zAuM`*L@o06)s@D_*MGl85(%umnK@V*H%ij1VxC$J{|Ek&+|X$3n4~8&5~l>WOC=cn z8-#Nsd_Y9O@5>79OEgF_(LA0R;A6RdR_%ASSF=j{s7rfKz_RS$s5dWM;ET|mKiLIo zKYYbgq9T23oNX+jZ%S{(9|vqx%ocpg2cub$(8x-H^g*J#?$vp{B>k*yWxpEi^oi+q zKDu@8y~PPi!c4~7Hc$4?9Gg8^EN=VrB|zB9IZ^05lWRQ(Zc!1baoFv#4l!uxBBK3Y zEdYfCmV|+kYe-6dX&*9r+NycLtj6-k8gBKa{ zV~mF#Oq;9MA`~##{Ne4bye^qQQSDRUS3OlZ@*v?Z%L$^nyy!zyUC-U$-;@^)m;il` zhtr1rC4PzCoFQNNC1yTNFd!5v;XpZM5Dc>;0UVT0r*g>BJ(V!}B=W6KP5&I{F41OM zo50|X%GDDUe(X2zleSmhZ>wHvH^*CUomdW4X9cN)eA4`%lDrJE=T^A}mN*UxgEMA} 
zE!foGmj22a{15!ir9ebk;%C%szOBqPD>A@InJdD5A1<5CEu2l&rJa44r=I!qXTM>K z7l~T^M~m(l35&x_wt?9TBi(w3IH4n0yR7uPyu_$iCKxl$-i=j|t$5F!r<8@@54oEQ ziT6R{_zq^#5X6L>837Zsd&H&JE;azRGd`FIK9Rt>Cira1E(!`7JP4vAOT&bwn;;ZV zta|1YE%$%0jyLGA0hWdo2hxl<6ynv77*yD?4n>^t+X%c*Y|K;n%V;@x?i>9s0rdus zfc+m)M;?1Gm{nS?Fns(Qm#)(?L`{CrjuTJ>2utr$wORu(wy}1kKWQu!f$FCM|6r)K zpKS|%eTkQt+<)g4W1+NK$L*z3Y89^(1l0CiL4OmHvE*odn0X(>?vDZjKX<*nj@?&j zfe6Eo_2I_@(p!y5>{h6VcBMU2;l>KHneU1^bqwlx&?d05xGI#5*K(l#Y|}N;>Mk{VpFdIkVnC z9lAfKfMKJ)^ik{OlR8_?@dYm>f^ifQS!TQ6CBYah{J8h|%8YOmGY3Dn8j*7QCr{y% zCg8!_s7fVi8k1MsSh{+mB>NTicXbf!vx4o7x_Ovn?>nZr7NIGRjP1E&H*w72_c=tg z8ivhBg7D+0k#%Lsr1LPNhE`Wk%SBuOtf#en-_itro2bLl?HBZbqv?Cx3-Y4KITFUP z$FzUi`ze+E9%Gwco%{hXZN)*- zt4{lJ=ROEWduWJAmfa5Sgd2Hi-%(NA<_6W_pNerIqF3^07ZQGxA2RHFzTXd5`Zjlf zqakV_5CY~$@5Nb_4F2L*@qKJ=)$z5M>!?17N=LA~RF$(TEnmqWU?ZZ;b8 zxIv@3dAcM3tRacZ&h)o2YOf4IV8W;G-qNeQKt0Vh6PYV3e#Rj!AMWo%bvq&F^XxX1 zDkl>C$BCWMG%m$LXXLvB*4~}!vH54M>42rN^Q$8CjmFodczWrTg4J<+oOC|6vS70F znf5x#tCv&BekY4M=*Rh^zSJz!K--@0`T!R)9{W2l#Hez_;4xp?u9)@L>F&#n7GB91 zXIhq}xLC`et*`zI#Ao*CTAOocSLpo$x#u2Hp6d~pM@Le3E{+{D&lA)3D_!6DVBf=c z1RVHfqHP>O^`*8`dB;cnYZ@EYP73ZmqZQ4^$dOhKaM5)E5phOlY@=l!G!gFAMiOAXrrK&dvw}*l zn0A5r{p*7io2=nYC>|o?8||%oo8D(xWdpUPu~!zG=&rU_tGK=9RgXj1B3#i6Zw}Ie zr+%;T+##WFH-*>UVuDI`U6J)0&B_}usU`lL`-%HaBa$5I*m(yk+4h|wIrildcm_Yo zx>`5tIX3kyU7xtcc^J6bVb(sDrSs1k_7k1Pg@4LQnm4$T`}HfF-3C$_>@;N4XM&vmiR0r*iQM zWFqFyGX6+LeN@`&dm0erW*v@^2x@pAvwYp_7)i{JbuVh2fX80Z)JU!DKKEQXO`N7T zw+pmTrazArWq6$?&?Zc=f2A>4UkEi%wP5Fu0sC5mHw#_X2RAZt*n8YW<_vey@8e^U1C*eAX!2%2sk1ieg|$ zakb3`FMTqiWt)R(BV}~TKO8Me0Gr<6zc-|Tq#&Zbe%Y)OIj%y8y?y1;&V1?uA$qYYU z&FOcKGNR2FRnW&_hZa&_Q2Uv+r(yuL0`0|a8qD_VZELsF=8c+qHYnCJkJ7wFd?Ypr z{K!FIHrGoae}nz_EOPIj-Q0YkilbE*$bR2)OP3?>RA0g+<~v$_E|WA}eUFNqismg_ zE>m%@vu+y(rf`}i%sMe-V#;JN`FL8+2Ne1x6WE!hK7byO)wE7yrwZBO9Bw2ZkOxhd z9sh9MsFW;9z}9na2BySo&gLyK8a$D-$n4}u`MkFk^ermn&b>^hUVNJJ!|prKZly?y zJYA|W9yW-n_J=4bL`fyJ`#brg?(rvh8nl2e$_Z5I10535;mlUqJL?=S`Rr< zPb8smbV%sgDeyA!L8_8=sVSI9c# 
z&pWrB(Gex3lf|zj{K7V~-oPRx**M-tmXzf$I%Cz&R&OSJa4+XLkePOWZGP#guzDul zw7L?D{lVnarL&d(ULAyzkoUUFBKN z`y(%%R&JQ_HG3P%f^0UET{+#%t!Qg9;6u)hC`~H^246j9p-$L6?7T|%H@H3mCCL)^ zkRY`eVTrVg+PA&#gHFq0P%w2ppLpWku{}y=3kBFSn}a3A6Qh%bUubHa*UW^SCM~Ee zR!N&Jv$37THfZ}ARKW zg5JKO^Tz0nu48=E()?j;%k!G=_IXip%Ua)#4wR&`J%R#RM4`;nR-tCg4j%Lf0b9kj z$9s;#fJ;Y%MUYYtY>Q=&rAJRvtJx2`jlx0wGb)cEmjZq zUl=N3>|%U&TK7LL6;s4V*tqFRCy#RVu+`)=Z!lLlN&ourS9!}?x56UbsY1CmYdF2N z7#rl$WJZ9IBsB8)Q%B9RH0C)6%)y5zjiDBka_r=ObVO9} zBNqJbLQX#EJrI2EI*juUW1=e{o=ID8*^cE$c5L|M1Lzd9mh(|s9OUGh*HQbycApue zb0=^HQqqBKZe$7UcDeWD^uf_sQW(LGD&kyc22j;dQbUV_X1*4=L}YK$-<_Mg8- z+ zVsradr}E`4s`|Q<_!8t<6x}5BONMr0-H@&4z9Sv5<45vBhtE#9gTg_xivhvxaNFIe zfYI7?Rk-T|N=Na{UE@)c*8IxE$hOzy`}wefO199p4>yA(mzxrMg?mSiBomzXmx~v+ zH?aDRJxqjrtOsg$2vVm5j|Ocv5LSubTD>4AzeiAu4k?Y50cpEMr$|h{aCBkfD0~4t z)>Fz=-UmkWPXYr95=U8lBCOW5uk5TEg=l;tGCZ2B{qIi#uOsEu2qu9bE!I^NU=%aZFMLG%zr}W%J)*Kldt(RgFRr({^ATa~>rxCqE==p9$|d~cFCC4{;u&}#DdV`_>2Mp%PK!1UmB z=JFT;WOa-(Bj_R|=J)GoUYKs^8|`n9CI{X6-8tW+t1A!BQm_4BU=vHXB~hR~#6T-S z7SI$(<&B}*JkmQ{o_s!ZLwA;~Df`sF>Ch+*!}P%))katM`e_f=mB$=yAE+JY?XOKE zl!Ym=GTW1;XBH;4jk#U~M%qf;D*NYgQKSL0*4ouVNU6-J{_ zO~12&vf@LzidWhD zc^P|3SP9-D_*KwOfX=jj6HUo|T>TQDd94eXb!X^yJ?jH)&K8A!&wbkNZd58x;bKev zs6G)_4SQPdI9Zea&U4vm14I`k4@P|i7|Xi4iWgh*#+IPfruGb}~f1WNXLkCA6O&(5>dp+-hmn?DYUFE-Fu z{-{pXxOP2np8Kjb)NsvGQC6s6GL^2J2h|L-ucr@RB6v*p6ed9G@Ou|3TI4N>{gs|U zDv<|m*XxUM9lT!t?DYiA9Q%0raSE1TNm+MhM~B2I<)btK&rfPmsmG#M9`OAob5Xl5 z&JXe@0TONuJN>3xThEuyzh9YWsxI0tGn%{`dt#CmqI4ZCFHm)bt__)s>cD^BMnJ&| zNWRXoZH-J8bsTJFFwrtQuw*{&r0nZ3O}wg%wxqw^x1>J+4M9b^IjHX}HTh}i;2dvN zq;U^SydBgD#}1ob+g--v9hOHeTm%N@$$N_y55Dwyy1f-JRGpw-Q{(jEel#U4N&u0| zCi~su6oy^NQYOZ^jfSwdr;m;ow3JyL_d*cZ2B*bz$Ncek_QDy;2D$L1r=-iX`gh$> zv-Dq`9ijds+Epl&tZ!>9xHXW#LfZECyNcSNDqTHh1!c>;E5&93>`$l?;nnPvy}TbB z=HhmUlQ>2We=+Avexm#BZ9w{SutQV)IGtSVsA7F^P6qL8?TT7DXaK9|(Jndg@z?4& z+2-#ri&u0)KP!X%KP)=99p$K3R4Gmgu`<)ARmL>>I~3AqkWIW#yz4a)CAtWvOe z*YGn$Q28k>$}C{*^EHmVw2igBt4XVuYka}1NX0p#tC(4?GLf3md#^`Xlm^n~dGx-I`oTG~g-&t>qaL$Vbwy!IcI~%A#Zyfu 
z1#+YM4~?P;fziR*J#&(`+{;Vb>GBA6x+4^ehzm-Z=`7S5>x-PeB0l8MGP!%dFK#}n z#m+I>mK%SdSI%s6-;^Cikvu8^>)>$AQD%?^0c6n`u2fGW?iH(C1Hri&W#`rmcOS7tS zJv>CpVWv;Ix>YMts|dyK*Eic-m=TDx1w!$?->FW_eF;x`^|I;iEAJ4ZFel8Vwk7*0 zA$8g3rf8j-Pn}O_Nzz*F5Hli?K4w0M-m*b7`d;&Mf}i_r!(IaI;`;u&=}(xR(b%8W z_0l5J@~5Y?iF$=KMN=}CTA!T7Vnv%_gI%s(Fxv7jv@(pIqu)BIndJkvJI>7#OL7!o zT6hGK!8LE?S@=-DdUg!ln{U>WizZ6^qpy$}t?NMUmio3jjG+1Acd^6(c+aa(4|#rd z9!@!zHk?$V)zXzIErVHllk)(Uxfz(V@pXOZapOXjI6B^Sh82;c6)kU)WWIHj^vT@! zSl@iY-H4qp+a{i7zKR|dgLasI6})gv{xrR2C9O3Ft5BrV`EVH+a_+EzY4%Wlv|JX^ z+~^)j7K>$1Kw3bbh*&;`I!->88GL1!O|EK!d`WFwoOIq-GG9YeA9h7wF*FhsvDb_u zN8(Th+&?}bNN2}yJG+n$#?eTRoqp(I(*o@XGb3cD_?det_?SK(jx1T7F<2a*f5#^$ z)XjRh`FlH}wNZ7|&qS*J(-U9hUf0qO;PfxpUNSr6*}dyiQ1{+oQTj&!gh?3U*TNdt z49?4lTWWMS*qE7kpvW_n-p+cqF`_x+_spoQ=_4&|unE2Fu@7rfPOf#ULFEL(fS=g?xQwfpl zlJ~6-JZnDP?GmllfAxu%`s|a3STR2XvzdjO`iG=s^BJnyu#3kQlv#=ZOt;ugo<%li za4V$5X+>=yrZ|a&ujUc2EBUO2+s7|29bxMhlFI>Qh?N(tDJVTj=Sqjkqw@qvlYu<_ z>y1H@%Da#E9cfC=yGfdb!sieCwUqGDhn3E7RHK#>(VvO}rV0mH6Sf@jx0CtycFDYx z$fu;wOxoaao!+^HhU^_n5bf)A?1#WFJZrTArn%(ZqSH*`4mPXqO*>7-sc+3pmrtz+ z8g*q_B7FxCi1iQz5!i5q(6(QTMn_m`DThp#g&V6T?3D0)p1oBPG zb<}n>2uU$)t8O{tx3{lI4l<$c8x#pnONwD@(>zVXxyIbVSruRT=7X5xig+pt-qdnW zSGMw}T-i5RJej$_mA(S8tW%;P{}HE49M5+~npYa-OzJ5STrUAjKoibP8( zH=U0sWYzLdyt?u{Q8m?Dc?A*LR&-edgq+JaAbPS22GiBkDf`^N!OKG*p-<>GMZrdk z4)NlykgX#+2FL&j&qZOyInPFt$2hzC)Ll|)GE(=gSjc@#(87a3t>DP44WpDS8tR0D z=jLXQgK=mQ2KOb7y{0~o%(4ifY+_4HP?!6Rq%yC>U8D8g5%#wvQ46{yuy&CiCJF^D zu(p1_`f7PGnrzn*CdE0dY|$G=M3spg*`_n*pGYm#Cf%oe{OzX9Tf;$@aP zXI`rj50+i;m|`F>n%As$I5KExHtOn?nxnZBvLUea3PO!>73z5K^!kx0SXbpc8_6R% zJFsH7^V=1fIdq7rv*pj7^Iqqn?=N|1^>u&tIDK02$ocThaLoDT_V0ii#=M8;A>Wtm zn}A+o6|k0d9_KIorcV}3Ul-f04>rD%2iuO2q5d*j=dfSkW3xq(;s!omUJ^I|lKb_{ z-NQxs?lFQBk)}k^UAY@$o2rRP>DZ;RGmLyY_*Lck5L_t87A~0NdO{7n3ng6A*e|}g z%7r|@@y(yaU$om7CTBgkH(TqGRQK}tnFsvhUPd2EqZ&IOP8|F)U3(QHB=Xr+9{IYN zmIS)^97;^i5+kQVVPTA1VfJ7&pnDgUJ2o&N<+)x-uC$xMb}pc@*5&3<@(;c_VNfR#J<%<`gpViNB1@j$;&;;{gbY3jyWq7bEBmq8 
z=nnp-j_loTyLv$`P^eJ&tZ9LuSTBa^VKZ@R(K#UXxb2m;4fHlSP{>h@AhP$%a$QpD z*6y8C(Text4MwJp8%=Sj-QQ@lMHl$$J>vRmCnJj9p}K5tKH|NUeVh7u@5mjnCqDOx z$e{6F=%!7fd68FK{^O1!t@`LHt>nRF5gD6(HZ$*txX;PRip5zeB5w1jjv~EpcJNu8 zJ*{cY$=*s6*u2)If1f|9;u;YX@rsHPHPumb1lv0f2T$4TI$IOmm}Xj+|2TvPFID48 zq9J9C)Gj>__7iRsuKzyfp7~a^J)JVonr|o8!nd5DcQ#d4DXmjD`x!b481ka#Eyfph z&!G`=kfg-Gmt%(&{*mnp`vZ~EulUt2<(#*^zJKEEckclFUDEfS_j^kMN}^kR52l^c zL;Ka{J|t7TvJ1|B8_O7AQBs&~DH=SjjPRA)|6$K|u1=6uF-1H5gX-Z6$oqCS*g`P< z=GP9N#~4d@8t&HL>-a3p-xf+n;{3hrH*L|bHg1UCI<^QfhMwWK#l;q`~ z?GT2bM>pT0i~*!*;X9`wxZR|4$A+($J*0f2^fF|ih{nZ+tEAC7x4vZMrmYZhw zGHF?6wFOFT(_aKN6S;~mkC1z%PYtSS$AZ15@uY-M@ROUCsPbBi=aIO9c!`Vxf-eeh zp$yRRMw&|C-R1ns?H&iH;JZ=qUofrphgQ1IFN*MiKnfz;dTSwM5`pwMx=mTBi|%H! zdpswf%G!r=2>R+)z^20_@U@apctJYM@t8cIc8)MH$=huAD&o0}toPmG5d= zyO=~l@o9ISX(2UacRp-f#&1mOF=rZyYAGU6!1XBvRZ$GuDIIc)) zk-cJV4a>_Pw0a{H($(gs|5U@Qwe8Mz3DZnaqGd%iPhbNI=#X^{+}}K-ce@9)%zbDz z7v>i-b0b7XVSY2{@W!?%3x!52oG3&pBWU%jF0csHAy71U__89JxAdJEuvjElylk8&rFX5{lsf82 zk$+#p_#SaSICbrL$)@J&_x0u;Bwfa4Zj1d)*2M1G7R&x4f}F2&EXi`Ohs1PEl$7>L z^6o;v2sv(Ak75TQ!VOMX>irLBvyr7z0~IwzCe8UMZ%MW9x_2 z(Q(wmG`W2t&XfmzsqiA(OG{ycI-NX`MB7LGVS@_|Qz2k-2in{=j5O%+dUcS5zGF*pI8RXagM0E0r9M$t zZcmIeqpzjA#ePvs(Msu8MZS0aYSM8~bO*$UwY)5yuN$(K_t4$8aPy?;6KW%`+Tb>K zHssLJKYzfLOLU=UfMo=SgxLRL*}?yQ;T2OfGe59jhn%ZH75GxG%UaG|diK1Sp<4fb zm9|(NGB_ZS{&^KkFsO@2fgf(li|=AdCNziR5?Cyv-gfF;eh0l^j>ftXd zqLVKSz8u;`^mKSWd|+)(;@;Klwa?~)tvqjkP*73iF@O~pGB_L3@`9a8MONhD6&@8k z?0rYq0Nap^=e@sH=B-Fa=ACEmY*Q~p_G<=Vz+=ll3eWu;dZ1`O5t-vEaUh=5nFCANi zEnk>cOn)||;E;=@qQv=$BDh3jMwa(S1&dtQL52R*t^&id-m1BXHf3T@3tQ@>3mFcaMPXCSe5tN4kxY3hVx{g*JQ{nzc`p> z`yPm&1*Sm63LwIVUy)~fiL#8h6GHSye||nxSg_^t_9xyvl0UqDv46-oREi3=&CdL= zvu$LtSjz3)Xk|5*g}HWr_QokMBLdwx7VPjsXUV1KpyB8Fu)fGni(KtQYm00o%O zv$0edjhu(Yom8k7BHkhC1+whR#XMV|JSZDKJTR5@qVJG!FMO>%Tkt?azjo!dPN87A zfQ=nR%^9-)geBxh^}x&Qw|ox3V$uEaLxSFuy_}zFftWwg^F{K2k4 zUO9@OX|hl4xYhZH#`Sm4*~_Ch>w(yjMHdBTmrI1CUm}*qd&iEl7;EX-A_T`QLl~US z&TC*1sbf_nQRgG)?S1|AGg%K;xlP@cYQ7BTIh}b9o3(R8^wWjTm#%~CrEB@@tu|0y 
z=?XGRuH4Nhor25E4iY0H{k83aSd91akrneo3Gnw!>Xq}!DHreTvI3=kml2Ca)7yzv z>}ju6(nui7(33tosLIp_4PK|ykJ7zuKK(I8n3EuAbBI%+)Rpc!|4k&>Lt`STw6S3@ zmqLgPhVk@)HtC|{R#(4|M)$(Y#x`DZt%}pkxavF$tt8h^623&C; zUf&7yIZm!~h-dvOT>5#GU%z<4+XNctptxoE)*o7qnmaFc*V`N(&B;0-Pw<~XEuwY2 zy0@gCb8VG3gH+XL@ITFliPa@C;|m!y&DlR4PO#cp!x;eT+J%&X1}&F($5W_fg5tdW zrofWA46$m6kn=tu~! zlttxRX7?{u3%VKH_u*A>!Xk?Dy5EST(=R+chx=h>()QO;@Rnmq`UICFNYqtYS1>8+ zkcVqq^DLJrazArt$&Of3IWI!cm4lEH zpsarp+0$HS;CM2=D4yz0ji?-T+Mz$nhrHq}xX1`;ov5mbNEQ4sm!~}8wP?f_CJ40b)iFRkq$FX6NF`2Nj$nx^A^NP`Z=hyFi{Y$i`N2g0o&BR-<=Wdzh%3S@e zTQcybS}`2D_=NXB2gP`F>#8?{*FGMJ>~9d6a^)1trKD-AU6G3l+|O=tZok$WQesf2 z#-%Y7OPZsxtz*ypprFk9$D5#&cba8X;^da^!i`c|!u-#n2EBfEw;+qN9Eb?G-s>EpW87V2z#^hARw5WvmOd1Y$5x)mkf_ zUEhgElxJUpx8(5*?uE#0*M#-TlRIJSGz7(uS7kD^<8ZoFX8&VLtnM2cMN2Qw^pxq^^kG4tBry+JefR!D*4_BDGPc!qII>eE zCyU?g*D)@CI@Tv?abfGvL&S#~4mk>+Eg;I&c``RYttD7R^csHV^Ho|k_J3dyn&p z(Dfq0G-tiAW9C3#C3x=e739rlu ziBs%66A-tIgw(UU_2uV&!|L43O`s7}JX_+qxmJ%>_1HcV2{M~RCJPDk%UaT>s*IKt zJ``$u>{jf(Me~XR@h1856ZIL9R~VbVg$8U?SW6fR#p?RF_{+g(JrHX^O+^+{PQn zI)SmPw0a^S*b1Tc;|k$Q$Mc2kh4A-gym;K3pJq$J-}F~Ej>^qH zaOyDZjg#}gJB01R-iRC8K6#rO!&yf4_oe?l(q-ak6JjsVPEr4P_0PFs6pkfAP+eGE z<9{Q^|6!j9jn%~wG-w201o-~d1oBr;+28l>s{i3yC`d2(_}_5e8oW5cINO61oBvG` zk#Yxz^Bx=>G<^Ku5apbI5asWe+j1@c4Gtfm!Xe7-HtkC9{x`wj<2W|H{&2H`|2KrZ z-5-(&u@kYUwEq^o^aoCG<6p2!hd&)V|M?9S`$I>;7{JIM`)|RYzWjrlKboJe{Wrgb z3L}pAqK>SUj85amlDU|N9$I`0T3RcIZFqmcL2( znO@AWRvKyX{&CNObN*MJe{{+a2>9hH>h$PB7UsM-soqrot!yXw-}?3+)AT>L{y!W6 zx*V0%SPB)mFuq;9ih7CzgBovFgP5@t-Cq<4e{{^ck30W&Um^xHxc=msV0w{p!F{Gs zJinDMR8pJ^Z>p#|h3QvTR3h#G?qBf@tOnbrU;+vGW05o|Sn}@<92Re>ULK09&2eQX zRMD#bmucwHc=P-Gb;Ki3lzCN7_`&V2eUOFw6B;1=&ZK4o^Qu!sJ}Mrt?Vs)@bmqX> z5+y>?*}po|2v`M0JO;fq%`9GBFpYpxN%?PGGH<9WHqxj`bx{H+IS71AIaiT&; z=?f^jEFy$~AQjVa!w=L&*N;D}kG1~!Kl!>e=X{S~KS}o^QcmplS?KiS#tCAyIJ1*R zG#gCA+M)&xVknUlQVMHhrkOBmBp9{z8^Oq^imm!e}YQf8_P|H8m3QCFY@Urow z=FFkMatPq9g<)LJDpt=FmL-K2N8dFhye)i;-0Rt82Q+h?uZyu9uq3P;0^(W_5A3?P 
zUZkJ>@SqL5f1i2UF}n!~dUN#}P;~F{HhYm7+oH~&XAMZ;mAqSs4$M<=jC-BW77>_% zv<$thN7~Td9K*k95!Z36vjSxE#a5m)6i)LC!84}$kFlTEX`N z4E9y4%M6XQb+~0%SEV9qf!SeQ6|?`M`t zR|6wHwE6Kb(@?o;=6{@}zjL=G(QZIx(s8Fb(bw*f7z&(v%A zk*_)t{zFj%NRhxhJHMh2RZM*bEH$`N3-SYWp#cV*BE!p6M%q%;B32+RTOTU5r)JR+ z%6C}8MeswXmEWj1MvdMG4e%zzkBnN222dVzM0$Vsp8}SK!_|M_KqKnbfre0+Cd&@R zA~1XO$6mb1t<@V5{Q&(}3QsDJl_WOrNj%L2vqvleTF}n`i~!nRt0;GR{Nugo6?(8@ zYp-cu168O$3)GPLj&4TcX^A;iM#ZhLzgsFhwBE2=@KZ4b(X<9JCF_HFDc~2DfzaN= z*TT6-1}P2^%7MFt^!v+FnhJDWl|`IBVl48y=!Uq1`j8hoHy1nPm00H20oMwRSRwq> zh~)ID?sIgDAiAuj>|29@da~NOs>Yy9TBo)os^Rv`{VF)yGbQ%ag1?`wLii{Qryc4+ znYGm?l!V)5IYEaDSH$=TM_Tfuv$BaW>NMerme7tJ=qet!fTgL0fKRzrx?E1chk#nK zt^mRfep8Wyc9q};`0+k1<* zE}-X$mY27`kLCNURfMnub*L=lTthVSMIU)=68jvLvi$ZE2`%m0^Ol2&dlJpJuT!iSPiloCOf_weTSrT-9W#R)qmMKn5)WNdlWs z;GvVw+e2I}UP5_0?gIa~ilIrID<_8u4S1)UDiI~X(&%2!(Hkd&aG89MnaQ~9P{9eK zX-v=hj&$Jc!44UJx)N@jvX$M^;?SrqjU^PTc%mlrsW-o4G$Avv6am$E73z+UzeCu} zaiV1%4CgrKd6hra)2~1{WpJq$1eZ%7y(aQhl0UXTCs^f(m;k=!Xo+yYuck$~;X#W} zKB)oFznq3@{^B4^OYgmRd?}6Jlb1L|p*F*6!-hYx8;xTD`!je40GV}lg482feC0Hg zqZ_zLv{I7=Zq=2ngke8pgj32yvRkn&_YmaCbf3BkR|wYYBGrV2uhoPT6*E$tfL%vB zdp#Ek^XV-O7uBb6PpI5Dj&2fe^nFuB&^|WqMF>y);vA()|Hm&@(FVHzp4n1|*>$_t zcIY*)WFVDKdzLqP#vY!;4WdOKjNfP>>y#vG-_Y3{C)WcaPu@$8afV%v4;qY9p_Uxw2(4a8Q7+-ZQuM-4gdX+-)3p3 z?EW*Em_t& z1Ovh$XbRAwMpF=Y1U#^p3vGASwMXpg1G<4D>NB?;H!WLc=8f8y!#hBI^1vD@Q!8r( zSTaJS;7}7IRUd;TpBO$&z9a9{(qP}_HH1(Gp#5!R>+7h(>S-kGeHkO%Pt_ zBP5op)cLM6U6DkB3rgz)JBa>#40|h-efeb#6 z8SiT2=iNCD;i@V)al+Y{II7DoP2+)6z~nrcFsI?Gz&%&=t?L4iJVSxBnq@>B@KTeG zVj_wE;V|U!v_7RpBW>SF;Ky9cs8RaR07c#Be(!0z^?$FGCMT05w6YMMMNFZ2K6&^T z(ci|bT{YXBMv}g>zTE{NWIB3@BzuY~rcH|^U+90C3>N!Y%QL8se83@y3w0*CioV;u zy+Oy8pu{g+B4cs55g>13(`>+DhzUgIfU~Ng6+3i_)G-tX6=w0fPymFJOw*baDG$)! zfXMG!M8MYj+GX+0^(}YG!2GLGv6s{luK|@@gyuNDNtXMBd0dyop#x=jUAut)Jq+ad z&Ma5y;KC5=o$XYJbvqY6bD(32Ao?cKD1d1#?1+!^)#W(Vr^IDrSyINAoQ5SeW74|C z97hEE4+g6U-qIKQx%H-heI|v(4IrdZ!?9? 
z+$8)cmtM7J5TwnoJD>%Skl9D#eP^s$U>{U|cMQCYT_IzcR(F3y(ER?F_NRb9D#SyG z#?Db)fH{8Msp?7fJ`;8SVDgg1oDmaD(%n}{xJoA zS;MyvSMnbAK{1D+ClbFmKX!YOvPVG3e}gH(fUAlwo!$M>eZqQs`#TD6 z4#@B!0y#t9DUPi-_ce?f8yjxUpzo(Wm$7#eg@=T8cq%7WF@_3>wM_Nv%-A&T33bi; zo~>COCuSla2=DO<@7e5U5S3e1bmJXgPO+B-{S$@bY>_@YCHXHjCCd|Rl}EZJpO4@v zh*vG6ivIl;F@%q1ow-FyASdpEJSI;qeLft%VrU=fngX)y`a{DvJy7;u#*LJRmKihL2mVQ3*|f_>Y=<6NNb~ey{L}SttI0oaQ(u zpM=++T55zf8f&}Dsd_X^;OtOyt7{H#~lsuby zX{gFpOvjQgI9GA79Cm<ntYQ%)? zHnH6elih6O%1nN$WpQs;d=%_kmS5rEM6AG;5+^+JTnB$AVy;%(UgA7H0uLda*XI>U zGMEMe$C#S*PDuKOiFMk}q7*LUE&ujR0kHW!lx5%g z143BHl|ehXKSL5P1c4uZbN0ur>+^o~NQ(M2aoqXM<`h3YYJERqh0MqkjYN=5PPdRg zx0zp>r*x(M^`6oD#tpnr+7DEpE=I%OZ3ht@k5~UYO@w>5X*sAQ3`w&mLjfro_^-+D z#kpzNQU7O_33ncP>mkRcrYRn9^Zoyk8M2P%?hvid4oBEVILiN`*)I4ekor5E`k&+f e!w4wgV=1)@&&{q)P7H70KH3`k4=U8G!~Pev8KB_+ literal 0 HcmV?d00001 diff --git a/docs/source/index.rst b/docs/source/index.rst index 2cd95e7f129..93184f40be1 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -5,12 +5,12 @@ Welcome to cugraph's documentation! :maxdepth: 4 :caption: Contents: + cugraph_intro.md api.rst dask-cugraph.rst - cugraph_intro.rst cugraph_blogs.rst cugraph_ref.rst - + nx_transition.rst Indices and tables ================== diff --git a/docs/source/nx_transition.rst b/docs/source/nx_transition.rst new file mode 100644 index 00000000000..6c57ab89c19 --- /dev/null +++ b/docs/source/nx_transition.rst @@ -0,0 +1,198 @@ +************************************** +NetworkX Compatibility and Transition +************************************** + +*Note: this is a work in progress and will be updatred and changed as we better flesh out +compatibility issues* + +One of the goals of RAPIDS cuGraph is to mimic the NetworkX API to simplify +the transition to accelerated GPU data science. However, graph analysis, +also called network science, like most other data science workflow, is more +than just running an algorithm. 
Graph data requires cleaning and prep (ETL) +and then the construction of a graph object; that is all before the execution +of a graph algorithm. RAPIDS and cuGraph allow a portion or the complete +analytic workflow to be accelerated. To achieve the maximum amount of +acceleration, we encourage fully replacing existing code with cuGraph. +But sometimes it is easier to replace just a portion. + +Last Update +########### + +Last Update: Oct 14th, 2020 +Release: 0.16 + +Information on `NetworkX `_ + +This transition guide is an expansion of the Medium Blog on `NetworkX Compatibility +`_ + + +Easy Path – Use NetworkX Graph Objects, Accelerated Algorithms +############################################################## + +Rather than updating all of your existing code, simply update the calls to +graph algorithms by replacing the module name. This allows all the complicated +ETL code to be unchanged while still seeing significant performance +improvements. + +In the following example, the cuGraph module is being imported as “cnx”. +While the module can be assigned any name, we picked cnx to reduce +the amount of text to be changed. The text highlighted in yellow indicates +changes. + +.. image:: ./images/Nx_Cg_1.png + :width: 600 + +It is that easy. All algorithms in cuGraph support a NetworkX graph object as +input and match the NetworkX API list of arguments. + +Currently, cuGraph accepts both NetworkX Graph and DiGraph objects. We will be +adding support for Bipartite graph and Multigraph over the next few releases. + +| + + +Differences in Algorithms +########################## + +Since cuGraph currently does not support attribute rich graphs, those +algorithms that return simple scores (centrality, clustering, etc.) best match +the NetworkX process. Algorithms that return a subgraph will do so without +any additional attributes on the nodes or edges. 
+ +Algorithms that exactly match +***************************** + ++-------------------------------+------------------------+ +| Algorithm | Differences | ++===============================+========================+ +| Core Number | None | ++-------------------------------+------------------------+ +| HITS | None | ++-------------------------------+------------------------+ +| PageRank | None | ++-------------------------------+------------------------+ +| Personal PageRank | None | ++-------------------------------+------------------------+ +| Strongly Connected Components | None | ++-------------------------------+------------------------+ +| Weakly Connected Components | None | ++-------------------------------+------------------------+ + +| + + + +Algorithms that do not copy over additional attributes +************************************************************************ + ++-------------------------------+-------------------------------------+ +| Algorithm | Differences | ++===============================+=====================================+ +| K-Truss | Does not copy over attributes | ++-------------------------------+-------------------------------------+ +| K-Core | Does not copy over attributes | ++-------------------------------+-------------------------------------+ +| Subgraph Extraction | Does not copy over attributes | ++-------------------------------+-------------------------------------+ + +| + + +Algorithms not in NetworkX +************************** + ++--------------------------------------+----------------------------+ +| Algorithm | Differences | ++======================================+============================+ +| Ensemble Clustering for Graphs (ECG) | Currently not in NetworkX | ++--------------------------------------+----------------------------+ +| Force Atlas 2 | Currently not in NetworkX | ++--------------------------------------+----------------------------+ +| Leiden | Currently not in NetworkX | 
++--------------------------------------+----------------------------+ +| Louvain | Currently not in NetworkX | ++--------------------------------------+----------------------------+ +| Overlap coefficient | Currently not in NetworkX | ++--------------------------------------+----------------------------+ +| Spectral Clustering | Currently not in NetworkX | ++--------------------------------------+----------------------------+ + +| + + +Algorithm where not all arguments are supported +*********************************************** + ++----------------------------+-------------------------------------------------+ +| Algorithm | Differences | ++============================+=================================================+ +|Betweenness Centrality | weight is currently not supported – ignored | +| | endpoints is currently not supported – ignored | ++----------------------------+-------------------------------------------------+ +|Edge Betweenness Centrality | weight is currently not supported – ignored | ++----------------------------+-------------------------------------------------+ +| Katz Centrality | beta is currently not supported – ignored | +| | max_iter defaults to 100 versus 1000 | ++----------------------------+-------------------------------------------------+ + +| + +Algorithms where the results are different +****************************************** + + +For example, the NetworkX traversal algorithms typically return a generator +rather than a dictionary. 
+ + ++----------------------------+-------------------------------------------------+ +| Algorithm | Differences | ++============================+=================================================+ +| Triangle Counting | this algorithm simply returns the total number | +| | of triangles and not the number per vertex | +| | (on roadmap to update) | ++----------------------------+-------------------------------------------------+ +| Jaccard coefficient | Currently we only do a 1-hop computation rather | +| | than an all-pairs. Fix is on roadmap | ++----------------------------+-------------------------------------------------+ +| Breadth First Search (BFS) | Returns a Pandas DataFrame with: | +| | [vertex][distance][predecessor] | ++----------------------------+-------------------------------------------------+ +| Single Source | Returns a Pandas DataFrame with: | +| Shortest Path (SSSP) | [vertex][distance][predecessor] | ++----------------------------+-------------------------------------------------+ + +| + +Graph Building +############## + +The biggest difference between NetworkX and cuGraph is with how Graph objects +are built. NetworkX, for the most part, stores graph data in a dictionary. +That structure allows easy insertion of new records. Consider the following +code for building a NetworkX Graph:: + + # Read the node data + df = pd.read_csv( data_file) + + # Construct graph from edge list. + G = nx.DiGraph() + + for row in df.iterrows(): + G.add_edge( + row[1]["1"], row[1]["2"], count=row[1]["3"] + ) + + +The code block is perfectly fine for NetworkX. However, the process of iterating over the dataframe and adding one node at a time is problematic for GPUs and something that we try to avoid. cuGraph stores data in columns (i.e. arrays). Resizing an array requires allocating a new array one element larger, copying the data, and adding the new value. That is not very efficient. 
+ +If your code follows the above model of inserting one element at a time, then we suggest either rewriting that code or using it as is within NetworkX and just accelerating the algorithms with cuGraph. + +Now, if your code bulk loads the data from Pandas, then RAPIDS can accelerate that process by orders of magnitude. + +.. image:: ./images/Nx_Cg_2.png + :width: 600 + +The above cuGraph code will create a cuGraph.Graph object and not a NetworkX.Graph object. + diff --git a/docs/source/sphinxext/github_link.py b/docs/source/sphinxext/github_link.py new file mode 100644 index 00000000000..a7a46fdd9df --- /dev/null +++ b/docs/source/sphinxext/github_link.py @@ -0,0 +1,146 @@ +# This contains code with copyright by the scikit-learn project, subject to the +# license in /thirdparty/LICENSES/LICENSE.scikit_learn + +import inspect +import os +import re +import subprocess +import sys +from functools import partial +from operator import attrgetter + +orig = inspect.isfunction + + +# See https://opendreamkit.org/2017/06/09/CythonSphinx/ +def isfunction(obj): + + orig_val = orig(obj) + + new_val = hasattr(type(obj), "__code__") + + if (orig_val != new_val): + return new_val + + return orig_val + + +inspect.isfunction = isfunction + +REVISION_CMD = 'git rev-parse --short HEAD' + +source_regex = re.compile(r"^File: (.*?) \(starting at line ([0-9]*?)\)$", + re.MULTILINE) + + +def _get_git_revision(): + try: + revision = subprocess.check_output(REVISION_CMD.split()).strip() + except (subprocess.CalledProcessError, OSError): + print('Failed to execute git to get revision') + return None + return revision.decode('utf-8') + + +def _linkcode_resolve(domain, info, package, url_fmt, revision): + """Determine a link to online source for a class/method/function + + This is called by sphinx.ext.linkcode + + An example with a long-untouched module that everyone has + >>> _linkcode_resolve('py', {'module': 'tty', + ... 'fullname': 'setraw'}, + ... package='tty', + ... 
url_fmt='http://hg.python.org/cpython/file/' + ... '{revision}/Lib/{package}/{path}#L{lineno}', + ... revision='xxxx') + 'http://hg.python.org/cpython/file/xxxx/Lib/tty/tty.py#L18' + """ + + if revision is None: + return + if domain not in ('py', 'pyx'): + return + if not info.get('module') or not info.get('fullname'): + return + + class_name = info['fullname'].split('.')[0] + module = __import__(info['module'], fromlist=[class_name]) + obj = attrgetter(info['fullname'])(module) + + # Unwrap the object to get the correct source + # file in case that is wrapped by a decorator + obj = inspect.unwrap(obj) + + fn: str = None + lineno: str = None + + try: + fn = inspect.getsourcefile(obj) + except Exception: + fn = None + if not fn: + try: + fn = inspect.getsourcefile(sys.modules[obj.__module__]) + except Exception: + fn = None + + if not fn: + # Possibly Cython code. Search docstring for source + m = source_regex.search(obj.__doc__) + + if (m is not None): + source_file = m.group(1) + lineno = m.group(2) + + # fn is expected to be the absolute path. + fn = os.path.relpath(source_file, start=package) + print("{}:{}".format( + os.path.abspath(os.path.join("..", "python", "cuml", fn)), + lineno)) + else: + return + else: + # Test if we are absolute or not (pyx are relative) + if (not os.path.isabs(fn)): + # Should be relative to docs right now + fn = os.path.abspath(os.path.join("..", "python", fn)) + + # Convert to relative from module root + fn = os.path.relpath(fn, + start=os.path.dirname( + __import__(package).__file__)) + + # Get the line number if we need it. (Can work without it) + if (lineno is None): + try: + lineno = inspect.getsourcelines(obj)[1] + except Exception: + + # Can happen if its a cyfunction. 
See if it has `__code__` + if (hasattr(obj, "__code__")): + lineno = obj.__code__.co_firstlineno + else: + lineno = '' + return url_fmt.format(revision=revision, + package=package, + path=fn, + lineno=lineno) + + +def make_linkcode_resolve(package, url_fmt): + """Returns a linkcode_resolve function for the given URL format + + revision is a git commit reference (hash or name) + + package is the name of the root module of the package + + url_fmt is along the lines of ('https://github.com/USER/PROJECT/' + 'blob/{revision}/{package}/' + '{path}#L{lineno}') + """ + revision = _get_git_revision() + return partial(_linkcode_resolve, + revision=revision, + package=package, + url_fmt=url_fmt) From d83cff7c7e5155363405865cf2dd376587d08e13 Mon Sep 17 00:00:00 2001 From: Iroy30 <41401566+Iroy30@users.noreply.github.com> Date: Wed, 14 Oct 2020 14:26:18 -0500 Subject: [PATCH 12/16] [REVIEW] update dask docs (#1223) * update dask docs * changelog --- CHANGELOG.md | 2 ++ python/cugraph/dask/community/louvain.py | 2 +- python/cugraph/dask/link_analysis/pagerank.py | 14 +++------ python/cugraph/dask/traversal/bfs.py | 12 ++++---- python/cugraph/dask/traversal/sssp.py | 29 +++++++++++-------- 5 files changed, 30 insertions(+), 29 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 54df86f3a24..26b24d5dcb2 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,8 @@ - PR #1176 Update ci/local/README.md - PR #1184 BLD getting latest tags - PR #1217 NetworkX Transition doc +- PR #1223 Update mnmg docs + ## Bug Fixes - PR #1131 Show style checker errors with set +e diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py index fa42fb92f42..b0424bae3b9 100644 --- a/python/cugraph/dask/community/louvain.py +++ b/python/cugraph/dask/community/louvain.py @@ -53,7 +53,7 @@ def louvain(input_graph, max_iter=100, resolution=1.0): Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize() + >>> Comms.initialize(p2p=True) >>> 
chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv('datasets/karate.csv', chunksize=chunksize, delimiter=' ', diff --git a/python/cugraph/dask/link_analysis/pagerank.py b/python/cugraph/dask/link_analysis/pagerank.py index 0ea09969350..4f3e829b3c7 100644 --- a/python/cugraph/dask/link_analysis/pagerank.py +++ b/python/cugraph/dask/link_analysis/pagerank.py @@ -51,8 +51,7 @@ def pagerank(input_graph, personalization=None, max_iter=100, tol=1.0e-5, - nstart=None, - load_balance=True): + nstart=None): """ Find the PageRank values for each vertex in a graph using multiple GPUs. @@ -92,26 +91,21 @@ def pagerank(input_graph, acceptable. nstart : not supported initial guess for pagerank - load_balance : bool - Set as True to perform load_balancing after global sorting of - dask-cudf DataFrame. This ensures that the data is uniformly - distributed among multiple GPUs to avoid over-loading. - Returns ------- PageRank : dask_cudf.DataFrame GPU data frame containing two dask_cudf.Series of size V: the vertex identifiers and the corresponding PageRank values. 
- ddf['vertex'] : cudf.Series + ddf['vertex'] : dask_cudf.Series Contains the vertex identifiers - ddf['pagerank'] : cudf.Series + ddf['pagerank'] : dask_cudf.Series Contains the PageRank score Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize() + >>> Comms.initialize(p2p=True) >>> chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, delimiter=' ', diff --git a/python/cugraph/dask/traversal/bfs.py b/python/cugraph/dask/traversal/bfs.py index 88eba53de55..7a2c50a3bc0 100644 --- a/python/cugraph/dask/traversal/bfs.py +++ b/python/cugraph/dask/traversal/bfs.py @@ -64,26 +64,26 @@ def bfs(graph, Returns ------- - df : cudf.DataFrame - df['vertex'][i] gives the vertex id of the i'th vertex + df : dask_cudf.DataFrame + df['vertex'] gives the vertex id - df['distance'][i] gives the path distance for the i'th vertex from the + df['distance'] gives the path distance from the starting vertex (Only if return_distances is True) - df['predecessor'][i] gives for the i'th vertex the vertex it was + df['predecessor'] gives the vertex it was reached from in the traversal Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize() + >>> Comms.initialize(p2p=True) >>> chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, delimiter=' ', names=['src', 'dst', 'value'], dtype=['int32', 'int32', 'float32']) >>> dg = cugraph.DiGraph() - >>> dg.from_dask_cudf_edgelist(ddf) + >>> dg.from_dask_cudf_edgelist(ddf, 'src', 'dst') >>> df = dcg.bfs(dg, 0) >>> Comms.destroy() """ diff --git a/python/cugraph/dask/traversal/sssp.py b/python/cugraph/dask/traversal/sssp.py index 9554e10f4d6..ce0c7908664 100644 --- a/python/cugraph/dask/traversal/sssp.py +++ b/python/cugraph/dask/traversal/sssp.py @@ -43,42 +43,47 @@ def sssp(graph, source): """ - Find the distances and predecessors for a breadth first traversal of a - graph. 
- The input graph must contain edge list as dask-cudf dataframe with + Compute the distance and predecessors for shortest paths from the specified + source to all the vertices in the graph. The distances column will store + the distance from the source to each vertex. The predecessors column will + store each vertex's predecessor in the shortest path. Vertices that are + unreachable will have a distance of infinity denoted by the maximum value + of the data type and the predecessor set as -1. The source vertex's + predecessor is also set to -1. + The input graph must contain edge list as dask-cudf dataframe with one partition per GPU. Parameters ---------- graph : cugraph.DiGraph cuGraph graph descriptor, should contain the connectivity information - as dask cudf edge list dataframe(edge weights are not used for this - algorithm). Undirected Graph not currently supported. + as dask cudf edge list dataframe. + Undirected Graph not currently supported. source : Integer Specify source vertex Returns ------- - df : cudf.DataFrame - df['vertex'][i] gives the vertex id of the i'th vertex + df : dask_cudf.DataFrame + df['vertex'] gives the vertex id - df['distance'][i] gives the path distance for the i'th vertex from the - starting vertex (Only if return_distances is True) + df['distance'] gives the path distance from the + starting vertex - df['predecessor'][i] gives for the i'th vertex the vertex it was + df['predecessor'] gives the vertex id it was reached from in the traversal Examples -------- >>> import cugraph.dask as dcg - >>> Comms.initialize() + >>> Comms.initialize(p2p=True) >>> chunksize = dcg.get_chunksize(input_data_path) >>> ddf = dask_cudf.read_csv(input_data_path, chunksize=chunksize, delimiter=' ', names=['src', 'dst', 'value'], dtype=['int32', 'int32', 'float32']) >>> dg = cugraph.DiGraph() - >>> dg.from_dask_cudf_edgelist(ddf) + >>> dg.from_dask_cudf_edgelist(ddf, 'src', 'dst') >>> df = dcg.sssp(dg, 0) >>> Comms.destroy() """ From 
14606fc811e9d8c87570f04b6f6cbccd8f33cdd4 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Thu, 15 Oct 2020 06:40:52 -0500 Subject: [PATCH 13/16] [REVIEW] ENH Added min CUDA version check to MG Louvain (#1222) * Added code to ensure CUDA 10.2 or higher is used for MG Louvain. * Added PR 1222 to CHANGELOG.md * Temporarily disabling test that sporadically fails on centos7, deferring investigation to 0.17 * Updating libcudacxx to tag 1.3.0 (since 1.3.0-rc0 is no longer available) Co-authored-by: Rick Ratzel --- CHANGELOG.md | 2 +- cpp/CMakeLists.txt | 2 +- cpp/tests/traversal/sssp_test.cu | 5 ++++- python/cugraph/dask/community/louvain.py | 9 +++++++++ python/cugraph/utilities/utils.py | 15 +++++++++++++++ 5 files changed, 30 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 26b24d5dcb2..6f305e30fe1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -33,6 +33,7 @@ - PR #1165 updated remaining algorithms to be NetworkX compatible - PR #1176 Update ci/local/README.md - PR #1184 BLD getting latest tags +- PR #1222 Added min CUDA version check to MG Louvain - PR #1217 NetworkX Transition doc - PR #1223 Update mnmg docs @@ -47,7 +48,6 @@ - PR #1180 BLD Adopt RAFT model for cuhornet dependency - PR #1181 Fix notebook error handling in CI - PR #1199 BUG segfault in python test suite -- PR #1186 BLD Installing raft headers under cugraph - PR #1186 BLD Installing raft headers under cugraph - PR #1192 Fix benchmark notes and documentation issues in graph.py - PR #1196 Move subcomms init outside of individual algorithm functions diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3a696b9e8b7..524d91926d7 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -235,7 +235,7 @@ message("Fetching libcudacxx") FetchContent_Declare( libcudacxx GIT_REPOSITORY https://github.com/NVIDIA/libcudacxx.git - GIT_TAG 1.3.0-rc0 + GIT_TAG 1.3.0 GIT_SHALLOW true ) diff --git a/cpp/tests/traversal/sssp_test.cu b/cpp/tests/traversal/sssp_test.cu index
ea56d1d79cb..5021bd620f8 100644 --- a/cpp/tests/traversal/sssp_test.cu +++ b/cpp/tests/traversal/sssp_test.cu @@ -425,7 +425,10 @@ TEST_P(Tests_SSSP, CheckFP64_RANDOM_DIST_PREDS) // --gtest_filter=*simple_test* -INSTANTIATE_TEST_CASE_P(simple_test, +// FIXME: Enable this for 0.17. Temporarily disabled due to sporadic error hard +// to reproduce: "transform: failed to synchronize: cudaErrorIllegalAddress: an +// illegal memory access was encountered" thrown in the test body. +INSTANTIATE_TEST_CASE_P(DISABLED_simple_test, Tests_SSSP, ::testing::Values(SSSP_Usecase(MTX, "test/datasets/dblp.mtx", 100), SSSP_Usecase(MTX, "test/datasets/wiki2003.mtx", 100000), diff --git a/python/cugraph/dask/community/louvain.py b/python/cugraph/dask/community/louvain.py index b0424bae3b9..11ecb78375f 100644 --- a/python/cugraph/dask/community/louvain.py +++ b/python/cugraph/dask/community/louvain.py @@ -19,6 +19,8 @@ from cugraph.dask.common.input_utils import get_distributed_data from cugraph.structure.shuffle import shuffle from cugraph.dask.community import louvain_wrapper as c_mg_louvain +from cugraph.utilities.utils import is_cuda_version_less_than + import dask_cudf @@ -66,6 +68,13 @@ def louvain(input_graph, max_iter=100, resolution=1.0): """ # FIXME: finish docstring: describe parameters, etc. + # MG Louvain currently requires CUDA 10.2 or higher. + # FIXME: remove this check once RAPIDS drops support for CUDA < 10.2 + if is_cuda_version_less_than((10, 2)): + raise NotImplementedError("Multi-GPU Louvain is not implemented for " + "this version of CUDA. Ensure CUDA version " + "10.2 or higher is installed.") + # FIXME: dask methods to populate graphs from edgelists are only present on # DiGraph classes. Disable the Graph check for now and assume inputs are # symmetric DiGraphs. 
diff --git a/python/cugraph/utilities/utils.py b/python/cugraph/utilities/utils.py index 000e32283fa..1a611f45cc8 100644 --- a/python/cugraph/utilities/utils.py +++ b/python/cugraph/utilities/utils.py @@ -12,6 +12,7 @@ # limitations under the License. import cudf +from numba import cuda def get_traversed_path(df, id): @@ -134,3 +135,17 @@ def get_traversed_path_list(df, id): pred = ddf['predecessor'].iloc[0] return answer + + +def is_cuda_version_less_than(min_version=(10, 2)): + """ + Returns True if the version of CUDA being used is less than min_version + """ + this_cuda_ver = cuda.runtime.get_version() # returns (major, minor) + if this_cuda_ver[0] > min_version[0]: + return False + if this_cuda_ver[0] < min_version[0]: + return True + if this_cuda_ver[1] < min_version[1]: + return True + return False From d39036aa072e423ae440b099be424d95f632fb4c Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Thu, 15 Oct 2020 11:59:41 -0500 Subject: [PATCH 14/16] Added PR 1226 to CHANGELOG.md --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f0d20926c8b..24a16f2aafb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,8 @@ ## Improvements ## Bug Fixes +- PR #1226 Resolving 0.16 to 0.17 auto-merger failures + # cuGraph 0.16.0 (Date TBD) From aa1e7e2d5611571c7ead3b306458f4b493b3c19c Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Thu, 15 Oct 2020 12:24:58 -0500 Subject: [PATCH 15/16] Reverting CHANGELOG entry for PR 1226 (resolve auto-merger failures) --- CHANGELOG.md | 1 - 1 file changed, 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 24a16f2aafb..55b692ec08c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,6 @@ ## Improvements ## Bug Fixes -- PR #1226 Resolving 0.16 to 0.17 auto-merger failures # cuGraph 0.16.0 (Date TBD) From d2662e2c9b2cc255777f969db08c9ffd15319231 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Thu, 15 Oct 2020 15:51:40 -0500 Subject: [PATCH 16/16] Updated cudf APIs for 0.17 ---
notebooks/structure/Renumber-2.ipynb | 2 +- notebooks/structure/Renumber.ipynb | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/notebooks/structure/Renumber-2.ipynb b/notebooks/structure/Renumber-2.ipynb index 68c21fe725a..d17c2b32191 100755 --- a/notebooks/structure/Renumber-2.ipynb +++ b/notebooks/structure/Renumber-2.ipynb @@ -156,7 +156,7 @@ "\n", "tmp_df, numbering = NumberMap.renumber(gdf, ['src_ip'], ['dst_ip'])\n", "\n", - "gdf = gdf.merge(tmp_df, on='order').sort_values('order').set_index(index='order', drop=True)\n", + "gdf = gdf.merge(tmp_df, on='order').sort_values('order').set_index(keys='order', drop=True)\n", "gdf = gdf.rename(columns={'src': 'src_r', 'dst': 'dst_r'})" ] }, diff --git a/notebooks/structure/Renumber.ipynb b/notebooks/structure/Renumber.ipynb index 929a600a39d..047b53d62df 100755 --- a/notebooks/structure/Renumber.ipynb +++ b/notebooks/structure/Renumber.ipynb @@ -282,11 +282,13 @@ "jac = numbering.unrenumber(jac, 'source')\n", "jac = numbering.unrenumber(jac, 'destination')\n", "\n", - "jac.add_column(\"original_source\",\n", - " [ socket.inet_ntoa(struct.pack('!L', x)) for x in jac['source'].values_host ])\n", + "jac.insert(len(jac.columns),\n", + " \"original_source\",\n", + " [ socket.inet_ntoa(struct.pack('!L', x)) for x in jac['source'].values_host ])\n", "\n", - "jac.add_column(\"original_destination\",\n", - " [ socket.inet_ntoa(struct.pack('!L', x)) for x in jac['destination'].values_host ])\n", + "jac.insert(len(jac.columns),\n", + " \"original_destination\",\n", + " [ socket.inet_ntoa(struct.pack('!L', x)) for x in jac['destination'].values_host ])\n", "\n", "jac.to_pandas()\n" ]