From 3b2908ba740b792382ad21c35fdf7db365292ebb Mon Sep 17 00:00:00 2001 From: GiulioRossetti Date: Mon, 28 Apr 2025 15:08:52 +0200 Subject: [PATCH] :new: fix random seed --- cdlib/__init__.py | 1 + cdlib/algorithms/bipartite_clustering.py | 3 + cdlib/algorithms/crisp_partition.py | 28 +++++++- cdlib/algorithms/overlapping_partition.py | 5 +- cdlib/random.py | 78 +++++++++++++++++++++++ cdlib/test/test_seed.py | 50 +++++++++++++++ cdlib/test/test_viz_network.py | 2 +- docs/reference/utils.rst | 36 ++++++++++- 8 files changed, 197 insertions(+), 6 deletions(-) create mode 100644 cdlib/random.py create mode 100644 cdlib/test/test_seed.py diff --git a/cdlib/__init__.py b/cdlib/__init__.py index f1ef144b..354ec466 100644 --- a/cdlib/__init__.py +++ b/cdlib/__init__.py @@ -7,3 +7,4 @@ from cdlib.classes.temporal_clustering import TemporalClustering from cdlib.classes.named_clustering import NamedClustering from cdlib.lifecycles import LifeCycle, CommunityEvent +from cdlib.random import seed, reset_seed, get_seed, fixed_seed diff --git a/cdlib/algorithms/bipartite_clustering.py b/cdlib/algorithms/bipartite_clustering.py index 750d0135..9762a7a3 100644 --- a/cdlib/algorithms/bipartite_clustering.py +++ b/cdlib/algorithms/bipartite_clustering.py @@ -4,6 +4,7 @@ from collections import defaultdict from cdlib.algorithms.internal.pycondor import condor_object, initial_community, brim from cdlib.prompt_utils import report_missing_packages, prompt_import_failure +from cdlib.random import get_seed missing_packages = set() @@ -149,6 +150,8 @@ def CPM_Bipartite( except: g.vs["name"] = [v.index for v in g.vs] + seed = get_seed(seed) + optimiser = leidenalg.Optimiser() leidenalg.Optimiser.set_rng_seed(self=optimiser, value=seed) diff --git a/cdlib/algorithms/crisp_partition.py b/cdlib/algorithms/crisp_partition.py index 19314416..59637267 100644 --- a/cdlib/algorithms/crisp_partition.py +++ b/cdlib/algorithms/crisp_partition.py @@ -1,8 +1,8 @@ -import sys import numpy as np from 
typing import Callable from copy import deepcopy from cdlib.algorithms.internal import DER +from cdlib.random import get_seed from community import community_louvain @@ -574,7 +574,7 @@ def louvain( def leiden( - g_original: object, initial_membership: list = None, weights: list = None + g_original: object, initial_membership: list = None, weights: list = None, seed: int = None ) -> NodeClustering: """ The Leiden algorithm is an improvement of the Louvain algorithm. @@ -622,11 +622,14 @@ def leiden( g = convert_graph_formats(g_original, ig.Graph) + seed = get_seed(seed) + part = leidenalg.find_partition( g, leidenalg.ModularityVertexPartition, initial_membership=initial_membership, weights=weights, + seed=seed, ) coms = [g.vs[x]["name"] for x in part] return NodeClustering( @@ -645,6 +648,7 @@ def rb_pots( initial_membership: list = None, weights: list = None, resolution_parameter: float = 1, + seed: int = None, ) -> NodeClustering: """ Rb_pots is a model where the quality function to optimize is: @@ -701,6 +705,7 @@ def rb_pots( ) g = convert_graph_formats(g_original, ig.Graph) + seed = get_seed(seed) part = leidenalg.find_partition( g, @@ -708,6 +713,7 @@ def rb_pots( resolution_parameter=resolution_parameter, initial_membership=initial_membership, weights=weights, + seed=seed ) coms = [g.vs[x]["name"] for x in part] return NodeClustering( @@ -728,6 +734,7 @@ def rber_pots( weights: list = None, node_sizes: list = None, resolution_parameter: float = 1, + seed: int = None, ) -> NodeClustering: """ rber_pots is a model where the quality function to optimize is: @@ -781,6 +788,8 @@ def rber_pots( g = convert_graph_formats(g_original, ig.Graph) + seed = get_seed(seed) + part = leidenalg.find_partition( g, leidenalg.RBERVertexPartition, @@ -788,6 +797,7 @@ def rber_pots( initial_membership=initial_membership, weights=weights, node_sizes=node_sizes, + seed=seed, ) coms = [g.vs[x]["name"] for x in part] return NodeClustering( @@ -809,6 +819,7 @@ def cpm( weights: list = 
None, node_sizes: list = None, resolution_parameter: float = 1, + seed: int = None, ) -> NodeClustering: """ CPM is a model where the quality function to optimize is: @@ -872,6 +883,8 @@ def cpm( g = convert_graph_formats(g_original, ig.Graph) + seed = get_seed(seed) + part = leidenalg.find_partition( g, leidenalg.CPMVertexPartition, @@ -879,6 +892,7 @@ def cpm( initial_membership=initial_membership, weights=weights, node_sizes=node_sizes, + seed=seed ) coms = [g.vs[x]["name"] for x in part] return NodeClustering( @@ -895,7 +909,7 @@ def cpm( def significance_communities( - g_original: object, initial_membership: list = None, node_sizes: list = None + g_original: object, initial_membership: list = None, node_sizes: list = None, seed: int = None ) -> NodeClustering: """ Significance_communities is a model where the quality function to optimize is: @@ -948,12 +962,14 @@ def significance_communities( ) g = convert_graph_formats(g_original, ig.Graph) + seed = get_seed(seed) part = leidenalg.find_partition( g, leidenalg.SignificanceVertexPartition, initial_membership=initial_membership, node_sizes=node_sizes, + seed=seed, ) coms = [g.vs[x]["name"] for x in part] return NodeClustering( @@ -972,6 +988,7 @@ def surprise_communities( initial_membership: list = None, weights: list = None, node_sizes: list = None, + seed: int = None, ) -> NodeClustering: """ @@ -1027,6 +1044,7 @@ def surprise_communities( ) g = convert_graph_formats(g_original, ig.Graph) + seed = get_seed(seed) part = leidenalg.find_partition( g, @@ -1034,6 +1052,7 @@ def surprise_communities( initial_membership=initial_membership, weights=weights, node_sizes=node_sizes, + seed=seed ) coms = [g.vs[x]["name"] for x in part] return NodeClustering( @@ -2635,6 +2654,9 @@ def pycombo( ) g = convert_graph_formats(g_original, nx.Graph) + + random_seed = get_seed(random_seed) + partition = pycombo_part.execute( g, weight=weight, diff --git a/cdlib/algorithms/overlapping_partition.py 
b/cdlib/algorithms/overlapping_partition.py index 89937d53..f30fb340 100644 --- a/cdlib/algorithms/overlapping_partition.py +++ b/cdlib/algorithms/overlapping_partition.py @@ -1,4 +1,3 @@ -import sys from random import sample from demon import Demon from cdlib.algorithms.internal.NodePerception import NodePerception @@ -7,6 +6,7 @@ import numpy as np from collections import defaultdict from cdlib import NodeClustering +from cdlib.random import get_seed from cdlib.utils import suppress_stdout, convert_graph_formats, nx_node_integer_mapping from cdlib.algorithms.internal.CONGO import Congo_ from cdlib.algorithms.internal.CONGA import Conga_ @@ -1531,6 +1531,9 @@ def lpam( ) g = convert_graph_formats(g_original, nx.Graph) + + seed = get_seed(seed) + return LPAM(graph=g, k=k, threshold=threshold, distance=distance, seed=seed)
diff --git a/cdlib/random.py b/cdlib/random.py
new file mode 100644
index 00000000..0559bf1e
--- /dev/null
+++ b/cdlib/random.py
@@ -0,0 +1,113 @@
+# cdlib/random.py
+
+import random
+import numpy as np
+import os
+from contextlib import contextmanager
+import warnings
+
+try:
+    import igraph as ig
+except ImportError:
+    ig = None
+
+try:
+    import networkit as nk
+except ImportError:
+    nk = None
+
+try:
+    import sklearn
+except ImportError:
+    sklearn = None
+
+try:
+    import graph_tool as gt
+except ImportError:
+    gt = None
+
+# Global variable to store the seed
+_cdlib_global_seed = None
+
+
+@contextmanager
+def fixed_seed(seed_value: int):
+    """
+    Context manager that applies ``seed_value`` only inside a ``with`` block.
+
+    On exit the seed that was set before the block is applied again through
+    :func:`seed`; if none was set, the global seed is cleared.
+    The "already been set" warning of :func:`seed` is not emitted on entry or
+    exit, since the override is deliberate and temporary.
+
+    :param seed_value: the seed to use within the block
+    """
+    previous_seed = _cdlib_global_seed
+    try:
+        # Clear the stored seed before seeding so that seed() does not warn.
+        reset_seed()
+        seed(seed_value)
+        yield
+    finally:
+        # Clearing first also keeps the restore from failing halfway when
+        # warnings are turned into errors.
+        reset_seed()
+        if previous_seed is not None:
+            seed(previous_seed)
+
+
+def seed(seed_value: int):
+    """
+    Set a global random seed for reproducibility across cdlib and its dependencies.
+
+    The value is stored for :func:`get_seed` and used to seed Python's ``random``
+    module, NumPy's global generator and, when it can be imported, networkit.
+    A ``UserWarning`` is issued when a global seed is already set.
+
+    :param seed_value: the seed to apply
+    """
+    global _cdlib_global_seed
+    if _cdlib_global_seed is not None:
+        warnings.warn(
+            f"cdlib.seed() has already been set (previous value: {_cdlib_global_seed}). Overriding it.",
+            UserWarning,
+            stacklevel=2,  # attribute the warning to the caller of seed()
+        )
+    _cdlib_global_seed = seed_value
+
+    # Core Python
+    random.seed(seed_value)
+    # NOTE: PYTHONHASHSEED is read at interpreter start-up, so exporting it here
+    # can only affect processes launched afterwards, not the running interpreter.
+    os.environ["PYTHONHASHSEED"] = str(seed_value)
+
+    # Numpy
+    np.random.seed(seed_value)
+
+    # networkit
+    if nk is not None:
+        nk.engine.setSeed(seed_value, False)
+
+
+def get_seed(default=None):
+    """
+    Retrieve the global seed if set, else return a default value.
+
+    :param default: value returned when no global seed is set
+    :return: the global seed when one is set, ``default`` otherwise
+    """
+    # NOTE(review): a global seed wins over ``default``. Algorithms pass their own
+    # ``seed`` argument as ``default``, so an explicit seed is ignored while a
+    # global seed is set -- confirm this precedence is intended.
+    return _cdlib_global_seed if _cdlib_global_seed is not None else default
+
+
+def reset_seed():
+    """
+    Reset the global seed to None (no forced seeding).
+
+    Only the stored value is cleared; generators already seeded by :func:`seed`
+    are left untouched.
+    """
+    global _cdlib_global_seed
+    _cdlib_global_seed = None
diff --git a/cdlib/test/test_seed.py b/cdlib/test/test_seed.py
new file mode 100644
index 00000000..8afc3571
--- /dev/null
+++ b/cdlib/test/test_seed.py
@@ -0,0 +1,55 @@
+import unittest
+import networkx as nx
+from cdlib import algorithms, seed, reset_seed, get_seed, fixed_seed
+
+
+class TestSeedSetting(unittest.TestCase):
+
+    def setUp(self):
+        self.graph = nx.karate_club_graph()
+
+    def tearDown(self):
+        # The seed is module-level state in cdlib.random: clear it so that it
+        # does not leak into tests that run afterwards.
+        reset_seed()
+
+    def test_leiden_seed(self):
+        seed(42)
+        comms1 = algorithms.leiden(self.graph)
+        seed(42)
+        comms2 = algorithms.leiden(self.graph)
+        self.assertEqual(comms1.communities, comms2.communities)
+
+    def test_infomap_seed(self):
+        seed(123)
+        comms1 = algorithms.infomap(self.graph)
+        seed(123)
+        comms2 = algorithms.infomap(self.graph)
+        self.assertEqual(comms1.communities, comms2.communities)
+
+    def test_manual_override(self):
+        seed(42)
+        comms1 = algorithms.leiden(self.graph, seed=100)
+        seed(42)
+        comms2 = algorithms.leiden(self.graph, seed=100)
+        self.assertEqual(comms1.communities, comms2.communities)
+
+    def test_reset_seed(self):
+        seed(42)
+        reset_seed()
+        self.assertIsNone(get_seed())
+
+    def test_warning_on_multiple_seed_calls(self):
+        seed(42)
+        with self.assertWarns(UserWarning):
+            seed(123)
+
+    def test_fixed_seed_context_manager(self):
+        seed(42)
+        original_seed = get_seed()
+
+        with fixed_seed(100):
+            self.assertEqual(get_seed(), 100)
+
+        # After context, seed
should be restored + self.assertEqual(get_seed(), original_seed) diff --git a/cdlib/test/test_viz_network.py b/cdlib/test/test_viz_network.py index 70dc340d..489cbb5e 100644 --- a/cdlib/test/test_viz_network.py +++ b/cdlib/test/test_viz_network.py @@ -50,7 +50,7 @@ def test_interactive_cluster(self): g, coms, pos, - interractive=True, + interactive=True, output_file=output_file, plot_labels=True, plot_overlaps=True, diff --git a/docs/reference/utils.rst b/docs/reference/utils.rst index 166e65a7..0ae98adf 100644 --- a/docs/reference/utils.rst +++ b/docs/reference/utils.rst @@ -30,4 +30,39 @@ Remapping of graph nodes. It is often a good idea to limit memory usage and to u :toctree: generated/ nx_node_integer_mapping - remap_node_communities \ No newline at end of file + remap_node_communities + +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +Global Seeding for Reproducibility +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +``cdlib`` provides a utility to globally set the random seed across its algorithms and dependencies: + +.. code-block:: python + + import cdlib + + # Set seed for reproducibility + cdlib.seed(42) + + # All community detection algorithms will now default to use this seed + from cdlib import algorithms + import networkx as nx + + G = nx.karate_club_graph() + communities = algorithms.leiden(G) + + # Reset the seed to the default value + cdlib.reset_seed() + +Using a temporary fixed seed in a context manager: + +.. code-block:: python + + from cdlib import fixed_seed + + with fixed_seed(123): + communities = algorithms.leiden(G) + # Seed automatically restored + +