Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions cdlib/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
from cdlib.classes.temporal_clustering import TemporalClustering
from cdlib.classes.named_clustering import NamedClustering
from cdlib.lifecycles import LifeCycle, CommunityEvent
from cdlib.random import seed, reset_seed, get_seed, fixed_seed
3 changes: 3 additions & 0 deletions cdlib/algorithms/bipartite_clustering.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from collections import defaultdict
from cdlib.algorithms.internal.pycondor import condor_object, initial_community, brim
from cdlib.prompt_utils import report_missing_packages, prompt_import_failure
from cdlib.random import get_seed

missing_packages = set()

Expand Down Expand Up @@ -149,6 +150,8 @@ def CPM_Bipartite(
except:
g.vs["name"] = [v.index for v in g.vs]

seed = get_seed(seed)

optimiser = leidenalg.Optimiser()
leidenalg.Optimiser.set_rng_seed(self=optimiser, value=seed)

Expand Down
28 changes: 25 additions & 3 deletions cdlib/algorithms/crisp_partition.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import sys
import numpy as np
from typing import Callable
from copy import deepcopy
from cdlib.algorithms.internal import DER
from cdlib.random import get_seed

from community import community_louvain

Expand Down Expand Up @@ -574,7 +574,7 @@ def louvain(


def leiden(
g_original: object, initial_membership: list = None, weights: list = None
g_original: object, initial_membership: list = None, weights: list = None, seed: int = None
) -> NodeClustering:
"""
The Leiden algorithm is an improvement of the Louvain algorithm.
Expand Down Expand Up @@ -622,11 +622,14 @@ def leiden(

g = convert_graph_formats(g_original, ig.Graph)

seed = get_seed(seed)

part = leidenalg.find_partition(
g,
leidenalg.ModularityVertexPartition,
initial_membership=initial_membership,
weights=weights,
seed=seed,
)
coms = [g.vs[x]["name"] for x in part]
return NodeClustering(
Expand All @@ -645,6 +648,7 @@ def rb_pots(
initial_membership: list = None,
weights: list = None,
resolution_parameter: float = 1,
seed: int = None,
) -> NodeClustering:
"""
Rb_pots is a model where the quality function to optimize is:
Expand Down Expand Up @@ -701,13 +705,15 @@ def rb_pots(
)

g = convert_graph_formats(g_original, ig.Graph)
seed = get_seed(seed)

part = leidenalg.find_partition(
g,
leidenalg.RBConfigurationVertexPartition,
resolution_parameter=resolution_parameter,
initial_membership=initial_membership,
weights=weights,
seed=seed
)
coms = [g.vs[x]["name"] for x in part]
return NodeClustering(
Expand All @@ -728,6 +734,7 @@ def rber_pots(
weights: list = None,
node_sizes: list = None,
resolution_parameter: float = 1,
seed: int = None,
) -> NodeClustering:
"""
rber_pots is a model where the quality function to optimize is:
Expand Down Expand Up @@ -781,13 +788,16 @@ def rber_pots(

g = convert_graph_formats(g_original, ig.Graph)

seed = get_seed(seed)

part = leidenalg.find_partition(
g,
leidenalg.RBERVertexPartition,
resolution_parameter=resolution_parameter,
initial_membership=initial_membership,
weights=weights,
node_sizes=node_sizes,
seed=seed,
)
coms = [g.vs[x]["name"] for x in part]
return NodeClustering(
Expand All @@ -809,6 +819,7 @@ def cpm(
weights: list = None,
node_sizes: list = None,
resolution_parameter: float = 1,
seed: int = None,
) -> NodeClustering:
"""
CPM is a model where the quality function to optimize is:
Expand Down Expand Up @@ -872,13 +883,16 @@ def cpm(

g = convert_graph_formats(g_original, ig.Graph)

seed = get_seed(seed)

part = leidenalg.find_partition(
g,
leidenalg.CPMVertexPartition,
resolution_parameter=resolution_parameter,
initial_membership=initial_membership,
weights=weights,
node_sizes=node_sizes,
seed=seed
)
coms = [g.vs[x]["name"] for x in part]
return NodeClustering(
Expand All @@ -895,7 +909,7 @@ def cpm(


def significance_communities(
g_original: object, initial_membership: list = None, node_sizes: list = None
g_original: object, initial_membership: list = None, node_sizes: list = None, seed: int = None
) -> NodeClustering:
"""
Significance_communities is a model where the quality function to optimize is:
Expand Down Expand Up @@ -948,12 +962,14 @@ def significance_communities(
)

g = convert_graph_formats(g_original, ig.Graph)
seed = get_seed(seed)

part = leidenalg.find_partition(
g,
leidenalg.SignificanceVertexPartition,
initial_membership=initial_membership,
node_sizes=node_sizes,
seed=seed,
)
coms = [g.vs[x]["name"] for x in part]
return NodeClustering(
Expand All @@ -972,6 +988,7 @@ def surprise_communities(
initial_membership: list = None,
weights: list = None,
node_sizes: list = None,
seed: int = None,
) -> NodeClustering:
"""

Expand Down Expand Up @@ -1027,13 +1044,15 @@ def surprise_communities(
)

g = convert_graph_formats(g_original, ig.Graph)
seed = get_seed(seed)

part = leidenalg.find_partition(
g,
leidenalg.SurpriseVertexPartition,
initial_membership=initial_membership,
weights=weights,
node_sizes=node_sizes,
seed=seed
)
coms = [g.vs[x]["name"] for x in part]
return NodeClustering(
Expand Down Expand Up @@ -2635,6 +2654,9 @@ def pycombo(
)

g = convert_graph_formats(g_original, nx.Graph)

random_seed = get_seed(random_seed)

partition = pycombo_part.execute(
g,
weight=weight,
Expand Down
5 changes: 4 additions & 1 deletion cdlib/algorithms/overlapping_partition.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
import sys
from random import sample
from demon import Demon
from cdlib.algorithms.internal.NodePerception import NodePerception
Expand All @@ -7,6 +6,7 @@
import numpy as np
from collections import defaultdict
from cdlib import NodeClustering
from cdlib.random import get_seed
from cdlib.utils import suppress_stdout, convert_graph_formats, nx_node_integer_mapping
from cdlib.algorithms.internal.CONGO import Congo_
from cdlib.algorithms.internal.CONGA import Conga_
Expand Down Expand Up @@ -1531,6 +1531,9 @@ def lpam(
)

g = convert_graph_formats(g_original, nx.Graph)

seed = get_seed(seed)

return LPAM(graph=g, k=k, threshold=threshold, distance=distance, seed=seed)


Expand Down
78 changes: 78 additions & 0 deletions cdlib/random.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# cdlib/random.py

import random
import numpy as np
import os
from contextlib import contextmanager
import warnings

try:
import igraph as ig
except ImportError:
ig = None

try:
import networkit as nk
except ImportError:
nk = None

try:
import sklearn
except ImportError:
sklearn = None

try:
import graph_tool as gt
except ImportError:
gt = None

# Global variable to store the seed
_cdlib_global_seed = None


@contextmanager
def fixed_seed(seed_value: int):
    """Context manager to temporarily fix the global cdlib seed.

    Parameters
    ----------
    seed_value : int
        Seed applied for the duration of the ``with`` block.

    Notes
    -----
    On exit the previously active seed (if any) is re-applied via
    :func:`seed`; otherwise the global seed is cleared with
    :func:`reset_seed`.  Re-seeding restores the *seed value*, not the
    exact internal RNG states that existed before entering the block.
    """
    global _cdlib_global_seed
    previous_seed = _cdlib_global_seed
    seed(seed_value)
    try:
        yield
    finally:
        if previous_seed is not None:
            # seed() warns when a global seed is already set; this restore
            # is intentional, so silence that spurious warning.
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", UserWarning)
                seed(previous_seed)
        else:
            reset_seed()


def seed(seed_value: int):
    """Set a global random seed for reproducibility across cdlib and its dependencies.

    Parameters
    ----------
    seed_value : int
        Seed propagated to Python's ``random``, NumPy, and (when installed)
        networkit.  A ``UserWarning`` is emitted if a global seed was
        already set.
    """
    global _cdlib_global_seed

    if _cdlib_global_seed is not None:
        warnings.warn(
            f"cdlib.seed() has already been set (previous value: {_cdlib_global_seed}). Overriding it.",
            UserWarning
        )

    _cdlib_global_seed = seed_value

    # Core Python RNG.
    random.seed(seed_value)
    # NOTE(review): setting PYTHONHASHSEED at runtime only influences child
    # processes; the current interpreter's hash randomization is fixed at
    # startup — confirm this is the intended scope.
    os.environ["PYTHONHASHSEED"] = str(seed_value)

    # NumPy's legacy global RNG.
    np.random.seed(seed_value)

    # Optional backend: networkit, when the import at module top succeeded.
    if nk is not None:
        nk.engine.setSeed(seed_value, False)


def get_seed(default=None):
    """Retrieve the global seed if set, else return a default value.

    Parameters
    ----------
    default : optional
        Value returned when no global seed has been set.
    """
    if _cdlib_global_seed is None:
        return default
    return _cdlib_global_seed


def reset_seed():
    """Reset the global seed to None (no forced seeding).

    After this call, algorithms fall back to their own ``seed`` arguments
    (or unseeded behavior) until :func:`seed` is invoked again.  RNGs that
    were already seeded by a previous :func:`seed` call are not
    re-randomized.
    """
    global _cdlib_global_seed
    _cdlib_global_seed = None
50 changes: 50 additions & 0 deletions cdlib/test/test_seed.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
import unittest
import networkx as nx
from cdlib import algorithms, seed, reset_seed, get_seed, fixed_seed


class TestSeedSetting(unittest.TestCase):
    """Exercise cdlib's global seeding utilities end to end."""

    def setUp(self):
        # Small, well-known benchmark graph: cheap to cluster repeatedly.
        self.graph = nx.karate_club_graph()

    def test_leiden_seed(self):
        # Same global seed -> identical leiden partitions across runs.
        seed(42)
        first = algorithms.leiden(self.graph)
        seed(42)
        second = algorithms.leiden(self.graph)
        self.assertEqual(first.communities, second.communities)

    def test_infomap_seed(self):
        # Same global seed -> identical infomap partitions across runs.
        seed(123)
        first = algorithms.infomap(self.graph)
        seed(123)
        second = algorithms.infomap(self.graph)
        self.assertEqual(first.communities, second.communities)

    def test_manual_override(self):
        # An explicit per-call seed is reproducible regardless of the
        # global seed value in effect.
        seed(42)
        first = algorithms.leiden(self.graph, seed=100)
        seed(42)
        second = algorithms.leiden(self.graph, seed=100)
        self.assertEqual(first.communities, second.communities)

    def test_reset_seed(self):
        # reset_seed() clears the stored global value.
        seed(42)
        reset_seed()
        self.assertIsNone(get_seed())

    def test_warning_on_multiple_seed_calls(self):
        # Re-seeding while a global seed is active must warn.
        seed(42)
        with self.assertWarns(UserWarning):
            seed(123)

    def test_fixed_seed_context_manager(self):
        seed(42)
        before = get_seed()

        # Inside the context, the temporary seed is visible.
        with fixed_seed(100):
            self.assertEqual(get_seed(), 100)

        # On exit the previous seed is restored.
        self.assertEqual(get_seed(), before)
2 changes: 1 addition & 1 deletion cdlib/test/test_viz_network.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ def test_interactive_cluster(self):
g,
coms,
pos,
interractive=True,
interactive=True,
output_file=output_file,
plot_labels=True,
plot_overlaps=True,
Expand Down
36 changes: 35 additions & 1 deletion docs/reference/utils.rst
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,38 @@ Remapping of graph nodes. It is often a good idea to limit memory usage and to u
:toctree: generated/

nx_node_integer_mapping
remap_node_communities
remap_node_communities

^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Global Seeding for Reproducibility
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

``cdlib`` provides a utility to globally set the random seed across its algorithms and dependencies:

.. code-block:: python

    import cdlib

    # Set seed for reproducibility
    cdlib.seed(42)

    # All community detection algorithms will now default to use this seed
    from cdlib import algorithms
    import networkx as nx

    G = nx.karate_club_graph()
    communities = algorithms.leiden(G)

    # Clear the global seed (return to unseeded behavior)
    cdlib.reset_seed()

Using a temporary fixed seed in a context manager:

.. code-block:: python

    from cdlib import fixed_seed

    with fixed_seed(123):
        communities = algorithms.leiden(G)
    # Seed automatically restored on exiting the context
# Seed automatically restored


Loading