Fix issue #3153: generalized modularity maximization (#3260)

* Generalized Modularity with Gamma parameter * Genearalized modularity with gamma parameter * fix issue 3153, modularity_max can support maximization of the generalized modularity * fix issue 3153 - add resolution parameter for modularity_max and test case * Add generalized modularity * Add generalized modularity and test cases * Remove the rounding in modularity_max * style * Move tests to where they should be * update old tests to simplify and clarify * Correct modularity calls to use keyword and adjust tests * Update docs of resolution parameter and tests * Add tests for resolution parameter of modularity The feature was added, but without tests. This at leasts tests a couple of simple cases. * Update/correct mod_max docs and match output types. * Correct the resolution impact tests for max_mod * Update release notes for max_mod greedy API changes * black * Fix link to modularity docs. * Fix doc ref to private function. * Rm print statements from tests. * Update based on review. Co-authored-by: Dan Schult <dschult@colgate.edu> Co-authored-by: Ross Barnowski <rossbar@berkeley.edu>
author: Xiaoyan Lu <xiaoyan.lu.xl@gmail.com> 2021-02-23 23:05:50 -0500
committer: GitHub <noreply@github.com> 2021-02-23 23:05:50 -0500
commit: e4d1483f241e9a56fbda247592710b659d29bce5 (patch)
tree: 8a6f67845492cfe96e66ae18efffebb3f9dfc973
parent: 5d10139b38c93adaff5872e4e1e4e9892eb2118b (diff)
download: networkx-e4d1483f241e9a56fbda247592710b659d29bce5.tar.gz
6 files changed, 187 insertions, 54 deletions
diff --git a/doc/reference/algorithms/community.rst b/doc/reference/algorithms/community.rst
index b89b0e2c..5df5b89c 100644
--- a/doc/reference/algorithms/community.rst
+++ b/doc/reference/algorithms/community.rst
@@ -29,7 +29,7 @@ Modularity-based communities
    :toctree: generated/
 
    greedy_modularity_communities
-   _naive_greedy_modularity_communities
+   naive_greedy_modularity_communities
 
 Tree partitioning
 -----------------
diff --git a/doc/release/release_dev.rst b/doc/release/release_dev.rst
index ccef52df..b19e0e2a 100644
--- a/doc/release/release_dev.rst
+++ b/doc/release/release_dev.rst
@@ -60,6 +60,10 @@ API Changes
   instead of a UUID generate string. So the function returns `tree`.
 - [`#4545 <https://github.com/networkx/networkx/pull/4545>`_]
   The variable `NIL` ="NIL" has been removed from `networkx.generators.trees`
+- [`#3620 <https://github.com/networkx/networkx/pull/3620>`_]
+  The function `naive_greedy_modularity_communities` now returns a
+  list of communities (like `greedy_modularity_communities`) instead
+  of a generator of communities.
 
 Deprecations
 ------------
@@ -97,6 +101,8 @@ Deprecations
 - [`#4617 <https://github.com/networkx/networkx/pull/4617>`_]
   Deprecate ``hub_matrix`` and ``authority_matrix``
 
+
+
 Contributors to this release
 ----------------------------
 
diff --git a/networkx/algorithms/community/modularity_max.py b/networkx/algorithms/community/modularity_max.py
index 88225368..cac64eb5 100644
--- a/networkx/algorithms/community/modularity_max.py
+++ b/networkx/algorithms/community/modularity_max.py
@@ -15,22 +15,30 @@ __all__ = [
 ]
 
 
-def greedy_modularity_communities(G, weight=None):
-    """Find communities in graph using Clauset-Newman-Moore greedy modularity
-    maximization. This method currently supports the Graph class and does not
+def greedy_modularity_communities(G, weight=None, resolution=1):
+    """Find communities in G using greedy modularity maximization.
+
+    This function uses Clauset-Newman-Moore greedy modularity maximization [2]_.
+    This method currently supports the Graph class and does not
     consider edge weights.
 
     Greedy modularity maximization begins with each node in its own community
     and joins the pair of communities that most increases modularity until no
     such pair exists.
 
+    This function maximizes the generalized modularity, where `resolution`
+    is the resolution parameter, often expressed as $\gamma$.
+    See :func:`~networkx.algorithms.community.quality.modularity`.
+
     Parameters
     ----------
     G : NetworkX graph
 
     Returns
     -------
-    Yields sets of nodes, one for each community.
+    list
+        A list of sets of nodes, one for each community.
+        Sorted by length with largest communities first.
 
     Examples
     --------
@@ -40,13 +48,19 @@ def greedy_modularity_communities(G, weight=None):
     >>> sorted(c[0])
     [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
 
+    See Also
+    --------
+    modularity
+
     References
     ----------
-    .. [1] M. E. J Newman 'Networks: An Introduction', page 224
+    .. [1] M. E. J Newman "Networks: An Introduction", page 224
        Oxford University Press 2011.
     .. [2] Clauset, A., Newman, M. E., & Moore, C.
        "Finding community structure in very large networks."
        Physical Review E 70(6), 2004.
+    .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community
+       Detection" Phys. Rev. E74, 2006.
     """
 
     # Count nodes and edges
@@ -68,7 +82,7 @@ def greedy_modularity_communities(G, weight=None):
 
     # Initial modularity
     partition = [[label_for_node[x] for x in c] for c in communities.values()]
-    q_cnm = modularity(G, partition)
+    q_cnm = modularity(G, partition, resolution=resolution)
 
     # Initialize data structures
     # CNM Eq 8-9 (Eq 8 was missing a factor of 2 (from A_ij + A_ji)
@@ -79,7 +93,7 @@ def greedy_modularity_communities(G, weight=None):
     a = [k[i] * q0 for i in range(N)]
     dq_dict = {
         i: {
-            j: 2 * q0 - 2 * k[i] * k[j] * q0 * q0
+            j: 2 * q0 - 2 * resolution * k[i] * k[j] * q0 * q0
             for j in [node_for_label[u] for u in G.neighbors(label_for_node[i])]
             if j != i
         }
@@ -138,10 +152,10 @@ def greedy_modularity_communities(G, weight=None):
             if k in both_set:
                 dq_jk = dq_dict[j][k] + dq_dict[i][k]
             elif k in j_set:
-                dq_jk = dq_dict[j][k] - 2.0 * a[i] * a[k]
+                dq_jk = dq_dict[j][k] - 2.0 * resolution * a[i] * a[k]
             else:
                 # k in i_set
-                dq_jk = dq_dict[i][k] - 2.0 * a[j] * a[k]
+                dq_jk = dq_dict[i][k] - 2.0 * resolution * a[j] * a[k]
             # Update rows j and k
             for row, col in [(j, k), (k, j)]:
                 # Save old value for finding heap index
@@ -209,10 +223,42 @@ def greedy_modularity_communities(G, weight=None):
     return sorted(communities, key=len, reverse=True)
 
 
-def naive_greedy_modularity_communities(G):
-    """Find communities in graph using the greedy modularity maximization.
+def naive_greedy_modularity_communities(G, resolution=1):
+    """Find communities in G using greedy modularity maximization.
+
     This implementation is O(n^4), much slower than alternatives, but it is
     provided as an easy-to-understand reference implementation.
+
+    Greedy modularity maximization begins with each node in its own community
+    and joins the pair of communities that most increases modularity until no
+    such pair exists.
+
+    This function maximizes the generalized modularity, where `resolution`
+    is the resolution parameter, often expressed as $\gamma$.
+    See :func:`~networkx.algorithms.community.quality.modularity`.
+
+    Parameters
+    ----------
+    G : NetworkX graph
+
+    Returns
+    -------
+    list
+        A list of sets of nodes, one for each community.
+        Sorted by length with largest communities first.
+
+    Examples
+    --------
+    >>> from networkx.algorithms.community import greedy_modularity_communities
+    >>> G = nx.karate_club_graph()
+    >>> c = list(greedy_modularity_communities(G))
+    >>> sorted(c[0])
+    [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
+
+    See Also
+    --------
+    greedy_modularity_communities
+    modularity
     """
     # First create one community for each node
     communities = list([frozenset([u]) for u in G.nodes()])
@@ -220,7 +266,7 @@ def naive_greedy_modularity_communities(G):
     merges = []
     # Greedily merge communities until no improvement is possible
     old_modularity = None
-    new_modularity = modularity(G, communities)
+    new_modularity = modularity(G, communities, resolution=resolution)
     while old_modularity is None or new_modularity > old_modularity:
         # Save modularity for comparison
         old_modularity = new_modularity
@@ -229,13 +275,15 @@ def naive_greedy_modularity_communities(G):
         to_merge = None
         for i, u in enumerate(communities):
             for j, v in enumerate(communities):
-                # Skip i=j and empty communities
+                # Skip i==j and empty communities
                 if j <= i or len(u) == 0 or len(v) == 0:
                     continue
                 # Merge communities u and v
                 trial_communities[j] = u | v
                 trial_communities[i] = frozenset([])
-                trial_modularity = modularity(G, trial_communities)
+                trial_modularity = modularity(
+                    G, trial_communities, resolution=resolution
+                )
                 if trial_modularity >= new_modularity:
                     # Check if strictly better or tie
                     if trial_modularity > new_modularity:
@@ -257,8 +305,7 @@ def naive_greedy_modularity_communities(G):
             communities[j] = u | v
             communities[i] = frozenset([])
     # Remove empty communities and sort
-    communities = [c for c in communities if len(c) > 0]
-    yield from sorted(communities, key=lambda x: len(x), reverse=True)
+    return sorted((c for c in communities if len(c) > 0), key=len, reverse=True)
 
 
 # old name
diff --git a/networkx/algorithms/community/quality.py b/networkx/algorithms/community/quality.py
index 8db1c960..470db9ee 100644
--- a/networkx/algorithms/community/quality.py
+++ b/networkx/algorithms/community/quality.py
@@ -244,31 +244,38 @@ def coverage(G, partition):
     return intra_edges / total_edges
 
 
-def modularity(G, communities, weight="weight"):
+def modularity(G, communities, weight="weight", resolution=1):
     r"""Returns the modularity of the given partition of the graph.
 
     Modularity is defined in [1]_ as
 
     .. math::
-
-        Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \frac{k_ik_j}{2m}\right)
+        Q = \frac{1}{2m} \sum_{ij} \left( A_{ij} - \gamma\frac{k_ik_j}{2m}\right)
             \delta(c_i,c_j)
 
-    where $m$ is the number of edges, $A$ is the adjacency matrix of
-    `G`, $k_i$ is the degree of $i$ and $\delta(c_i, c_j)$
-    is 1 if $i$ and $j$ are in the same community and 0 otherwise.
+    where $m$ is the number of edges, $A$ is the adjacency matrix of `G`,
+    $k_i$ is the degree of $i$, $\gamma$ is the resolution parameter,
+    and $\delta(c_i, c_j)$ is 1 if $i$ and $j$ are in the same community else 0.
 
     According to [2]_ (and verified by some algebra) this can be reduced to
 
     .. math::
        Q = \sum_{c=1}^{n}
-       \left[ \frac{L_c}{m} - \left( \frac{k_c}{2m} \right) ^2 \right]
+       \left[ \frac{L_c}{m} - \gamma\left( \frac{k_c}{2m} \right) ^2 \right]
 
     where the sum iterates over all communities $c$, $m$ is the number of edges,
     $L_c$ is the number of intra-community links for community $c$,
-    $k_c$ is the sum of degrees of the nodes in community $c$.
+    $k_c$ is the sum of degrees of the nodes in community $c$,
+    and $\gamma$ is the resolution parameter.
+
+    The resolution parameter sets an arbitrary tradeoff between intra-group
+    edges and inter-group edges. More complex grouping patterns can be
+    discovered by analyzing the same network with multiple values of gamma
+    and then combining the results [3]_. That said, it is very common to
+    simply use gamma=1. More on the choice of gamma is in [4]_.
 
     The second formula is the one actually used in calculation of the modularity.
+    For directed graphs the second formula replaces $k_c$ with $k^{in}_c k^{out}_c$.
 
     Parameters
     ----------
@@ -282,6 +289,10 @@ def modularity(G, communities, weight="weight"):
             as a weight. If None or an edge does not have that attribute,
             then that edge has weight 1.
 
+    resolution : float (default=1)
+        If resolution is less than 1, modularity favors larger communities.
+        Greater than 1 favors smaller communities.
+
     Returns
     -------
     Q : float
@@ -303,11 +314,17 @@ def modularity(G, communities, weight="weight"):
 
     References
     ----------
-    .. [1] M. E. J. Newman *Networks: An Introduction*, page 224.
+    .. [1] M. E. J. Newman "Networks: An Introduction", page 224.
        Oxford University Press, 2011.
     .. [2] Clauset, Aaron, Mark EJ Newman, and Cristopher Moore.
        "Finding community structure in very large networks."
-       Physical review E 70.6 (2004). <https://arxiv.org/abs/cond-mat/0408187>
+       Phys. Rev. E 70.6 (2004). <https://arxiv.org/abs/cond-mat/0408187>
+    .. [3] Reichardt and Bornholdt "Statistical Mechanics of Community Detection"
+       Phys. Rev. E 74, 016110, 2006. https://doi.org/10.1103/PhysRevE.74.016110
+    .. [4] M. E. J. Newman, "Equivalence between modularity optimization and
+       maximum likelihood methods for community detection"
+       Phys. Rev. E 94, 052315, 2016. https://doi.org/10.1103/PhysRevE.94.052315
+
     """
     if not isinstance(communities, list):
         communities = list(communities)
@@ -333,7 +350,7 @@ def modularity(G, communities, weight="weight"):
         out_degree_sum = sum(out_degree[u] for u in comm)
         in_degree_sum = sum(in_degree[u] for u in comm) if directed else out_degree_sum
 
-        return L_c / m - out_degree_sum * in_degree_sum * norm
+        return L_c / m - resolution * out_degree_sum * in_degree_sum * norm
 
     return sum(map(community_contribution, communities))
 
diff --git a/networkx/algorithms/community/tests/test_modularity_max.py b/networkx/algorithms/community/tests/test_modularity_max.py
index ba80ef94..c6fe3fa2 100644
--- a/networkx/algorithms/community/tests/test_modularity_max.py
+++ b/networkx/algorithms/community/tests/test_modularity_max.py
@@ -1,39 +1,43 @@
+import pytest
+
 import networkx as nx
 from networkx.algorithms.community import (
     greedy_modularity_communities,
+    modularity,
     naive_greedy_modularity_communities,
 )
 
 
-class TestCNM:
-    def setup(self):
-        self.G = nx.karate_club_graph()
+@pytest.mark.parametrize(
+    "func", (greedy_modularity_communities, naive_greedy_modularity_communities)
+)
+def test_modularity_communities(func):
+    G = nx.karate_club_graph()
+
+    john_a = frozenset(
+        [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
+    )
+    mr_hi = frozenset([0, 4, 5, 6, 10, 11, 16, 19])
+    overlap = frozenset([1, 2, 3, 7, 9, 12, 13, 17, 21])
+    expected = {john_a, overlap, mr_hi}
 
-    def _check_communities(self, expected):
-        communities = set(greedy_modularity_communities(self.G))
-        assert communities == expected
+    assert set(func(G)) == expected
 
-    def test_karate_club(self):
-        john_a = frozenset(
-            [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
-        )
-        mr_hi = frozenset([0, 4, 5, 6, 10, 11, 16, 19])
-        overlap = frozenset([1, 2, 3, 7, 9, 12, 13, 17, 21])
-        self._check_communities({john_a, overlap, mr_hi})
 
+def test_resolution_parameter_impact():
+    G = nx.barbell_graph(5, 3)
 
-class TestNaive:
-    def setup(self):
-        self.G = nx.karate_club_graph()
+    gamma = 1
+    expected = [frozenset(range(5)), frozenset(range(8, 13)), frozenset(range(5, 8))]
+    assert greedy_modularity_communities(G, resolution=gamma) == expected
+    assert naive_greedy_modularity_communities(G, resolution=gamma) == expected
 
-    def _check_communities(self, expected):
-        communities = set(naive_greedy_modularity_communities(self.G))
-        assert communities == expected
+    gamma = 2.5
+    expected = [{0, 1, 2, 3}, {9, 10, 11, 12}, {5, 6, 7}, {4}, {8}]
+    assert greedy_modularity_communities(G, resolution=gamma) == expected
+    assert naive_greedy_modularity_communities(G, resolution=gamma) == expected
 
-    def test_karate_club(self):
-        john_a = frozenset(
-            [8, 14, 15, 18, 20, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33]
-        )
-        mr_hi = frozenset([0, 4, 5, 6, 10, 11, 16, 19])
-        overlap = frozenset([1, 2, 3, 7, 9, 12, 13, 17, 21])
-        self._check_communities({john_a, overlap, mr_hi})
+    gamma = 0.3
+    expected = [frozenset(range(8)), frozenset(range(8, 13))]
+    assert greedy_modularity_communities(G, resolution=gamma) == expected
+    assert naive_greedy_modularity_communities(G, resolution=gamma) == expected
diff --git a/networkx/algorithms/community/tests/test_quality.py b/networkx/algorithms/community/tests/test_quality.py
index a28f99e4..79d60e7f 100644
--- a/networkx/algorithms/community/tests/test_quality.py
+++ b/networkx/algorithms/community/tests/test_quality.py
@@ -2,6 +2,7 @@
 module.
 
 """
+import pytest
 
 import networkx as nx
 from networkx import barbell_graph
@@ -73,6 +74,64 @@ def test_modularity():
     assert almost_equal(2 / 9, modularity(G, C))
 
 
+def test_modularity_resolution():
+    G = nx.barbell_graph(3, 0)
+    C = [{0, 1, 4}, {2, 3, 5}]
+    assert modularity(G, C) == pytest.approx(3 / 7 - 100 / 14 ** 2)
+    gamma = 2
+    result = modularity(G, C, resolution=gamma)
+    assert result == pytest.approx(3 / 7 - gamma * 100 / 14 ** 2)
+    gamma = 0.2
+    result = modularity(G, C, resolution=gamma)
+    assert result == pytest.approx(3 / 7 - gamma * 100 / 14 ** 2)
+
+    C = [{0, 1, 2}, {3, 4, 5}]
+    assert modularity(G, C) == pytest.approx(6 / 7 - 98 / 14 ** 2)
+    gamma = 2
+    result = modularity(G, C, resolution=gamma)
+    assert result == pytest.approx(6 / 7 - gamma * 98 / 14 ** 2)
+    gamma = 0.2
+    result = modularity(G, C, resolution=gamma)
+    assert result == pytest.approx(6 / 7 - gamma * 98 / 14 ** 2)
+
+    G = nx.barbell_graph(5, 3)
+    C = [frozenset(range(5)), frozenset(range(8, 13)), frozenset(range(5, 8))]
+    gamma = 1
+    result = modularity(G, C, resolution=gamma)
+    # This C is maximal for gamma=1:  modularity = 0.518229
+    assert result == pytest.approx((22 / 24) - gamma * (918 / (48 ** 2)))
+    gamma = 2
+    result = modularity(G, C, resolution=gamma)
+    assert result == pytest.approx((22 / 24) - gamma * (918 / (48 ** 2)))
+    gamma = 0.2
+    result = modularity(G, C, resolution=gamma)
+    assert result == pytest.approx((22 / 24) - gamma * (918 / (48 ** 2)))
+
+    C = [{0, 1, 2, 3}, {9, 10, 11, 12}, {5, 6, 7}, {4}, {8}]
+    gamma = 1
+    result = modularity(G, C, resolution=gamma)
+    assert result == pytest.approx((14 / 24) - gamma * (598 / (48 ** 2)))
+    gamma = 2.5
+    result = modularity(G, C, resolution=gamma)
+    # This C is maximal for gamma=2.5:  modularity = -0.06553819
+    assert result == pytest.approx((14 / 24) - gamma * (598 / (48 ** 2)))
+    gamma = 0.2
+    result = modularity(G, C, resolution=gamma)
+    assert result == pytest.approx((14 / 24) - gamma * (598 / (48 ** 2)))
+
+    C = [frozenset(range(8)), frozenset(range(8, 13))]
+    gamma = 1
+    result = modularity(G, C, resolution=gamma)
+    assert result == pytest.approx((23 / 24) - gamma * (1170 / (48 ** 2)))
+    gamma = 2
+    result = modularity(G, C, resolution=gamma)
+    assert result == pytest.approx((23 / 24) - gamma * (1170 / (48 ** 2)))
+    gamma = 0.3
+    result = modularity(G, C, resolution=gamma)
+    # This C is maximal for gamma=0.3:  modularity = 0.805990
+    assert result == pytest.approx((23 / 24) - gamma * (1170 / (48 ** 2)))
+
+
 def test_inter_community_edges_with_digraphs():
     G = nx.complete_graph(2, create_using=nx.DiGraph())
     partition = [{0}, {1}]
author	Xiaoyan Lu <xiaoyan.lu.xl@gmail.com>	2021-02-23 23:05:50 -0500
committer	GitHub <noreply@github.com>	2021-02-23 23:05:50 -0500
commit	e4d1483f241e9a56fbda247592710b659d29bce5 (patch)
tree	8a6f67845492cfe96e66ae18efffebb3f9dfc973
parent	5d10139b38c93adaff5872e4e1e4e9892eb2118b (diff)
download	networkx-e4d1483f241e9a56fbda247592710b659d29bce5.tar.gz