diff options
author | Aric Hagberg <aric.hagberg+github@gmail.com> | 2016-01-08 16:00:55 -0700 |
---|---|---|
committer | Aric Hagberg <aric.hagberg+github@gmail.com> | 2016-01-08 16:00:55 -0700 |
commit | 529c6fd5b103b345a734f0d33f8d43a28f401498 (patch) | |
tree | 11b3defad2c70ddae43d929fededab02aed92ebb | |
parent | 1aee422ce836d4253401145026fcd3628cee3ad5 (diff) | |
parent | 801b25867b35b827bea31bcaefc7bc143f12426f (diff) | |
download | networkx-529c6fd5b103b345a734f0d33f8d43a28f401498.tar.gz |
Merge pull request #1726 from jfinkels/kernighan-lin
Adds Kernighan--Lin bipartition algorithm.
-rw-r--r-- | doc/source/reference/algorithms.community.rst | 8 | ||||
-rw-r--r-- | networkx/algorithms/community/__init__.py | 1 | ||||
-rw-r--r-- | networkx/algorithms/community/community_utils.py | 35 | ||||
-rw-r--r-- | networkx/algorithms/community/kernighan_lin.py | 167 | ||||
-rw-r--r-- | networkx/algorithms/community/tests/test_kernighan_lin.py | 51 |
5 files changed, 262 insertions, 0 deletions
diff --git a/doc/source/reference/algorithms.community.rst b/doc/source/reference/algorithms.community.rst index 28b27f8a..746f7c35 100644 --- a/doc/source/reference/algorithms.community.rst +++ b/doc/source/reference/algorithms.community.rst @@ -5,6 +5,14 @@ Communities .. automodule:: networkx.algorithms.community .. currentmodule:: networkx +Bipartitions +------------ +.. automodule:: networkx.algorithms.community.kernighan_lin +.. autosummary:: + :toctree: generated/ + + kernighan_lin_bisection + K-Clique -------- .. automodule:: networkx.algorithms.community.kclique diff --git a/networkx/algorithms/community/__init__.py b/networkx/algorithms/community/__init__.py index 6f29ff8c..5a6c2840 100644 --- a/networkx/algorithms/community/__init__.py +++ b/networkx/algorithms/community/__init__.py @@ -1,4 +1,5 @@ from networkx.algorithms.community.asyn_lpa import * from networkx.algorithms.community.centrality import * +from networkx.algorithms.community.kernighan_lin import * from networkx.algorithms.community.kclique import * from networkx.algorithms.community.quality import * diff --git a/networkx/algorithms/community/community_utils.py b/networkx/algorithms/community/community_utils.py new file mode 100644 index 00000000..6babff2d --- /dev/null +++ b/networkx/algorithms/community/community_utils.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +# +# utils.py - helper functions for community-finding algorithms +# +# Copyright 2011 Ben Edwards <bedwards@cs.unm.edu>. +# Copyright 2011 Aric Hagberg <hagberg@lanl.gov>. +# Copyright 2015 NetworkX developers. +# +# This file is part of NetworkX. +# +# NetworkX is distributed under a BSD license; see LICENSE.txt for more +# information. +"""Helper functions for community-finding algorithms.""" + + +def is_partition(G, communities): + """Return ``True`` if and only if ``communities`` is a partition of + the nodes of ``G``. + + A partition of a universe set is a family of pairwise disjoint sets + whose union is the entire universe set. + + ``G`` is a NetworkX graph. + + ``communities`` is an iterable of sets of nodes of ``G``. This + iterable will be consumed multiple times during the execution of + this function. + + """ + # Alternate implementation: + # + # return (len(G) == sum(len(c) for c in community) and + # set(G) == set.union(*community)) + # + return all(sum(1 if v in c else 0 for c in communities) == 1 for v in G) diff --git a/networkx/algorithms/community/kernighan_lin.py b/networkx/algorithms/community/kernighan_lin.py new file mode 100644 index 00000000..698fed00 --- /dev/null +++ b/networkx/algorithms/community/kernighan_lin.py @@ -0,0 +1,167 @@ +# -*- coding: utf-8 -*- +# +# kernighan_lin.py - Kernighan–Lin bipartition algorithm +# +# Copyright 2011 Ben Edwards <bedwards@cs.unm.edu>. +# Copyright 2011 Aric Hagberg <hagberg@lanl.gov>. +# Copyright 2015 NetworkX developers. +# +# This file is part of NetworkX. +# +# NetworkX is distributed under a BSD license; see LICENSE.txt for more +# information. +"""Functions for computing the Kernighan–Lin bipartition algorithm.""" +from __future__ import division + +from collections import defaultdict +from itertools import islice +from operator import itemgetter +import random + +import networkx as nx +from networkx.utils import not_implemented_for +from .community_utils import is_partition + + +def _compute_delta(G, A, B, weight): + # helper to compute initial swap deltas for a pass + delta = defaultdict(float) + for u, v, d in G.edges(data=True): + w = d.get(weight, 1) + if u in A: + if v in A: + delta[u] -= w + delta[v] -= w + elif v in B: + delta[u] += w + delta[v] += w + elif u in B: + if v in A: + delta[u] += w + delta[v] += w + elif v in B: + delta[u] -= w + delta[v] -= w + return delta + + +def _update_delta(delta, G, A, B, u, v, weight): + # helper to update swap deltas during single pass + for _, nbr, d in G.edges(u, data=True): + w = d.get(weight, 1) + if nbr in A: + delta[nbr] += 2 * w + if nbr in B: + delta[nbr] -= 2 * w + for _, nbr, d in G.edges(v, data=True): + w = d.get(weight, 1) + if nbr in A: + delta[nbr] -= 2 * w + if nbr in B: + delta[nbr] += 2 * w + return delta + + +def _kernighan_lin_pass(G, A, B, weight): + # do a single iteration of Kernighan–Lin algorithm + # returns list of (g_i,u_i,v_i) for i node pairs u_i,v_i + multigraph = G.is_multigraph() + delta = _compute_delta(G, A, B, weight) + swapped = set() + gains = [] + while len(swapped) < len(G): + gain = [] + for u in A - swapped: + for v in B - swapped: + try: + if multigraph: + w = sum(d.get(weight, 1) for d in G[u][v].values()) + else: + w = G[u][v].get(weight, 1) + except KeyError: + w = 0 + gain.append((delta[u] + delta[v] - 2 * w, u, v)) + if len(gain) == 0: + break + maxg, u, v = max(gain, key=itemgetter(0)) + swapped |= {u, v} + gains.append((maxg, u, v)) + delta = _update_delta(delta, G, A - swapped, B - swapped, u, v, weight) + return gains + + +@not_implemented_for('directed') +def kernighan_lin_bisection(G, partition=None, max_iter=10, weight='weight'): + """Partition a graph into two blocks using the Kernighan–Lin + algorithm. + + This algorithm paritions a network into two sets by iteratively + swapping pairs of nodes to reduce the edge cut between the two sets. + + Parameters + ---------- + G : graph + + partition : tuple + Pair of iterables containing an intial partition. If not + specified, a random balanced partition is used. + + max_iter : int + Maximum number of times to attempt swaps to find an + improvemement before giving up. + + weight : key + Edge data key to use as weight. If ``None``, the weights are all + set to one. + + Returns + ------- + partition : tuple + A pair of sets of nodes representing the bipartition. + + Raises + ------- + NetworkXError + If partition is not a valid partition of the nodes of the graph. + + References + ---------- + .. [1] Kernighan, B. W.; Lin, Shen (1970). + "An efficient heuristic procedure for partitioning graphs." + *Bell Systems Technical Journal* 49: 291--307. + Oxford University Press 2011. + + """ + # If no partition is provided, split the nodes randomly into a + # balanced partition. + if partition is None: + nodes = list(G) + random.shuffle(nodes) + h = len(nodes) // 2 + partition = (nodes[:h], nodes[h:]) + # Make a copy of the partition as a pair of sets. + try: + A, B = set(partition[0]), set(partition[1]) + except: + raise ValueError('partition must be two sets') + if not is_partition(G, (A, B)): + raise nx.NetworkXError('partition invalid') + for i in range(max_iter): + # `gains` is a list of triples of the form (g, u, v) for each + # node pair (u, v), where `g` is the gain of that node pair. + gains = _kernighan_lin_pass(G, A, B, weight) + csum = list(nx.utils.accumulate(g for g, u, v in gains)) + max_cgain = max(csum) + if max_cgain <= 0: + break + # Get the node pairs up to the index of the maximum cumulative + # gain, and collect each `u` into `anodes` and each `v` into + # `bnodes`, for each pair `(u, v)`. + index = csum.index(max_cgain) + nodesets = islice(zip(*gains[:index + 1]), 1, 3) + anodes, bnodes = (set(s) for s in nodesets) + A |= bnodes + A -= anodes + B |= anodes + B -= bnodes + return A, B diff --git a/networkx/algorithms/community/tests/test_kernighan_lin.py b/networkx/algorithms/community/tests/test_kernighan_lin.py new file mode 100644 index 00000000..3d56b81e --- /dev/null +++ b/networkx/algorithms/community/tests/test_kernighan_lin.py @@ -0,0 +1,51 @@ +# test_kernighan_lin.py - unit tests for Kernighan–Lin bipartition algorithm +# +# Copyright 2011 Ben Edwards <bedwards@cs.unm.edu>. +# Copyright 2011 Aric Hagberg <hagberg@lanl.gov>. +# Copyright 2015 NetworkX developers. +# +# This file is part of NetworkX. +# +# NetworkX is distributed under a BSD license; see LICENSE.txt for more +# information. +"""Unit tests for the :mod:`networkx.algorithms.community.kernighan_lin` +module. + +""" +from nose.tools import assert_equal +from nose.tools import raises + +import networkx as nx + + +def assert_partition_equal(x, y): + assert_equal(set(map(frozenset, x)), set(map(frozenset, y))) + + +def test_partition(): + G = nx.barbell_graph(3, 0) + C = nx.kernighan_lin_bisection(G) + assert_partition_equal(C, [{0, 1, 2}, {3, 4, 5}]) + + +@raises(nx.NetworkXError) +def test_non_disjoint_partition(): + G = nx.barbell_graph(3, 0) + partition = ({0, 1, 2}, {2, 3, 4, 5}) + nx.kernighan_lin_bisection(G, partition) + + +@raises(nx.NetworkXError) +def test_too_many_blocks(): + G = nx.barbell_graph(3, 0) + partition = ({0, 1}, {2}, {3, 4, 5}) + nx.kernighan_lin_bisection(G, partition) + + +def test_multigraph(): + G = nx.cycle_graph(4) + M = nx.MultiGraph(G.edges()) + M.add_edges_from(G.edges()) + M.remove_edge(1, 2) + A, B = nx.kernighan_lin_bisection(M) + assert_partition_equal([A, B], [{0, 1}, {2, 3}]) |