diff options
author | Yuto Yamaguchi <yamaguchiyuto@users.noreply.github.com> | 2018-01-28 22:12:28 +0900 |
---|---|---|
committer | Dan Schult <dschult@colgate.edu> | 2018-01-28 08:12:28 -0500 |
commit | 39866aa66e8ae7999348f35d75616b76a9d453ba (patch) | |
tree | dece099c0e780b51fc4c94a6238283e01add3607 | |
parent | b22da6106736394fa7624c016c8734c960bf0699 (diff) | |
download | networkx-39866aa66e8ae7999348f35d75616b76a9d453ba.tar.gz |
Implement a module for node classification (#2848)
* Implement a module for node classification
* Fix tests
* Put helper functions inside the public function.
* Rename modules
* Fix examples
* Try imports
* Add a doc for node classification
* Add one test case
* Add skip annotation.
* Follow PEP8
* Follow PEP8 in test code
* Fix doctest violation
-rw-r--r-- | doc/reference/algorithms/node_classification.rst | 23 | ||||
-rw-r--r-- | networkx/algorithms/__init__.py | 1 | ||||
-rw-r--r-- | networkx/algorithms/node_classification/__init__.py | 23 | ||||
-rw-r--r-- | networkx/algorithms/node_classification/hmn.py | 160 | ||||
-rw-r--r-- | networkx/algorithms/node_classification/lgc.py | 169 | ||||
-rw-r--r-- | networkx/algorithms/node_classification/tests/test_harmonic_function.py | 92 | ||||
-rw-r--r-- | networkx/algorithms/node_classification/tests/test_local_and_global_consistency.py | 80 | ||||
-rw-r--r-- | networkx/algorithms/node_classification/utils.py | 102 | ||||
-rw-r--r-- | setup.py | 2 |
9 files changed, 652 insertions, 0 deletions
diff --git a/doc/reference/algorithms/node_classification.rst b/doc/reference/algorithms/node_classification.rst new file mode 100644 index 00000000..f8afa4df --- /dev/null +++ b/doc/reference/algorithms/node_classification.rst @@ -0,0 +1,23 @@ +*********** +Node Classification +*********** + +.. automodule:: networkx.algorithms.node_classification +.. currentmodule:: networkx + +Harmonic Function (HMN) +------------ +.. automodule:: networkx.algorithms.community.hmn +.. autosummary:: + :toctree: generated/ + + harmonic_function + + +Local and Global Consistency (LGC) +------------ +.. automodule:: networkx.algorithms.community.lgc +.. autosummary:: + :toctree: generated/ + + local_and_global_consistency diff --git a/networkx/algorithms/__init__.py b/networkx/algorithms/__init__.py index c77e966e..fa2776eb 100644 --- a/networkx/algorithms/__init__.py +++ b/networkx/algorithms/__init__.py @@ -50,6 +50,7 @@ from networkx.algorithms.wiener import * # the `networkx` namespace. import networkx.algorithms.assortativity import networkx.algorithms.bipartite +import networkx.algorithms.node_classification import networkx.algorithms.centrality import networkx.algorithms.chordal import networkx.algorithms.cluster diff --git a/networkx/algorithms/node_classification/__init__.py b/networkx/algorithms/node_classification/__init__.py new file mode 100644 index 00000000..f1db37da --- /dev/null +++ b/networkx/algorithms/node_classification/__init__.py @@ -0,0 +1,23 @@ +""" This module provides the functions for node classification problem. + +The functions in this module are not imported +into the top level `networkx` namespace. +You can access these functions by importing +the `networkx.algorithms.node_classification` modules, +then accessing the functions as attributes of `node_classification`. +For example: + + >>> import networkx as nx + >>> from networkx.algorithms import node_classification + >>> G = nx.path_graph(4) + >>> G.edges() + EdgeView([(0, 1), (1, 2), (2, 3)]) + >>> G.node[0]['label'] = 'A' + >>> G.node[3]['label'] = 'B' + >>> node_classification.harmonic_function(G) # doctest: +SKIP + ['A', 'A', 'B', 'B'] + +""" + +from networkx.algorithms.node_classification.hmn import * +from networkx.algorithms.node_classification.lgc import * diff --git a/networkx/algorithms/node_classification/hmn.py b/networkx/algorithms/node_classification/hmn.py new file mode 100644 index 00000000..30da956e --- /dev/null +++ b/networkx/algorithms/node_classification/hmn.py @@ -0,0 +1,160 @@ +# -*- coding: utf-8 -*- +# +# Author: Yuto Yamaguchi <yuto.ymgc@gmail.com> + +"""Function for computing Harmonic function algorithm by Zhu et al. + +References +---------- +Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August). +Semi-supervised learning using gaussian fields and harmonic functions. +In ICML (Vol. 3, pp. 912-919). +""" + + +import networkx as nx + +from networkx.utils.decorators import not_implemented_for +from networkx.algorithms.node_classification.utils import ( + _get_label_info, + _init_label_matrix, + _propagate, + _predict, +) + +__all__ = ['harmonic_function'] + + +@not_implemented_for('directed') +def harmonic_function(G, max_iter=30, label_name='label'): + """Node classification by Harmonic function + + Parameters + ---------- + G : NetworkX Graph + max_iter : int + maximum number of iterations allowed + label_name : string + name of target labels to predict + + Raises + ---------- + `NetworkXError` if no nodes on `G` has `label_name`. + + Returns + ---------- + predicted : array, shape = [n_samples] + Array of predicted labels + + Examples + -------- + >>> from networkx.algorithms import node_classification + >>> G = nx.path_graph(4) + >>> G.node[0]['label'] = 'A' + >>> G.node[3]['label'] = 'B' + >>> G.nodes(data=True) + NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}}) + >>> G.edges() + EdgeView([(0, 1), (1, 2), (2, 3)]) + >>> predicted = node_classification.harmonic_function(G) + >>> predicted + ['A', 'A', 'B', 'B'] + + References + ---------- + Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August). + Semi-supervised learning using gaussian fields and harmonic functions. + In ICML (Vol. 3, pp. 912-919). + """ + try: + import numpy as np + except ImportError: + raise ImportError( + "harmonic_function() requires numpy: http://scipy.org/ ") + try: + from scipy import sparse + except ImportError: + raise ImportError( + "harmonic_function() requires scipy: http://scipy.org/ ") + + def _build_propagation_matrix(X, labels): + """Build propagation matrix of Harmonic function + + Parameters + ---------- + X : scipy sparse matrix, shape = [n_samples, n_samples] + Adjacency matrix + labels : array, shape = [n_samples, 2] + Array of pairs of node id and label id + + Returns + ---------- + P : scipy sparse matrix, shape = [n_samples, n_samples] + Propagation matrix + + """ + degrees = X.sum(axis=0).A[0] + degrees[degrees == 0] = 1 # Avoid division by 0 + D = sparse.diags((1.0 / degrees), offsets=0) + P = D.dot(X).tolil() + P[labels[:, 0]] = 0 # labels[:, 0] indicates IDs of labeled nodes + return P + + def _build_base_matrix(X, labels, n_classes): + """Build base matrix of Harmonic function + + Parameters + ---------- + X : scipy sparse matrix, shape = [n_samples, n_samples] + Adjacency matrix + labels : array, shape = [n_samples, 2] + Array of pairs of node id and label id + n_classes : integer + The number of classes (distinct labels) on the input graph + + Returns + ---------- + B : array, shape = [n_samples, n_classes] + Base matrix + """ + n_samples = X.shape[0] + B = np.zeros((n_samples, n_classes)) + B[labels[:, 0], labels[:, 1]] = 1 + return B + + X = nx.to_scipy_sparse_matrix(G) # adjacency matrix + labels, label_dict = _get_label_info(G, label_name) + + if labels.shape[0] == 0: + raise nx.NetworkXError( + "No node on the input graph is labeled by '" + label_name + "'.") + + n_samples = X.shape[0] + n_classes = label_dict.shape[0] + + F = _init_label_matrix(n_samples, n_classes) + + P = _build_propagation_matrix(X, labels) + B = _build_base_matrix(X, labels, n_classes) + + remaining_iter = max_iter + while remaining_iter > 0: + F = _propagate(P, F, B) + remaining_iter -= 1 + + predicted = _predict(F, label_dict) + + return predicted + + +def setup_module(module): + """Fixture for nose tests.""" + from nose import SkipTest + try: + import numpy + except ImportError: + raise SkipTest("NumPy not available") + try: + import scipy + except ImportError: + raise SkipTest("SciPy not available") diff --git a/networkx/algorithms/node_classification/lgc.py b/networkx/algorithms/node_classification/lgc.py new file mode 100644 index 00000000..250f7940 --- /dev/null +++ b/networkx/algorithms/node_classification/lgc.py @@ -0,0 +1,169 @@ +# -*- coding: utf-8 -*- +# +# Author: Yuto Yamaguchi <yuto.ymgc@gmail.com> + +"""Function for computing Local and global consistency algorithm by Zhou et al. + +References +---------- +Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004). +Learning with local and global consistency. +Advances in neural information processing systems, 16(16), 321-328. +""" + +import networkx as nx + +from networkx.utils.decorators import not_implemented_for +from networkx.algorithms.node_classification.utils import ( + _get_label_info, + _init_label_matrix, + _propagate, + _predict, +) + +__all__ = ['local_and_global_consistency'] + + +@not_implemented_for('directed') +def local_and_global_consistency(G, alpha=0.99, + max_iter=30, + label_name='label'): + """Node classification by Local and Global Consistency + + Parameters + ---------- + G : NetworkX Graph + alpha : float + Clamping factor + max_iter : int + Maximum number of iterations allowed + label_name : string + Name of target labels to predict + + Raises + ---------- + `NetworkXError` if no nodes on `G` has `label_name`. + + Returns + ---------- + predicted : array, shape = [n_samples] + Array of predicted labels + + Examples + -------- + >>> from networkx.algorithms import node_classification + >>> G = nx.path_graph(4) + >>> G.node[0]['label'] = 'A' + >>> G.node[3]['label'] = 'B' + >>> G.nodes(data=True) + NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}}) + >>> G.edges() + EdgeView([(0, 1), (1, 2), (2, 3)]) + >>> predicted = node_classification.local_and_global_consistency(G) + >>> predicted + ['A', 'A', 'B', 'B'] + + + References + ---------- + Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004). + Learning with local and global consistency. + Advances in neural information processing systems, 16(16), 321-328. + """ + try: + import numpy as np + except ImportError: + raise ImportError( + "local_and_global_consistency() requires numpy: ", + "http://scipy.org/ ") + try: + from scipy import sparse + except ImportError: + raise ImportError( + "local_and_global_consistensy() requires scipy: ", + "http://scipy.org/ ") + + def _build_propagation_matrix(X, labels, alpha): + """Build propagation matrix of Local and global consistency + + Parameters + ---------- + X : scipy sparse matrix, shape = [n_samples, n_samples] + Adjacency matrix + labels : array, shape = [n_samples, 2] + Array of pairs of node id and label id + alpha : float + Clamping factor + + Returns + ---------- + S : scipy sparse matrix, shape = [n_samples, n_samples] + Propagation matrix + + """ + degrees = X.sum(axis=0).A[0] + degrees[degrees == 0] = 1 # Avoid division by 0 + D2 = np.sqrt(sparse.diags((1.0 / degrees), offsets=0)) + S = alpha * D2.dot(X).dot(D2) + return S + + def _build_base_matrix(X, labels, alpha, n_classes): + """Build base matrix of Local and global consistency + + Parameters + ---------- + X : scipy sparse matrix, shape = [n_samples, n_samples] + Adjacency matrix + labels : array, shape = [n_samples, 2] + Array of pairs of node id and label id + alpha : float + Clamping factor + n_classes : integer + The number of classes (distinct labels) on the input graph + + Returns + ---------- + B : array, shape = [n_samples, n_classes] + Base matrix + """ + + n_samples = X.shape[0] + B = np.zeros((n_samples, n_classes)) + B[labels[:, 0], labels[:, 1]] = 1 - alpha + return B + + X = nx.to_scipy_sparse_matrix(G) # adjacency matrix + labels, label_dict = _get_label_info(G, label_name) + + if labels.shape[0] == 0: + raise nx.NetworkXError( + "No node on the input graph is labeled by '" + label_name + "'.") + + n_samples = X.shape[0] + n_classes = label_dict.shape[0] + F = _init_label_matrix(n_samples, n_classes) + + P = _build_propagation_matrix(X, labels, alpha) + B = _build_base_matrix(X, labels, alpha, n_classes) + + remaining_iter = max_iter + while remaining_iter > 0: + F = _propagate(P, F, B) + remaining_iter -= 1 + + predicted = _predict(F, label_dict) + + return predicted + + +def setup_module(module): + """Fixture for nose tests.""" + from nose import SkipTest + try: + import numpy + except ImportError: + raise SkipTest("NumPy not available") + try: + import scipy + except ImportError: + raise SkipTest("SciPy not available") diff --git a/networkx/algorithms/node_classification/tests/test_harmonic_function.py b/networkx/algorithms/node_classification/tests/test_harmonic_function.py new file mode 100644 index 00000000..84b62109 --- /dev/null +++ b/networkx/algorithms/node_classification/tests/test_harmonic_function.py @@ -0,0 +1,92 @@ +#!/usr/bin/env python +from nose.tools import * +from nose import SkipTest +import networkx as nx +from networkx.algorithms import node_classification + + +class TestHarmonicFunction: + + @classmethod + def setupClass(cls): + global numpy + global scipy + try: + import numpy + except ImportError: + raise SkipTest('NumPy not available.') + try: + import scipy + except ImportError: + raise SkipTest('SciPy not available.') + + def test_path_graph(self): + G = nx.path_graph(4) + label_name = 'label' + G.node[0][label_name] = 'A' + G.node[3][label_name] = 'B' + predicted = node_classification.harmonic_function( + G, label_name=label_name) + assert_equal(predicted[0], 'A') + assert_equal(predicted[1], 'A') + assert_equal(predicted[2], 'B') + assert_equal(predicted[3], 'B') + + @raises(nx.NetworkXError) + def test_no_labels(self): + G = nx.path_graph(4) + node_classification.harmonic_function(G) + + @raises(nx.NetworkXError) + def test_no_nodes(self): + G = nx.Graph() + node_classification.harmonic_function(G) + + @raises(nx.NetworkXError) + def test_no_edges(self): + G = nx.Graph() + G.add_node(1) + G.add_node(2) + node_classification.harmonic_function(G) + + @raises(nx.NetworkXNotImplemented) + def test_digraph(self): + G = nx.DiGraph() + G.add_edge(0, 1) + G.add_edge(1, 2) + G.add_edge(2, 3) + label_name = 'label' + G.node[0][label_name] = 'A' + G.node[3][label_name] = 'B' + node_classification.harmonic_function(G) + + def test_one_labeled_node(self): + G = nx.path_graph(4) + label_name = 'label' + G.node[0][label_name] = 'A' + predicted = node_classification.harmonic_function( + G, label_name=label_name) + assert_equal(predicted[0], 'A') + assert_equal(predicted[1], 'A') + assert_equal(predicted[2], 'A') + assert_equal(predicted[3], 'A') + + def test_nodes_all_labeled(self): + G = nx.karate_club_graph() + label_name = 'club' + predicted = node_classification.harmonic_function( + G, label_name=label_name) + for i in range(len(G)): + assert_equal(predicted[i], G.node[i][label_name]) + + def test_labeled_nodes_are_not_changed(self): + G = nx.karate_club_graph() + label_name = 'club' + label_removed = set([0, 1, 2, 3, 4, 5, 6, 7]) + for i in label_removed: + del G.node[i][label_name] + predicted = node_classification.harmonic_function( + G, label_name=label_name) + label_not_removed = set(list(range(len(G)))) - label_removed + for i in label_not_removed: + assert_equal(predicted[i], G.node[i][label_name]) diff --git a/networkx/algorithms/node_classification/tests/test_local_and_global_consistency.py b/networkx/algorithms/node_classification/tests/test_local_and_global_consistency.py new file mode 100644 index 00000000..3a64fbe7 --- /dev/null +++ b/networkx/algorithms/node_classification/tests/test_local_and_global_consistency.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python +from nose.tools import * +from nose import SkipTest +import networkx as nx +from networkx.algorithms import node_classification + + +class TestLocalAndGlobalConsistency: + + @classmethod + def setupClass(cls): + global numpy + global scipy + try: + import numpy + except ImportError: + raise SkipTest('NumPy not available.') + try: + import scipy + except ImportError: + raise SkipTest('SciPy not available.') + + def test_path_graph(self): + G = nx.path_graph(4) + label_name = 'label' + G.node[0][label_name] = 'A' + G.node[3][label_name] = 'B' + predicted = node_classification.local_and_global_consistency( + G, label_name=label_name) + assert_equal(predicted[0], 'A') + assert_equal(predicted[1], 'A') + assert_equal(predicted[2], 'B') + assert_equal(predicted[3], 'B') + + @raises(nx.NetworkXError) + def test_no_labels(self): + G = nx.path_graph(4) + node_classification.local_and_global_consistency(G) + + @raises(nx.NetworkXError) + def test_no_nodes(self): + G = nx.Graph() + node_classification.local_and_global_consistency(G) + + @raises(nx.NetworkXError) + def test_no_edges(self): + G = nx.Graph() + G.add_node(1) + G.add_node(2) + node_classification.local_and_global_consistency(G) + + @raises(nx.NetworkXNotImplemented) + def test_digraph(self): + G = nx.DiGraph() + G.add_edge(0, 1) + G.add_edge(1, 2) + G.add_edge(2, 3) + label_name = 'label' + G.node[0][label_name] = 'A' + G.node[3][label_name] = 'B' + node_classification.harmonic_function(G) + + def test_one_labeled_node(self): + G = nx.path_graph(4) + label_name = 'label' + G.node[0][label_name] = 'A' + predicted = node_classification.local_and_global_consistency( + G, label_name=label_name) + assert_equal(predicted[0], 'A') + assert_equal(predicted[1], 'A') + assert_equal(predicted[2], 'A') + assert_equal(predicted[3], 'A') + + def test_nodes_all_labeled(self): + G = nx.karate_club_graph() + label_name = 'club' + predicted = node_classification.local_and_global_consistency( + G, alpha=0, label_name=label_name) + for i in range(len(G)): + assert_equal(predicted[i], G.node[i][label_name]) diff --git a/networkx/algorithms/node_classification/utils.py b/networkx/algorithms/node_classification/utils.py new file mode 100644 index 00000000..39d5d3cd --- /dev/null +++ b/networkx/algorithms/node_classification/utils.py @@ -0,0 +1,102 @@ +# -*- coding: utf-8 -*- +# +# Author: Yuto Yamaguchi <yuto.ymgc@gmail.com> + +def _propagate(P, F, B): + """Propagate labels by one step + + Parameters + ---------- + P : scipy sparse matrix, shape = [n_samples, n_samples] + Propagation matrix + F : numpy array, shape = [n_samples, n_classes] + Label matrix + B : numpy array, shape = [n_samples, n_classes] + Base matrix + + Returns + ---------- + F_new : array, shape = [n_samples, n_classes] + Label matrix + """ + F_new = P.dot(F) + B + return F_new + + +def _get_label_info(G, label_name): + """Get and return information of labels from the input graph + + Parameters + ---------- + G : Network X graph + label_name : string + Name of the target label + + Returns + ---------- + labels : numpy array, shape = [n_labeled_samples, 2] + Array of pairs of labeled node ID and label ID + label_dict : numpy array, shape = [n_classes] + Array of labels + i-th element contains the label corresponding label ID `i` + """ + import numpy as np + + labels = [] + label_to_id = {} + lid = 0 + for i, n in enumerate(G.nodes(data=True)): + if label_name in n[1]: + label = n[1][label_name] + if label not in label_to_id: + label_to_id[label] = lid + lid += 1 + labels.append([i, label_to_id[label]]) + labels = np.array(labels) + label_dict = np.array([label for label, _ in sorted( + label_to_id.items(), key=lambda x:x[1])]) + return (labels, label_dict) + + +def _init_label_matrix(n_samples, n_classes): + """Create and return zero matrix + + Parameters + ---------- + n_samples : integer + The number of nodes (samples) on the input graph + n_classes : integer + The number of classes (distinct labels) on the input graph + + Returns + ---------- + F : numpy array, shape = [n_samples, n_classes] + Label matrix + """ + import numpy as np + + F = np.zeros((n_samples, n_classes)) + return F + + +def _predict(F, label_dict): + """Predict labels by learnt label matrix + + Parameters + ---------- + F : numpy array, shape = [n_samples, n_classes] + Learnt (resulting) label matrix + label_dict : numpy array, shape = [n_classes] + Array of labels + i-th element contains the label corresponding label ID `i` + + Returns + ---------- + predicted : numpy array, shape = [n_samples] + Array of predicted labels + """ + import numpy as np + + predicted_label_ids = np.argmax(F, axis=1) + predicted = label_dict[predicted_label_ids].tolist() + return predicted @@ -34,6 +34,7 @@ packages = ["networkx", "networkx.algorithms", "networkx.algorithms.assortativity", "networkx.algorithms.bipartite", + "networkx.algorithms.node_classification", "networkx.algorithms.centrality", "networkx.algorithms.community", "networkx.algorithms.components", @@ -85,6 +86,7 @@ package_data = { 'networkx.algorithms': ['tests/*.py'], 'networkx.algorithms.assortativity': ['tests/*.py'], 'networkx.algorithms.bipartite': ['tests/*.py'], + 'networkx.algorithms.node_classification': ['tests/*.py'], 'networkx.algorithms.centrality': ['tests/*.py'], 'networkx.algorithms.community': ['tests/*.py'], 'networkx.algorithms.components': ['tests/*.py'], |