summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorYuto Yamaguchi <yamaguchiyuto@users.noreply.github.com>2018-01-28 22:12:28 +0900
committerDan Schult <dschult@colgate.edu>2018-01-28 08:12:28 -0500
commit39866aa66e8ae7999348f35d75616b76a9d453ba (patch)
treedece099c0e780b51fc4c94a6238283e01add3607
parentb22da6106736394fa7624c016c8734c960bf0699 (diff)
downloadnetworkx-39866aa66e8ae7999348f35d75616b76a9d453ba.tar.gz
Implement a module for node classification (#2848)
* Implement a module for node classification * Fix tests * Put helper functions inside the public function. * Rename modules * Fix examples * Try imports * Add a doc for node classification * Add one test case * Add skip annotation. * Follow PEP8 * Follow PEP8 in test code * Fix doctest violation
-rw-r--r--doc/reference/algorithms/node_classification.rst23
-rw-r--r--networkx/algorithms/__init__.py1
-rw-r--r--networkx/algorithms/node_classification/__init__.py23
-rw-r--r--networkx/algorithms/node_classification/hmn.py160
-rw-r--r--networkx/algorithms/node_classification/lgc.py169
-rw-r--r--networkx/algorithms/node_classification/tests/test_harmonic_function.py92
-rw-r--r--networkx/algorithms/node_classification/tests/test_local_and_global_consistency.py80
-rw-r--r--networkx/algorithms/node_classification/utils.py102
-rw-r--r--setup.py2
9 files changed, 652 insertions, 0 deletions
diff --git a/doc/reference/algorithms/node_classification.rst b/doc/reference/algorithms/node_classification.rst
new file mode 100644
index 00000000..f8afa4df
--- /dev/null
+++ b/doc/reference/algorithms/node_classification.rst
@@ -0,0 +1,23 @@
+***********
+Node Classification
+***********
+
+.. automodule:: networkx.algorithms.node_classification
+.. currentmodule:: networkx
+
+Harmonic Function (HMN)
+------------
+.. automodule:: networkx.algorithms.community.hmn
+.. autosummary::
+ :toctree: generated/
+
+ harmonic_function
+
+
+Local and Global Consistency (LGC)
+------------
+.. automodule:: networkx.algorithms.community.lgc
+.. autosummary::
+ :toctree: generated/
+
+ local_and_global_consistency
diff --git a/networkx/algorithms/__init__.py b/networkx/algorithms/__init__.py
index c77e966e..fa2776eb 100644
--- a/networkx/algorithms/__init__.py
+++ b/networkx/algorithms/__init__.py
@@ -50,6 +50,7 @@ from networkx.algorithms.wiener import *
# the `networkx` namespace.
import networkx.algorithms.assortativity
import networkx.algorithms.bipartite
+import networkx.algorithms.node_classification
import networkx.algorithms.centrality
import networkx.algorithms.chordal
import networkx.algorithms.cluster
diff --git a/networkx/algorithms/node_classification/__init__.py b/networkx/algorithms/node_classification/__init__.py
new file mode 100644
index 00000000..f1db37da
--- /dev/null
+++ b/networkx/algorithms/node_classification/__init__.py
@@ -0,0 +1,23 @@
+""" This module provides the functions for node classification problem.
+
+The functions in this module are not imported
+into the top level `networkx` namespace.
+You can access these functions by importing
+the `networkx.algorithms.node_classification` modules,
+then accessing the functions as attributes of `node_classification`.
+For example:
+
+ >>> import networkx as nx
+ >>> from networkx.algorithms import node_classification
+ >>> G = nx.path_graph(4)
+ >>> G.edges()
+ EdgeView([(0, 1), (1, 2), (2, 3)])
+ >>> G.node[0]['label'] = 'A'
+ >>> G.node[3]['label'] = 'B'
+ >>> node_classification.harmonic_function(G) # doctest: +SKIP
+ ['A', 'A', 'B', 'B']
+
+"""
+
+from networkx.algorithms.node_classification.hmn import *
+from networkx.algorithms.node_classification.lgc import *
diff --git a/networkx/algorithms/node_classification/hmn.py b/networkx/algorithms/node_classification/hmn.py
new file mode 100644
index 00000000..30da956e
--- /dev/null
+++ b/networkx/algorithms/node_classification/hmn.py
@@ -0,0 +1,160 @@
+# -*- coding: utf-8 -*-
+#
+# Author: Yuto Yamaguchi <yuto.ymgc@gmail.com>
+
+"""Function for computing Harmonic function algorithm by Zhu et al.
+
+References
+----------
+Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
+Semi-supervised learning using gaussian fields and harmonic functions.
+In ICML (Vol. 3, pp. 912-919).
+"""
+
+
+import networkx as nx
+
+from networkx.utils.decorators import not_implemented_for
+from networkx.algorithms.node_classification.utils import (
+ _get_label_info,
+ _init_label_matrix,
+ _propagate,
+ _predict,
+)
+
+__all__ = ['harmonic_function']
+
+
+@not_implemented_for('directed')
+def harmonic_function(G, max_iter=30, label_name='label'):
+ """Node classification by Harmonic function
+
+ Parameters
+ ----------
+ G : NetworkX Graph
+ max_iter : int
+ maximum number of iterations allowed
+ label_name : string
+ name of target labels to predict
+
+ Raises
+ ----------
+ `NetworkXError` if no nodes on `G` has `label_name`.
+
+ Returns
+ ----------
+ predicted : array, shape = [n_samples]
+ Array of predicted labels
+
+ Examples
+ --------
+ >>> from networkx.algorithms import node_classification
+ >>> G = nx.path_graph(4)
+ >>> G.node[0]['label'] = 'A'
+ >>> G.node[3]['label'] = 'B'
+ >>> G.nodes(data=True)
+ NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
+ >>> G.edges()
+ EdgeView([(0, 1), (1, 2), (2, 3)])
+ >>> predicted = node_classification.harmonic_function(G)
+ >>> predicted
+ ['A', 'A', 'B', 'B']
+
+ References
+ ----------
+ Zhu, X., Ghahramani, Z., & Lafferty, J. (2003, August).
+ Semi-supervised learning using gaussian fields and harmonic functions.
+ In ICML (Vol. 3, pp. 912-919).
+ """
+ try:
+ import numpy as np
+ except ImportError:
+ raise ImportError(
+ "harmonic_function() requires numpy: http://scipy.org/ ")
+ try:
+ from scipy import sparse
+ except ImportError:
+ raise ImportError(
+ "harmonic_function() requires scipy: http://scipy.org/ ")
+
+ def _build_propagation_matrix(X, labels):
+ """Build propagation matrix of Harmonic function
+
+ Parameters
+ ----------
+ X : scipy sparse matrix, shape = [n_samples, n_samples]
+ Adjacency matrix
+ labels : array, shape = [n_samples, 2]
+ Array of pairs of node id and label id
+
+ Returns
+ ----------
+ P : scipy sparse matrix, shape = [n_samples, n_samples]
+ Propagation matrix
+
+ """
+ degrees = X.sum(axis=0).A[0]
+ degrees[degrees == 0] = 1 # Avoid division by 0
+ D = sparse.diags((1.0 / degrees), offsets=0)
+ P = D.dot(X).tolil()
+ P[labels[:, 0]] = 0 # labels[:, 0] indicates IDs of labeled nodes
+ return P
+
+ def _build_base_matrix(X, labels, n_classes):
+ """Build base matrix of Harmonic function
+
+ Parameters
+ ----------
+ X : scipy sparse matrix, shape = [n_samples, n_samples]
+ Adjacency matrix
+ labels : array, shape = [n_samples, 2]
+ Array of pairs of node id and label id
+ n_classes : integer
+ The number of classes (distinct labels) on the input graph
+
+ Returns
+ ----------
+ B : array, shape = [n_samples, n_classes]
+ Base matrix
+ """
+ n_samples = X.shape[0]
+ B = np.zeros((n_samples, n_classes))
+ B[labels[:, 0], labels[:, 1]] = 1
+ return B
+
+ X = nx.to_scipy_sparse_matrix(G) # adjacency matrix
+ labels, label_dict = _get_label_info(G, label_name)
+
+ if labels.shape[0] == 0:
+ raise nx.NetworkXError(
+ "No node on the input graph is labeled by '" + label_name + "'.")
+
+ n_samples = X.shape[0]
+ n_classes = label_dict.shape[0]
+
+ F = _init_label_matrix(n_samples, n_classes)
+
+ P = _build_propagation_matrix(X, labels)
+ B = _build_base_matrix(X, labels, n_classes)
+
+ remaining_iter = max_iter
+ while remaining_iter > 0:
+ F = _propagate(P, F, B)
+ remaining_iter -= 1
+
+ predicted = _predict(F, label_dict)
+
+ return predicted
+
+
+def setup_module(module):
+ """Fixture for nose tests."""
+ from nose import SkipTest
+ try:
+ import numpy
+ except ImportError:
+ raise SkipTest("NumPy not available")
+ try:
+ import scipy
+ except ImportError:
+ raise SkipTest("SciPy not available")
diff --git a/networkx/algorithms/node_classification/lgc.py b/networkx/algorithms/node_classification/lgc.py
new file mode 100644
index 00000000..250f7940
--- /dev/null
+++ b/networkx/algorithms/node_classification/lgc.py
@@ -0,0 +1,169 @@
+# -*- coding: utf-8 -*-
+#
+# Author: Yuto Yamaguchi <yuto.ymgc@gmail.com>
+
+"""Function for computing Local and global consistency algorithm by Zhou et al.
+
+References
+----------
+Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
+Learning with local and global consistency.
+Advances in neural information processing systems, 16(16), 321-328.
+"""
+
+import networkx as nx
+
+from networkx.utils.decorators import not_implemented_for
+from networkx.algorithms.node_classification.utils import (
+ _get_label_info,
+ _init_label_matrix,
+ _propagate,
+ _predict,
+)
+
+__all__ = ['local_and_global_consistency']
+
+
+@not_implemented_for('directed')
+def local_and_global_consistency(G, alpha=0.99,
+ max_iter=30,
+ label_name='label'):
+ """Node classification by Local and Global Consistency
+
+ Parameters
+ ----------
+ G : NetworkX Graph
+ alpha : float
+ Clamping factor
+ max_iter : int
+ Maximum number of iterations allowed
+ label_name : string
+ Name of target labels to predict
+
+ Raises
+ ----------
+ `NetworkXError` if no nodes on `G` has `label_name`.
+
+ Returns
+ ----------
+ predicted : array, shape = [n_samples]
+ Array of predicted labels
+
+ Examples
+ --------
+ >>> from networkx.algorithms import node_classification
+ >>> G = nx.path_graph(4)
+ >>> G.node[0]['label'] = 'A'
+ >>> G.node[3]['label'] = 'B'
+ >>> G.nodes(data=True)
+ NodeDataView({0: {'label': 'A'}, 1: {}, 2: {}, 3: {'label': 'B'}})
+ >>> G.edges()
+ EdgeView([(0, 1), (1, 2), (2, 3)])
+ >>> predicted = node_classification.local_and_global_consistency(G)
+ >>> predicted
+ ['A', 'A', 'B', 'B']
+
+
+ References
+ ----------
+ Zhou, D., Bousquet, O., Lal, T. N., Weston, J., & Schölkopf, B. (2004).
+ Learning with local and global consistency.
+ Advances in neural information processing systems, 16(16), 321-328.
+ """
+ try:
+ import numpy as np
+ except ImportError:
+ raise ImportError(
+ "local_and_global_consistency() requires numpy: ",
+ "http://scipy.org/ ")
+ try:
+ from scipy import sparse
+ except ImportError:
+ raise ImportError(
+ "local_and_global_consistensy() requires scipy: ",
+ "http://scipy.org/ ")
+
+ def _build_propagation_matrix(X, labels, alpha):
+ """Build propagation matrix of Local and global consistency
+
+ Parameters
+ ----------
+ X : scipy sparse matrix, shape = [n_samples, n_samples]
+ Adjacency matrix
+ labels : array, shape = [n_samples, 2]
+ Array of pairs of node id and label id
+ alpha : float
+ Clamping factor
+
+ Returns
+ ----------
+ S : scipy sparse matrix, shape = [n_samples, n_samples]
+ Propagation matrix
+
+ """
+ degrees = X.sum(axis=0).A[0]
+ degrees[degrees == 0] = 1 # Avoid division by 0
+ D2 = np.sqrt(sparse.diags((1.0 / degrees), offsets=0))
+ S = alpha * D2.dot(X).dot(D2)
+ return S
+
+ def _build_base_matrix(X, labels, alpha, n_classes):
+ """Build base matrix of Local and global consistency
+
+ Parameters
+ ----------
+ X : scipy sparse matrix, shape = [n_samples, n_samples]
+ Adjacency matrix
+ labels : array, shape = [n_samples, 2]
+ Array of pairs of node id and label id
+ alpha : float
+ Clamping factor
+ n_classes : integer
+ The number of classes (distinct labels) on the input graph
+
+ Returns
+ ----------
+ B : array, shape = [n_samples, n_classes]
+ Base matrix
+ """
+
+ n_samples = X.shape[0]
+ B = np.zeros((n_samples, n_classes))
+ B[labels[:, 0], labels[:, 1]] = 1 - alpha
+ return B
+
+ X = nx.to_scipy_sparse_matrix(G) # adjacency matrix
+ labels, label_dict = _get_label_info(G, label_name)
+
+ if labels.shape[0] == 0:
+ raise nx.NetworkXError(
+ "No node on the input graph is labeled by '" + label_name + "'.")
+
+ n_samples = X.shape[0]
+ n_classes = label_dict.shape[0]
+ F = _init_label_matrix(n_samples, n_classes)
+
+ P = _build_propagation_matrix(X, labels, alpha)
+ B = _build_base_matrix(X, labels, alpha, n_classes)
+
+ remaining_iter = max_iter
+ while remaining_iter > 0:
+ F = _propagate(P, F, B)
+ remaining_iter -= 1
+
+ predicted = _predict(F, label_dict)
+
+ return predicted
+
+
+def setup_module(module):
+ """Fixture for nose tests."""
+ from nose import SkipTest
+ try:
+ import numpy
+ except ImportError:
+ raise SkipTest("NumPy not available")
+ try:
+ import scipy
+ except ImportError:
+ raise SkipTest("SciPy not available")
diff --git a/networkx/algorithms/node_classification/tests/test_harmonic_function.py b/networkx/algorithms/node_classification/tests/test_harmonic_function.py
new file mode 100644
index 00000000..84b62109
--- /dev/null
+++ b/networkx/algorithms/node_classification/tests/test_harmonic_function.py
@@ -0,0 +1,92 @@
+#!/usr/bin/env python
+from nose.tools import *
+from nose import SkipTest
+import networkx as nx
+from networkx.algorithms import node_classification
+
+
+class TestHarmonicFunction:
+
+ @classmethod
+ def setupClass(cls):
+ global numpy
+ global scipy
+ try:
+ import numpy
+ except ImportError:
+ raise SkipTest('NumPy not available.')
+ try:
+ import scipy
+ except ImportError:
+ raise SkipTest('SciPy not available.')
+
+ def test_path_graph(self):
+ G = nx.path_graph(4)
+ label_name = 'label'
+ G.node[0][label_name] = 'A'
+ G.node[3][label_name] = 'B'
+ predicted = node_classification.harmonic_function(
+ G, label_name=label_name)
+ assert_equal(predicted[0], 'A')
+ assert_equal(predicted[1], 'A')
+ assert_equal(predicted[2], 'B')
+ assert_equal(predicted[3], 'B')
+
+ @raises(nx.NetworkXError)
+ def test_no_labels(self):
+ G = nx.path_graph(4)
+ node_classification.harmonic_function(G)
+
+ @raises(nx.NetworkXError)
+ def test_no_nodes(self):
+ G = nx.Graph()
+ node_classification.harmonic_function(G)
+
+ @raises(nx.NetworkXError)
+ def test_no_edges(self):
+ G = nx.Graph()
+ G.add_node(1)
+ G.add_node(2)
+ node_classification.harmonic_function(G)
+
+ @raises(nx.NetworkXNotImplemented)
+ def test_digraph(self):
+ G = nx.DiGraph()
+ G.add_edge(0, 1)
+ G.add_edge(1, 2)
+ G.add_edge(2, 3)
+ label_name = 'label'
+ G.node[0][label_name] = 'A'
+ G.node[3][label_name] = 'B'
+ node_classification.harmonic_function(G)
+
+ def test_one_labeled_node(self):
+ G = nx.path_graph(4)
+ label_name = 'label'
+ G.node[0][label_name] = 'A'
+ predicted = node_classification.harmonic_function(
+ G, label_name=label_name)
+ assert_equal(predicted[0], 'A')
+ assert_equal(predicted[1], 'A')
+ assert_equal(predicted[2], 'A')
+ assert_equal(predicted[3], 'A')
+
+ def test_nodes_all_labeled(self):
+ G = nx.karate_club_graph()
+ label_name = 'club'
+ predicted = node_classification.harmonic_function(
+ G, label_name=label_name)
+ for i in range(len(G)):
+ assert_equal(predicted[i], G.node[i][label_name])
+
+ def test_labeled_nodes_are_not_changed(self):
+ G = nx.karate_club_graph()
+ label_name = 'club'
+ label_removed = set([0, 1, 2, 3, 4, 5, 6, 7])
+ for i in label_removed:
+ del G.node[i][label_name]
+ predicted = node_classification.harmonic_function(
+ G, label_name=label_name)
+ label_not_removed = set(list(range(len(G)))) - label_removed
+ for i in label_not_removed:
+ assert_equal(predicted[i], G.node[i][label_name])
diff --git a/networkx/algorithms/node_classification/tests/test_local_and_global_consistency.py b/networkx/algorithms/node_classification/tests/test_local_and_global_consistency.py
new file mode 100644
index 00000000..3a64fbe7
--- /dev/null
+++ b/networkx/algorithms/node_classification/tests/test_local_and_global_consistency.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+from nose.tools import *
+from nose import SkipTest
+import networkx as nx
+from networkx.algorithms import node_classification
+
+
+class TestLocalAndGlobalConsistency:
+
+ @classmethod
+ def setupClass(cls):
+ global numpy
+ global scipy
+ try:
+ import numpy
+ except ImportError:
+ raise SkipTest('NumPy not available.')
+ try:
+ import scipy
+ except ImportError:
+ raise SkipTest('SciPy not available.')
+
+ def test_path_graph(self):
+ G = nx.path_graph(4)
+ label_name = 'label'
+ G.node[0][label_name] = 'A'
+ G.node[3][label_name] = 'B'
+ predicted = node_classification.local_and_global_consistency(
+ G, label_name=label_name)
+ assert_equal(predicted[0], 'A')
+ assert_equal(predicted[1], 'A')
+ assert_equal(predicted[2], 'B')
+ assert_equal(predicted[3], 'B')
+
+ @raises(nx.NetworkXError)
+ def test_no_labels(self):
+ G = nx.path_graph(4)
+ node_classification.local_and_global_consistency(G)
+
+ @raises(nx.NetworkXError)
+ def test_no_nodes(self):
+ G = nx.Graph()
+ node_classification.local_and_global_consistency(G)
+
+ @raises(nx.NetworkXError)
+ def test_no_edges(self):
+ G = nx.Graph()
+ G.add_node(1)
+ G.add_node(2)
+ node_classification.local_and_global_consistency(G)
+
+ @raises(nx.NetworkXNotImplemented)
+ def test_digraph(self):
+ G = nx.DiGraph()
+ G.add_edge(0, 1)
+ G.add_edge(1, 2)
+ G.add_edge(2, 3)
+ label_name = 'label'
+ G.node[0][label_name] = 'A'
+ G.node[3][label_name] = 'B'
+ node_classification.harmonic_function(G)
+
+ def test_one_labeled_node(self):
+ G = nx.path_graph(4)
+ label_name = 'label'
+ G.node[0][label_name] = 'A'
+ predicted = node_classification.local_and_global_consistency(
+ G, label_name=label_name)
+ assert_equal(predicted[0], 'A')
+ assert_equal(predicted[1], 'A')
+ assert_equal(predicted[2], 'A')
+ assert_equal(predicted[3], 'A')
+
+ def test_nodes_all_labeled(self):
+ G = nx.karate_club_graph()
+ label_name = 'club'
+ predicted = node_classification.local_and_global_consistency(
+ G, alpha=0, label_name=label_name)
+ for i in range(len(G)):
+ assert_equal(predicted[i], G.node[i][label_name])
diff --git a/networkx/algorithms/node_classification/utils.py b/networkx/algorithms/node_classification/utils.py
new file mode 100644
index 00000000..39d5d3cd
--- /dev/null
+++ b/networkx/algorithms/node_classification/utils.py
@@ -0,0 +1,102 @@
+# -*- coding: utf-8 -*-
+#
+# Author: Yuto Yamaguchi <yuto.ymgc@gmail.com>
+
+def _propagate(P, F, B):
+ """Propagate labels by one step
+
+ Parameters
+ ----------
+ P : scipy sparse matrix, shape = [n_samples, n_samples]
+ Propagation matrix
+ F : numpy array, shape = [n_samples, n_classes]
+ Label matrix
+ B : numpy array, shape = [n_samples, n_classes]
+ Base matrix
+
+ Returns
+ ----------
+ F_new : array, shape = [n_samples, n_classes]
+ Label matrix
+ """
+ F_new = P.dot(F) + B
+ return F_new
+
+
+def _get_label_info(G, label_name):
+ """Get and return information of labels from the input graph
+
+ Parameters
+ ----------
+ G : Network X graph
+ label_name : string
+ Name of the target label
+
+ Returns
+ ----------
+ labels : numpy array, shape = [n_labeled_samples, 2]
+ Array of pairs of labeled node ID and label ID
+ label_dict : numpy array, shape = [n_classes]
+ Array of labels
+ i-th element contains the label corresponding label ID `i`
+ """
+ import numpy as np
+
+ labels = []
+ label_to_id = {}
+ lid = 0
+ for i, n in enumerate(G.nodes(data=True)):
+ if label_name in n[1]:
+ label = n[1][label_name]
+ if label not in label_to_id:
+ label_to_id[label] = lid
+ lid += 1
+ labels.append([i, label_to_id[label]])
+ labels = np.array(labels)
+ label_dict = np.array([label for label, _ in sorted(
+ label_to_id.items(), key=lambda x:x[1])])
+ return (labels, label_dict)
+
+
+def _init_label_matrix(n_samples, n_classes):
+ """Create and return zero matrix
+
+ Parameters
+ ----------
+ n_samples : integer
+ The number of nodes (samples) on the input graph
+ n_classes : integer
+ The number of classes (distinct labels) on the input graph
+
+ Returns
+ ----------
+ F : numpy array, shape = [n_samples, n_classes]
+ Label matrix
+ """
+ import numpy as np
+
+ F = np.zeros((n_samples, n_classes))
+ return F
+
+
+def _predict(F, label_dict):
+ """Predict labels by learnt label matrix
+
+ Parameters
+ ----------
+ F : numpy array, shape = [n_samples, n_classes]
+ Learnt (resulting) label matrix
+ label_dict : numpy array, shape = [n_classes]
+ Array of labels
+ i-th element contains the label corresponding label ID `i`
+
+ Returns
+ ----------
+ predicted : numpy array, shape = [n_samples]
+ Array of predicted labels
+ """
+ import numpy as np
+
+ predicted_label_ids = np.argmax(F, axis=1)
+ predicted = label_dict[predicted_label_ids].tolist()
+ return predicted
diff --git a/setup.py b/setup.py
index 02ee9cd9..dfe9cf52 100644
--- a/setup.py
+++ b/setup.py
@@ -34,6 +34,7 @@ packages = ["networkx",
"networkx.algorithms",
"networkx.algorithms.assortativity",
"networkx.algorithms.bipartite",
+ "networkx.algorithms.node_classification",
"networkx.algorithms.centrality",
"networkx.algorithms.community",
"networkx.algorithms.components",
@@ -85,6 +86,7 @@ package_data = {
'networkx.algorithms': ['tests/*.py'],
'networkx.algorithms.assortativity': ['tests/*.py'],
'networkx.algorithms.bipartite': ['tests/*.py'],
+ 'networkx.algorithms.node_classification': ['tests/*.py'],
'networkx.algorithms.centrality': ['tests/*.py'],
'networkx.algorithms.community': ['tests/*.py'],
'networkx.algorithms.components': ['tests/*.py'],