Add new '-j' option for running checks in sub-processes.

Patch by Michal Nowikowski.
author: Claudiu Popa <pcmanticore@gmail.com> 2014-10-15 14:30:13 +0300
committer: Claudiu Popa <pcmanticore@gmail.com> 2014-10-15 14:30:13 +0300
commit: 69672965cabf5f31d3f17c2095ba9685deafb0a8 (patch)
tree: 91bdd0b52e3fe032520f910ecc04b2184f2f7eed
parent: a20b20729bad9edfb9a97cb618f742d9a76266a5 (diff)
download: pylint-69672965cabf5f31d3f17c2095ba9685deafb0a8.tar.gz
9 files changed, 252 insertions, 23 deletions
diff --git a/ChangeLog b/ChangeLog
index bdd3ec5..0b12ace 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -16,6 +16,8 @@ ChangeLog for Pylint
       wrong-spelling-in-comment, wrong-spelling-in-docstring.
       New options: spelling-dict, spelling-ignore-words.
 
+    * Add new '-j' option for running checks in sub-processes.
+
     * Added new checks for line endings if they are mixed (LF vs CRLF)
       or if they are not as expected. New messages: mixed-line-endings,
       unexpected-line-ending-format. New option: expected-line-ending-format.
diff --git a/doc/Makefile b/doc/Makefile
index 98076a6..d483f63 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -12,7 +12,7 @@ PAPEROPT_a4     = -D latex_paper_size=a4
 PAPEROPT_letter = -D latex_paper_size=letter
 ALLSPHINXOPTS   = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) .
 
-.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest
+.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest all
 
 help:
 	@echo "Please use \`make <target>' where <target> is one of"
diff --git a/doc/run.rst b/doc/run.rst
index d4a2aa9..f8a65e2 100644
--- a/doc/run.rst
+++ b/doc/run.rst
@@ -90,7 +90,7 @@ expression in special cases). For a full list of options, use ``--help``
 
 Specifying all the options suitable for your setup and coding
 standards can be tedious, so it is possible to use a configuration file to
-specify the default values.  You can specify a configuration file on the 
+specify the default values.  You can specify a configuration file on the
 command line using the ``--rcfile`` option.  Otherwise, Pylint searches for a
 configuration file in the following order and uses the first one it finds:
 
@@ -117,7 +117,7 @@ includes:
 * Options appearing before ``--generate-rcfile`` on the Pylint command line
 
 Of course you can also start with the default values and hand tune the
-configuration. 
+configuration.
 
 Other useful global options include:
 
@@ -128,5 +128,26 @@ Other useful global options include:
 --output-format=<format>   Select output format (text, html, custom).
 --msg-template=<template>  Modifiy text output message template.
 --list-msgs                Generate pylint's messages.
---full-documentation       Generate pylint's full documentation, in reST 
+--full-documentation       Generate pylint's full documentation, in reST
                              format.
+
+Parallel execution
+------------------
+
+It is possible to speed up the execution of Pylint. If the computer it runs on has more than one CPU,
+the files to be checked can be distributed among several Pylint sub-processes.
+This functionality is exposed via the ``-j`` command line parameter. It takes the number of sub-processes
+that should be spawned. If the provided number is 0, the number of CPUs is used.
+The default number is 1.
+
+Example::
+
+  pylint -j 4 mymodule1.py mymodule2.py mymodule3.py mymodule4.py
+
+This will spawn 4 parallel Pylint sub-processes. Each provided module will be checked in parallel.
+Problems discovered by the checkers are not displayed immediately. They are shown only after
+the check of a module has completed.
+
+There are some limitations to running checks in parallel in the current implementation.
+It is not possible to use custom plugins (i.e. the ``--load-plugins`` option).
+It is also not possible to use an initialization hook (i.e. the ``--init-hook`` option).
diff --git a/lint.py b/lint.py
index 18e37cd..4c92b4e 100644
--- a/lint.py
+++ b/lint.py
@@ -36,7 +36,13 @@ import tokenize
 from operator import attrgetter
 from warnings import warn
 
-from logilab.common.configuration import UnsupportedAction, OptionsManagerMixIn
+try:
+    import multiprocessing
+except ImportError:
+    multiprocessing = None
+
+from logilab.common.configuration import (
+    UnsupportedAction, OptionsManagerMixIn, format_option_value)
 from logilab.common.optik_ext import check_csv
 from logilab.common.interface import implements
 from logilab.common.textutils import splitstrip, unquote
@@ -51,12 +57,12 @@ from pylint.utils import (
     MSG_TYPES, OPTION_RGX,
     PyLintASTWalker, UnknownMessage, MessagesHandlerMixIn, ReportsHandlerMixIn,
     MessagesStore, FileState, EmptyReport,
-    expand_modules, tokenize_module)
+    expand_modules, tokenize_module, Message)
 from pylint.interfaces import IRawChecker, ITokenChecker, IAstroidChecker, CONFIDENCE_LEVELS
 from pylint.checkers import (BaseTokenChecker,
                              table_lines_from_stats,
                              initialize as checkers_initialize)
-from pylint.reporters import initialize as reporters_initialize
+from pylint.reporters import initialize as reporters_initialize, CollectingReporter
 from pylint import config
 
 from pylint.__pkginfo__ import version
@@ -155,6 +161,55 @@ def _deprecated_option(shortname, opt_type):
             'type': opt_type, 'action': 'callback', 'callback': _warn_deprecated}
 
 
+if multiprocessing is not None:
+    class ChildLinter(multiprocessing.Process):  # pylint: disable=no-member
+        def run(self):
+            tasks_queue, results_queue, config = self._args  # pylint: disable=no-member
+
+            for file_or_module in iter(tasks_queue.get, 'STOP'):
+                result = self._run_linter(config, file_or_module[0])
+                try:
+                    results_queue.put(result)
+                except Exception as ex:
+                    print("internal error with sending report for module %s" % file_or_module, file=sys.stderr)
+                    print(ex, file=sys.stderr)
+                    results_queue.put({})
+
+        def _run_linter(self, config, file_or_module):
+            linter = PyLinter()
+
+            # Register standard checkers.
+            linter.load_default_plugins()
+            # Load command line plugins.
+            # TODO linter.load_plugin_modules(self._plugins)
+            # The Run options are not available here because they are patched onto the
+            # PyLinter options. To fix this, the Run options should be moved to the
+            # PyLinter class so that they become available here.
+
+            linter.disable('pointless-except')
+            linter.disable('suppressed-message')
+            linter.disable('useless-suppression')
+
+            # Copy config with skipping command-line specific options.
+            linter_config = {}
+            filter_options = {"symbols", "include-ids"}
+            for opt_providers in six.itervalues(linter._all_options):
+                for optname, optdict in opt_providers.options:
+                    if optname not in filter_options:
+                        linter_config[optname] = config[optname]
+            linter_config['jobs'] = 1  # Child does not parallelize any further.
+            linter.load_configuration(**linter_config)
+
+            linter.set_reporter(CollectingReporter())
+
+            # Run the checks.
+            linter.check(file_or_module)
+
+            msgs = [m.get_init_args() for m in linter.reporter.messages]
+            return (file_or_module, linter.file_state.base_name, linter.current_name,
+                     msgs, linter.stats, linter.msg_status)
+
+
 class PyLinter(OptionsManagerMixIn, MessagesHandlerMixIn, ReportsHandlerMixIn,
                BaseTokenChecker):
     """lint Python modules using external checkers.
@@ -286,7 +341,14 @@ class PyLinter(OptionsManagerMixIn, MessagesHandlerMixIn, ReportsHandlerMixIn,
 
                 ('include-ids', _deprecated_option('i', 'yn')),
                 ('symbols', _deprecated_option('s', 'yn')),
-               )
+
+                ('jobs',
+                 {'type' : 'int', 'metavar': '<n-processes>',
+                  'short': 'j',
+                  'default': 1,
+                  'help' : '''Use multiple processes to speed up PyLint.''',
+                  }), # jobs
+            )
 
     option_groups = (
         ('Messages control', 'Options controling analysis messages'),
@@ -539,7 +601,7 @@ class PyLinter(OptionsManagerMixIn, MessagesHandlerMixIn, ReportsHandlerMixIn,
             messages = set(msg for msg in checker.msgs
                            if msg[0] != 'F' and self.is_message_enabled(msg))
             if (messages or
-                    any(self.report_is_enabled(r[0]) for r in checker.reports)):
+                any(self.report_is_enabled(r[0]) for r in checker.reports)):
                 neededcheckers.append(checker)
         # Sort checkers by priority
         neededcheckers = sorted(neededcheckers, key=attrgetter('priority'),
@@ -574,6 +636,104 @@ class PyLinter(OptionsManagerMixIn, MessagesHandlerMixIn, ReportsHandlerMixIn,
 
         if not isinstance(files_or_modules, (list, tuple)):
             files_or_modules = (files_or_modules,)
+
+        if self.config.jobs == 1:
+            self._do_check(files_or_modules)
+        else:
+            self._parallel_check(files_or_modules)
+
+    def _parallel_check(self, files_or_modules):
+        """Spawn a defined number of subprocesses."""
+
+        manager = multiprocessing.Manager()  # pylint: disable=no-member
+        tasks_queue = manager.Queue()  # pylint: disable=no-member
+        results_queue = manager.Queue()  # pylint: disable=no-member
+
+        # Prepare configuration for child linters.
+        config = {}
+        for opt_providers in six.itervalues(self._all_options):
+            for optname, optdict, val in opt_providers.options_and_values():
+                config[optname] = format_option_value(optdict, val)
+
+        # Reset stats.
+        self.open()
+
+        # Spawn child linters.
+        childs = []
+        for _ in range(self.config.jobs):
+            cl = ChildLinter(args=(tasks_queue, results_queue, config))
+            cl.start()  # pylint: disable=no-member
+            childs.append(cl)
+
+        # send files to child linters
+        for files_or_module in files_or_modules:
+            tasks_queue.put([files_or_module])
+
+        # collect results from child linters
+        failed = False
+        all_stats = []
+        for i in range(len(files_or_modules)):
+            try:
+                (
+                    file_or_module,
+                    self.file_state.base_name,
+                    module,
+                    messages,
+                    stats,
+                    msg_status
+                ) = results_queue.get()
+            except Exception as ex:
+                print("internal error while receiving results from child linter",
+                      file=sys.stderr)
+                print(ex, file=sys.stderr)
+                failed = True
+                break
+
+            if file_or_module == files_or_modules[-1]:
+                last_module = module
+
+            for msg in messages:
+                msg = Message(*msg)
+                self.set_current_module(module)
+                self.reporter.handle_message(msg)
+
+            all_stats.append(stats)
+            self.msg_status |= msg_status
+
+        # Stop child linters and wait for their completion.
+        for i in range(self.config.jobs):
+            tasks_queue.put('STOP')
+        for cl in childs:
+            cl.join()
+
+        if failed:
+            print("Error occured, stopping the linter.", file=sys.stderr)
+            sys.exit(32)
+
+        all_stats.append(self.stats)
+        all_stats = self._merge_stats(all_stats)
+        self.stats = all_stats
+        self.current_name = last_module
+
+        # Insert stats data to local checkers.
+        for checker in self.get_checkers():
+            if checker is not self:
+                checker.stats = self.stats
+
+    def _merge_stats(self, stats):
+        merged = {}
+        for stat in stats:
+            for key, item in six.iteritems(stat):
+                if key not in merged:
+                    merged[key] = item
+                else:
+                    if isinstance(item, dict):
+                        merged[key].update(item)
+                    else:
+                        merged[key] = merged[key] + item
+        return merged
+
+    def _do_check(self, files_or_modules):
         walker = PyLintASTWalker(self)
         checkers = self.prepare_checkers()
         tokencheckers = [c for c in checkers if implements(c, ITokenChecker)
@@ -610,7 +770,6 @@ class PyLinter(OptionsManagerMixIn, MessagesHandlerMixIn, ReportsHandlerMixIn,
             for msgid, line, args in self.file_state.iter_spurious_suppression_messages(self.msgs_store):
                 self.add_message(msgid, line, None, args)
         # notify global end
-        self.set_current_module('')
         self.stats['statement'] = walker.nbstatements
         checkers.reverse()
         for checker in checkers:
@@ -695,7 +854,7 @@ class PyLinter(OptionsManagerMixIn, MessagesHandlerMixIn, ReportsHandlerMixIn,
         for msg_cat in six.itervalues(MSG_TYPES):
             self.stats[msg_cat] = 0
 
-    def close(self):
+    def generate_reports(self):
         """close the whole package /module, it's time to make reports !
 
         if persistent run, pickle results for later comparison
@@ -1009,6 +1168,20 @@ group are mutually exclusive.'),
         if not args:
             print(linter.help())
             sys.exit(32)
+
+        if linter.config.jobs < 0:
+            print("Jobs number (%d) should be greater than 0"
+                  % linter.config.jobs, file=sys.stderr)
+            sys.exit(32)
+        if linter.config.jobs > 1 or linter.config.jobs == 0:
+            if multiprocessing is None:
+                print("Multiprocessing library is missing, "
+                      "fallback to single process", file=sys.stderr)
+                linter.set_option("jobs", 1)
+            else:
+                if linter.config.jobs == 0:
+                    linter.config.jobs = multiprocessing.cpu_count()
+
         # insert current working directory to the python path to have a correct
         # behaviour
         linter.prepare_import_path(args)
@@ -1023,6 +1196,7 @@ group are mutually exclusive.'),
             data.print_stats(30)
         else:
             linter.check(args)
+            linter.generate_reports()
         linter.cleanup_import_path()
         if exit:
             sys.exit(self.linter.msg_status)
diff --git a/reporters/__init__.py b/reporters/__init__.py
index 8be5d3a..5ebb6aa 100644
--- a/reporters/__init__.py
+++ b/reporters/__init__.py
@@ -62,7 +62,7 @@ class BaseReporter(object):
 
         Invokes the legacy add_message API by default."""
         self.add_message(
-            msg.msg_id, (msg.abspath, msg.module, msg.obj, msg.line, msg.column), 
+            msg.msg_id, (msg.abspath, msg.module, msg.obj, msg.line, msg.column),
             msg.msg)
 
     def add_message(self, msg_id, location, msg):
@@ -115,6 +115,19 @@ class BaseReporter(object):
         pass
 
 
+class CollectingReporter(BaseReporter):
+    """collects messages"""
+
+    name = 'collector'
+
+    def __init__(self):
+        BaseReporter.__init__(self)
+        self.messages = []
+
+    def handle_message(self, msg):
+        self.messages.append(msg)
+
+
 def initialize(linter):
     """initialize linter with reporters in this package """
     utils.register_plugins(linter, __path__[0])
diff --git a/test/test_import_graph.py b/test/test_import_graph.py
index 1bb76b2..2948dd1 100644
--- a/test/test_import_graph.py
+++ b/test/test_import_graph.py
@@ -52,6 +52,7 @@ class ImportCheckerTC(unittest.TestCase):
         l.global_set_option('ignore', ('func_unknown_encoding.py',))
         try:
             l.check('input')
+            l.generate_reports()
             self.assertTrue(exists('import.dot'))
             self.assertTrue(exists('ext_import.dot'))
             self.assertTrue(exists('int_import.dot'))
diff --git a/test/test_self.py b/test/test_self.py
index 7eb3261..c9fa2f7 100644
--- a/test/test_self.py
+++ b/test/test_self.py
@@ -93,7 +93,8 @@ class RunTC(unittest.TestCase):
             HTMLReporter(StringIO()),
             ColorizedTextReporter(StringIO())
         ]
-        self._runtest(['pylint.lint'], reporter=MultiReporter(reporters))
+        self._runtest(['pylint/test/functional/arguments.py'],
+                      reporter=MultiReporter(reporters), code=1)
 
     def test_no_ext_file(self):
         self._runtest([join(HERE, 'input', 'noext')], code=0)
@@ -127,6 +128,12 @@ class RunTC(unittest.TestCase):
         self._runtest([join(HERE, 'regrtest_data/no_stdout_encoding.py')],
                       out=strio)
 
+    @unittest.skipIf(sys.platform.startswith("win") and sys.version_info[0] == 2,
+                     "This test does not work on Python 2.X due to a bug in "
+                     "multiprocessing.")
+    def test_parallel_execution(self):
+        self._runtest(['-j 2', 'pylint/test/functional/arguments.py',
+                       'pylint/test/functional/bad_continuation.py'], code=1)
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/test/unittest_lint.py b/test/unittest_lint.py
index 91525e3..bef9c14 100644
--- a/test/unittest_lint.py
+++ b/test/unittest_lint.py
@@ -39,6 +39,12 @@ if sys.platform == 'win32':
 else:
     HOME = 'HOME'
 
+def remove(file):
+    try:
+        os.remove(file)
+    except OSError:
+        pass
+
 class GetNoteMessageTC(unittest.TestCase):
     def test(self):
         msg = None
@@ -254,21 +260,21 @@ class PyLinterTC(unittest.TestCase):
             strio = 'io'
         self.linter.config.files_output = True
         pylint_strio = 'pylint_%s.txt' % strio
-        try:
-            self.linter.check(strio)
-            self.assertTrue(os.path.exists(pylint_strio))
-            self.assertTrue(os.path.exists('pylint_global.txt'))
-        finally:
-            try:
-                os.remove(pylint_strio)
-                os.remove('pylint_global.txt')
-            except:
-                pass
+        files = [pylint_strio, 'pylint_global.txt']
+        for file in files:
+            self.addCleanup(remove, file)
+
+        self.linter.check(strio)
+        self.linter.generate_reports()
+        for f in files:
+            self.assertTrue(os.path.exists(f))
 
     def test_lint_should_analyze_file(self):
         self.linter.set_reporter(text.TextReporter())
         self.linter.config.files_output = True
         self.linter.should_analyze_file = lambda *args: False
+        self.addCleanup(remove, 'pylint_logilab.txt')
+
         self.linter.check('logilab')
         self.assertTrue(os.path.exists('pylint_logilab.txt'))
         self.assertFalse(os.path.exists('pylint_logilab_common.txt'))
@@ -372,6 +378,7 @@ class PyLinterTC(unittest.TestCase):
         self.linter.set_reporter(html.HTMLReporter(output))
         self.linter.set_option('output-format', 'html')
         self.linter.check('troppoptop.py')
+        self.linter.generate_reports()
         value = output.getvalue()
         self.assertIn('troppoptop.py', value)
         self.assertIn('fatal', value)
diff --git a/utils.py b/utils.py
index 068afce..36194b7 100644
--- a/utils.py
+++ b/utils.py
@@ -94,6 +94,10 @@ class Message(_MsgBase):
             cls, msg_id, symbol, msg, msg_id[0], MSG_TYPES[msg_id[0]],
             confidence, *location)
 
+    def get_init_args(self):
+        location = (self.abspath, self.module, self.obj, self.line, self.column)
+        return (self.msg_id, self.symbol, location, self.msg, self.confidence)
+
     def format(self, template):
         """Format the message according to the given template.
author	Claudiu Popa <pcmanticore@gmail.com>	2014-10-15 14:30:13 +0300
committer	Claudiu Popa <pcmanticore@gmail.com>	2014-10-15 14:30:13 +0300
commit	69672965cabf5f31d3f17c2095ba9685deafb0a8 (patch)
tree	91bdd0b52e3fe032520f910ecc04b2184f2f7eed
parent	a20b20729bad9edfb9a97cb618f742d9a76266a5 (diff)
download	pylint-69672965cabf5f31d3f17c2095ba9685deafb0a8.tar.gz