diff options
author | Claudiu Popa <pcmanticore@gmail.com> | 2014-10-15 14:30:13 +0300 |
---|---|---|
committer | Claudiu Popa <pcmanticore@gmail.com> | 2014-10-15 14:30:13 +0300 |
commit | 69672965cabf5f31d3f17c2095ba9685deafb0a8 (patch) | |
tree | 91bdd0b52e3fe032520f910ecc04b2184f2f7eed | |
parent | a20b20729bad9edfb9a97cb618f742d9a76266a5 (diff) | |
download | pylint-69672965cabf5f31d3f17c2095ba9685deafb0a8.tar.gz |
Add new '-j' option for running checks in sub-processes.
Patch by Michal Nowikowski.
-rw-r--r-- | ChangeLog | 2 | ||||
-rw-r--r-- | doc/Makefile | 2 | ||||
-rw-r--r-- | doc/run.rst | 27 | ||||
-rw-r--r-- | lint.py | 188 | ||||
-rw-r--r-- | reporters/__init__.py | 15 | ||||
-rw-r--r-- | test/test_import_graph.py | 1 | ||||
-rw-r--r-- | test/test_self.py | 9 | ||||
-rw-r--r-- | test/unittest_lint.py | 27 | ||||
-rw-r--r-- | utils.py | 4 |
9 files changed, 252 insertions, 23 deletions
@@ -16,6 +16,8 @@ ChangeLog for Pylint wrong-spelling-in-comment, wrong-spelling-in-docstring. New options: spelling-dict, spelling-ignore-words. + * Add new '-j' option for running checks in sub-processes. + * Added new checks for line endings if they are mixed (LF vs CRLF) or if they are not as expected. New messages: mixed-line-endings, unexpected-line-ending-format. New option: expected-line-ending-format. diff --git a/doc/Makefile b/doc/Makefile index 98076a6..d483f63 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -12,7 +12,7 @@ PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . -.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest +.PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest all help: @echo "Please use \`make <target>' where <target> is one of" diff --git a/doc/run.rst b/doc/run.rst index d4a2aa9..f8a65e2 100644 --- a/doc/run.rst +++ b/doc/run.rst @@ -90,7 +90,7 @@ expression in special cases). For a full list of options, use ``--help`` Specifying all the options suitable for your setup and coding standards can be tedious, so it is possible to use a configuration file to -specify the default values. You can specify a configuration file on the +specify the default values. You can specify a configuration file on the command line using the ``--rcfile`` option. Otherwise, Pylint searches for a configuration file in the following order and uses the first one it finds: @@ -117,7 +117,7 @@ includes: * Options appearing before ``--generate-rcfile`` on the Pylint command line Of course you can also start with the default values and hand tune the -configuration. +configuration. 
Other useful global options include: @@ -128,5 +128,26 @@ Other useful global options include: --output-format=<format> Select output format (text, html, custom). --msg-template=<template> Modifiy text output message template. --list-msgs Generate pylint's messages. ---full-documentation Generate pylint's full documentation, in reST +--full-documentation Generate pylint's full documentation, in reST format. + +Parallel execution +------------------ + +It is possible to speed up the execution of Pylint. If the running computer has more than one CPU, +then the files to be checked can be spread across all processors by Pylint sub-processes. +This functionality is exposed via the ``-j`` command line parameter. It takes the number of sub-processes +that should be spawned. If the provided number is 0 then the number of CPUs will be used. +The default number is 1. + +Example:: + + pylint -j 4 mymodule1.py mymodule2.py mymodule3.py mymodule4.py + +This will spawn 4 parallel Pylint sub-processes. Each provided module will be checked in parallel. +Problems discovered by checkers are not displayed immediately. They are shown just after +a module has been checked. + +There are some limitations in running checks in parallel in the current implementation. +It is not possible to use custom plugins (i.e. ``--load-plugins`` option). +It is also not possible to use an initialization hook (i.e. ``--init-hook`` option). 
@@ -36,7 +36,13 @@ import tokenize from operator import attrgetter from warnings import warn -from logilab.common.configuration import UnsupportedAction, OptionsManagerMixIn +try: + import multiprocessing +except ImportError: + multiprocessing = None + +from logilab.common.configuration import ( + UnsupportedAction, OptionsManagerMixIn, format_option_value) from logilab.common.optik_ext import check_csv from logilab.common.interface import implements from logilab.common.textutils import splitstrip, unquote @@ -51,12 +57,12 @@ from pylint.utils import ( MSG_TYPES, OPTION_RGX, PyLintASTWalker, UnknownMessage, MessagesHandlerMixIn, ReportsHandlerMixIn, MessagesStore, FileState, EmptyReport, - expand_modules, tokenize_module) + expand_modules, tokenize_module, Message) from pylint.interfaces import IRawChecker, ITokenChecker, IAstroidChecker, CONFIDENCE_LEVELS from pylint.checkers import (BaseTokenChecker, table_lines_from_stats, initialize as checkers_initialize) -from pylint.reporters import initialize as reporters_initialize +from pylint.reporters import initialize as reporters_initialize, CollectingReporter from pylint import config from pylint.__pkginfo__ import version @@ -155,6 +161,55 @@ def _deprecated_option(shortname, opt_type): 'type': opt_type, 'action': 'callback', 'callback': _warn_deprecated} +if multiprocessing is not None: + class ChildLinter(multiprocessing.Process): # pylint: disable=no-member + def run(self): + tasks_queue, results_queue, config = self._args # pylint: disable=no-member + + for file_or_module in iter(tasks_queue.get, 'STOP'): + result = self._run_linter(config, file_or_module[0]) + try: + results_queue.put(result) + except Exception as ex: + print("internal error with sending report for module %s" % file_or_module, file=sys.stderr) + print(ex, file=sys.stderr) + results_queue.put({}) + + def _run_linter(self, config, file_or_module): + linter = PyLinter() + + # Register standard checkers. 
+ linter.load_default_plugins() + # Load command line plugins. + # TODO linter.load_plugin_modules(self._plugins) + # i.e. Run options are not available here as they are patches to Pylinter options. + # To fix it Run options should be moved to Pylinter class to make them + # available here. + + linter.disable('pointless-except') + linter.disable('suppressed-message') + linter.disable('useless-suppression') + + # Copy config with skipping command-line specific options. + linter_config = {} + filter_options = {"symbols", "include-ids"} + for opt_providers in six.itervalues(linter._all_options): + for optname, optdict in opt_providers.options: + if optname not in filter_options: + linter_config[optname] = config[optname] + linter_config['jobs'] = 1 # Child does not parallelize any further. + linter.load_configuration(**linter_config) + + linter.set_reporter(CollectingReporter()) + + # Run the checks. + linter.check(file_or_module) + + msgs = [m.get_init_args() for m in linter.reporter.messages] + return (file_or_module, linter.file_state.base_name, linter.current_name, + msgs, linter.stats, linter.msg_status) + + class PyLinter(OptionsManagerMixIn, MessagesHandlerMixIn, ReportsHandlerMixIn, BaseTokenChecker): """lint Python modules using external checkers. 
@@ -286,7 +341,14 @@ class PyLinter(OptionsManagerMixIn, MessagesHandlerMixIn, ReportsHandlerMixIn, ('include-ids', _deprecated_option('i', 'yn')), ('symbols', _deprecated_option('s', 'yn')), - ) + + ('jobs', + {'type' : 'int', 'metavar': '<n-processes>', + 'short': 'j', + 'default': 1, + 'help' : '''Use multiple processes to speed up PyLint.''', + }), # jobs + ) option_groups = ( ('Messages control', 'Options controling analysis messages'), @@ -539,7 +601,7 @@ class PyLinter(OptionsManagerMixIn, MessagesHandlerMixIn, ReportsHandlerMixIn, messages = set(msg for msg in checker.msgs if msg[0] != 'F' and self.is_message_enabled(msg)) if (messages or - any(self.report_is_enabled(r[0]) for r in checker.reports)): + any(self.report_is_enabled(r[0]) for r in checker.reports)): neededcheckers.append(checker) # Sort checkers by priority neededcheckers = sorted(neededcheckers, key=attrgetter('priority'), @@ -574,6 +636,104 @@ class PyLinter(OptionsManagerMixIn, MessagesHandlerMixIn, ReportsHandlerMixIn, if not isinstance(files_or_modules, (list, tuple)): files_or_modules = (files_or_modules,) + + if self.config.jobs == 1: + self._do_check(files_or_modules) + else: + self._parallel_check(files_or_modules) + + def _parallel_check(self, files_or_modules): + """Spawn a defined number of subprocesses.""" + + manager = multiprocessing.Manager() # pylint: disable=no-member + tasks_queue = manager.Queue() # pylint: disable=no-member + results_queue = manager.Queue() # pylint: disable=no-member + + # Prepare configuration for child linters. + config = {} + for opt_providers in six.itervalues(self._all_options): + for optname, optdict, val in opt_providers.options_and_values(): + config[optname] = format_option_value(optdict, val) + + # Reset stats. + self.open() + + # Spawn child linters. 
+ childs = [] + for _ in range(self.config.jobs): + cl = ChildLinter(args=(tasks_queue, results_queue, config)) + cl.start() # pylint: disable=no-member + childs.append(cl) + + # send files to child linters + for files_or_module in files_or_modules: + tasks_queue.put([files_or_module]) + + # collect results from child linters + failed = False + all_stats = [] + for i in range(len(files_or_modules)): + try: + ( + file_or_module, + self.file_state.base_name, + module, + messages, + stats, + msg_status + ) = results_queue.get() + except Exception as ex: + print("internal error while receiving results from child linter", + file=sys.stderr) + print(ex, file=sys.stderr) + failed = True + break + + if file_or_module == files_or_modules[-1]: + last_module = module + + for msg in messages: + msg = Message(*msg) + self.set_current_module(module) + self.reporter.handle_message(msg) + + all_stats.append(stats) + self.msg_status |= msg_status + + # Stop child linters and wait for their completion. + for i in range(self.config.jobs): + tasks_queue.put('STOP') + for cl in childs: + cl.join() + + if failed: + print("Error occured, stopping the linter.", file=sys.stderr) + sys.exit(32) + + all_stats.append(self.stats) + all_stats = self._merge_stats(all_stats) + self.stats = all_stats + self.current_name = last_module + + # Insert stats data to local checkers. 
+ for checker in self.get_checkers(): + if checker is not self: + checker.stats = self.stats + + def _merge_stats(self, stats): + merged = {} + for stat in stats: + for key, item in six.iteritems(stat): + if key not in merged: + merged[key] = item + else: + if isinstance(item, dict): + merged[key].update(item) + else: + merged[key] = merged[key] + item + return merged + + def _do_check(self, files_or_modules): walker = PyLintASTWalker(self) checkers = self.prepare_checkers() tokencheckers = [c for c in checkers if implements(c, ITokenChecker) @@ -610,7 +770,6 @@ class PyLinter(OptionsManagerMixIn, MessagesHandlerMixIn, ReportsHandlerMixIn, for msgid, line, args in self.file_state.iter_spurious_suppression_messages(self.msgs_store): self.add_message(msgid, line, None, args) # notify global end - self.set_current_module('') self.stats['statement'] = walker.nbstatements checkers.reverse() for checker in checkers: @@ -695,7 +854,7 @@ class PyLinter(OptionsManagerMixIn, MessagesHandlerMixIn, ReportsHandlerMixIn, for msg_cat in six.itervalues(MSG_TYPES): self.stats[msg_cat] = 0 - def close(self): + def generate_reports(self): """close the whole package /module, it's time to make reports ! 
if persistent run, pickle results for later comparison @@ -1009,6 +1168,20 @@ group are mutually exclusive.'), if not args: print(linter.help()) sys.exit(32) + + if linter.config.jobs < 0: + print("Jobs number (%d) should be greater than 0" + % linter.config.jobs, file=sys.stderr) + sys.exit(32) + if linter.config.jobs > 1 or linter.config.jobs == 0: + if multiprocessing is None: + print("Multiprocessing library is missing, " + "fallback to single process", file=sys.stderr) + linter.set_option("jobs", 1) + else: + if linter.config.jobs == 0: + linter.config.jobs = multiprocessing.cpu_count() + # insert current working directory to the python path to have a correct # behaviour linter.prepare_import_path(args) @@ -1023,6 +1196,7 @@ group are mutually exclusive.'), data.print_stats(30) else: linter.check(args) + linter.generate_reports() linter.cleanup_import_path() if exit: sys.exit(self.linter.msg_status) diff --git a/reporters/__init__.py b/reporters/__init__.py index 8be5d3a..5ebb6aa 100644 --- a/reporters/__init__.py +++ b/reporters/__init__.py @@ -62,7 +62,7 @@ class BaseReporter(object): Invokes the legacy add_message API by default.""" self.add_message( - msg.msg_id, (msg.abspath, msg.module, msg.obj, msg.line, msg.column), + msg.msg_id, (msg.abspath, msg.module, msg.obj, msg.line, msg.column), msg.msg) def add_message(self, msg_id, location, msg): @@ -115,6 +115,19 @@ class BaseReporter(object): pass +class CollectingReporter(BaseReporter): + """collects messages""" + + name = 'collector' + + def __init__(self): + BaseReporter.__init__(self) + self.messages = [] + + def handle_message(self, msg): + self.messages.append(msg) + + def initialize(linter): """initialize linter with reporters in this package """ utils.register_plugins(linter, __path__[0]) diff --git a/test/test_import_graph.py b/test/test_import_graph.py index 1bb76b2..2948dd1 100644 --- a/test/test_import_graph.py +++ b/test/test_import_graph.py @@ -52,6 +52,7 @@ class 
ImportCheckerTC(unittest.TestCase): l.global_set_option('ignore', ('func_unknown_encoding.py',)) try: l.check('input') + l.generate_reports() self.assertTrue(exists('import.dot')) self.assertTrue(exists('ext_import.dot')) self.assertTrue(exists('int_import.dot')) diff --git a/test/test_self.py b/test/test_self.py index 7eb3261..c9fa2f7 100644 --- a/test/test_self.py +++ b/test/test_self.py @@ -93,7 +93,8 @@ class RunTC(unittest.TestCase): HTMLReporter(StringIO()), ColorizedTextReporter(StringIO()) ] - self._runtest(['pylint.lint'], reporter=MultiReporter(reporters)) + self._runtest(['pylint/test/functional/arguments.py'], + reporter=MultiReporter(reporters), code=1) def test_no_ext_file(self): self._runtest([join(HERE, 'input', 'noext')], code=0) @@ -127,6 +128,12 @@ class RunTC(unittest.TestCase): self._runtest([join(HERE, 'regrtest_data/no_stdout_encoding.py')], out=strio) + @unittest.skipIf(sys.platform.startswith("win") and sys.version_info[0] == 2, + "This test does not work on Python 2.X due to a bug in " + "multiprocessing.") + def test_parallel_execution(self): + self._runtest(['-j 2', 'pylint/test/functional/arguments.py', + 'pylint/test/functional/bad_continuation.py'], code=1) if __name__ == '__main__': unittest.main() diff --git a/test/unittest_lint.py b/test/unittest_lint.py index 91525e3..bef9c14 100644 --- a/test/unittest_lint.py +++ b/test/unittest_lint.py @@ -39,6 +39,12 @@ if sys.platform == 'win32': else: HOME = 'HOME' +def remove(file): + try: + os.remove(file) + except OSError: + pass + class GetNoteMessageTC(unittest.TestCase): def test(self): msg = None @@ -254,21 +260,21 @@ class PyLinterTC(unittest.TestCase): strio = 'io' self.linter.config.files_output = True pylint_strio = 'pylint_%s.txt' % strio - try: - self.linter.check(strio) - self.assertTrue(os.path.exists(pylint_strio)) - self.assertTrue(os.path.exists('pylint_global.txt')) - finally: - try: - os.remove(pylint_strio) - os.remove('pylint_global.txt') - except: - pass + files = 
[pylint_strio, 'pylint_global.txt'] + for file in files: + self.addCleanup(remove, file) + + self.linter.check(strio) + self.linter.generate_reports() + for f in files: + self.assertTrue(os.path.exists(f)) def test_lint_should_analyze_file(self): self.linter.set_reporter(text.TextReporter()) self.linter.config.files_output = True self.linter.should_analyze_file = lambda *args: False + self.addCleanup(remove, 'pylint_logilab.txt') + self.linter.check('logilab') self.assertTrue(os.path.exists('pylint_logilab.txt')) self.assertFalse(os.path.exists('pylint_logilab_common.txt')) @@ -372,6 +378,7 @@ class PyLinterTC(unittest.TestCase): self.linter.set_reporter(html.HTMLReporter(output)) self.linter.set_option('output-format', 'html') self.linter.check('troppoptop.py') + self.linter.generate_reports() value = output.getvalue() self.assertIn('troppoptop.py', value) self.assertIn('fatal', value) @@ -94,6 +94,10 @@ class Message(_MsgBase): cls, msg_id, symbol, msg, msg_id[0], MSG_TYPES[msg_id[0]], confidence, *location) + def get_init_args(self): + location = (self.abspath, self.module, self.obj, self.line, self.column) + return (self.msg_id, self.symbol, location, self.msg, self.confidence) + def format(self, template): """Format the message according to the given template. |