diff options
author | Sylvain Thénault <thenault@gmail.com> | 2013-05-07 10:01:47 +0200 |
---|---|---|
committer | Sylvain Thénault <thenault@gmail.com> | 2013-05-07 10:01:47 +0200 |
commit | 8632761321fb522621c944722e68d0541fe32159 (patch) | |
tree | 89fe831e44dde84296e612d8ed93af7eaf5410eb | |
parent | 86c05cca464f1e6ee354d7724b0bad69516cb51a (diff) | |
parent | 7c5f3eee257a16245409598e4adb1b5f379de126 (diff) | |
download | pylint-git-8632761321fb522621c944722e68d0541fe32159.tar.gz |
Merged in tmarek/pylint (pull request #15)
Tokenize the input source only once and hand it to all checkers that need the token stream.
-rw-r--r-- | ChangeLog | 4 | ||||
-rw-r--r-- | checkers/__init__.py | 12 | ||||
-rw-r--r-- | checkers/format.py | 22 | ||||
-rw-r--r-- | checkers/raw_metrics.py | 10 | ||||
-rw-r--r-- | checkers/strings.py | 8 | ||||
-rw-r--r-- | interfaces.py | 9 | ||||
-rw-r--r-- | lint.py | 31 | ||||
-rw-r--r-- | test/unittest_lint.py | 4 | ||||
-rw-r--r-- | utils.py | 16 |
9 files changed, 73 insertions, 43 deletions
@@ -4,6 +4,10 @@ ChangeLog for Pylint -- * bitbucket #6: put back documentation in source distribution + * Added a new base class and interface for checkers that work on the + tokens rather than the syntax, and only tokenize the input file + once. + 2013-04-25 -- 0.28.0 * bitbucket #1: fix "dictionary changed size during iteration" crash diff --git a/checkers/__init__.py b/checkers/__init__.py index 700a78e0f..dd868c655 100644 --- a/checkers/__init__.py +++ b/checkers/__init__.py @@ -39,6 +39,7 @@ messages nor reports. XXX not true, emit a 07 report ! """ import tokenize +import warnings from os import listdir from os.path import dirname, join, isdir, splitext @@ -121,6 +122,9 @@ class BaseRawChecker(BaseChecker): stream must implement the readline method """ + warnings.warn("Modules that need access to the tokens should " + "use the ITokenChecker interface.", + DeprecationWarning) stream = node.file_stream stream.seek(0) # XXX may be removed with astng > 0.23 self.process_tokens(tokenize.generate_tokens(stream.readline)) @@ -130,6 +134,14 @@ class BaseRawChecker(BaseChecker): raise NotImplementedError() +class BaseTokenChecker(BaseChecker): + """Base class for checkers that want to have access to the token stream.""" + + def process_tokens(self, tokens): + """Should be overridden by subclasses.""" + raise NotImplementedError() + + PY_EXTS = ('.py', '.pyc', '.pyo', '.pyw', '.so', '.dll') def initialize(linter): diff --git a/checkers/format.py b/checkers/format.py index 4a68d3942..ea8cf1756 100644 --- a/checkers/format.py +++ b/checkers/format.py @@ -29,8 +29,8 @@ if not hasattr(tokenize, 'NL'): from logilab.common.textutils import pretty_match from logilab.astng import nodes -from pylint.interfaces import IRawChecker, IASTNGChecker -from pylint.checkers import BaseRawChecker +from pylint.interfaces import ITokenChecker, IASTNGChecker +from pylint.checkers import BaseTokenChecker from pylint.checkers.utils import check_messages from pylint.utils import WarningScope @@ -163,7 +163,7 @@ def check_line(line): return msg_id, pretty_match(match, line.rstrip()) -class FormatChecker(BaseRawChecker): +class FormatChecker(BaseTokenChecker): """checks for : * unauthorized constructions * strict indentation @@ -171,7 +171,7 @@ class FormatChecker(BaseRawChecker): * use of <> instead of != """ - __implements__ = (IRawChecker, IASTNGChecker) + __implements__ = (ITokenChecker, IASTNGChecker) # configuration section name name = 'format' @@ -192,22 +192,10 @@ class FormatChecker(BaseRawChecker): " " (4 spaces) or "\\t" (1 tab).'}), ) def __init__(self, linter=None): - BaseRawChecker.__init__(self, linter) + BaseTokenChecker.__init__(self, linter) self._lines = None self._visited_lines = None - def process_module(self, node): - """extracts encoding from the stream and decodes each line, so that - international text's length is properly calculated. - """ - stream = node.file_stream - stream.seek(0) # XXX may be removed with astng > 0.23 - readline = stream.readline - if sys.version_info < (3, 0): - if node.file_encoding is not None: - readline = lambda: stream.readline().decode(node.file_encoding, 'replace') - self.process_tokens(tokenize.generate_tokens(readline)) - def new_line(self, tok_type, line, line_num, junk): """a new line has been encountered, process it if necessary""" if not tok_type in junk: diff --git a/checkers/raw_metrics.py b/checkers/raw_metrics.py index 872ca7bcc..8728fb6bc 100644 --- a/checkers/raw_metrics.py +++ b/checkers/raw_metrics.py @@ -24,8 +24,8 @@ import tokenize from logilab.common.ureports import Table -from pylint.interfaces import IRawChecker -from pylint.checkers import BaseRawChecker, EmptyReport +from pylint.interfaces import ITokenChecker +from pylint.checkers import BaseTokenChecker, EmptyReport from pylint.reporters import diff_string def report_raw_stats(sect, stats, old_stats): @@ -50,7 +50,7 @@ def report_raw_stats(sect, stats, old_stats): sect.append(Table(children=lines, cols=5, rheaders=1)) -class RawMetricsChecker(BaseRawChecker): +class RawMetricsChecker(BaseTokenChecker): """does not check anything but gives some raw metrics : * total number of lines * total number of code lines @@ -59,7 +59,7 @@ class RawMetricsChecker(BaseRawChecker): * total number of empty lines """ - __implements__ = (IRawChecker,) + __implements__ = (ITokenChecker,) # configuration section name name = 'metrics' @@ -71,7 +71,7 @@ class RawMetricsChecker(BaseRawChecker): reports = ( ('RP0701', 'Raw metrics', report_raw_stats), ) def __init__(self, linter): - BaseRawChecker.__init__(self, linter) + BaseTokenChecker.__init__(self, linter) self.stats = None def open(self): diff --git a/checkers/strings.py b/checkers/strings.py index 5c7d1e797..52ff003b0 100644 --- a/checkers/strings.py +++ b/checkers/strings.py @@ -23,8 +23,8 @@ import tokenize from logilab import astng -from pylint.interfaces import IRawChecker, IASTNGChecker -from pylint.checkers import BaseChecker, BaseRawChecker +from pylint.interfaces import ITokenChecker, IASTNGChecker +from pylint.checkers import BaseChecker, BaseTokenChecker from pylint.checkers import utils _PY3K = sys.version_info >= (3, 0) @@ -190,9 +190,9 @@ class StringMethodsChecker(BaseChecker): args=(func.bound.name, func.name)) -class StringConstantChecker(BaseRawChecker): +class StringConstantChecker(BaseTokenChecker): """Check string literals""" - __implements__ = (IRawChecker, IASTNGChecker) + __implements__ = (ITokenChecker,) name = 'string_constant' msgs = { 'W1401': ('Anomalous backslash in string: \'%s\'. ' diff --git a/interfaces.py b/interfaces.py index e29026dcc..a24e36f3c 100644 --- a/interfaces.py +++ b/interfaces.py @@ -50,6 +50,15 @@ class IRawChecker(IChecker): """ +class ITokenChecker(IChecker): + """Interface for checkers that need access to the token list.""" + def process_tokens(self, tokens): + """Process a module. + + tokens is a list of all source code tokens in the file. + """ + + class IASTNGChecker(IChecker): """ interface for checker which prefers receive events according to statement type @@ -48,9 +48,9 @@ from logilab.astng.__pkginfo__ import version as astng_version from pylint.utils import (PyLintASTWalker, UnknownMessage, MessagesHandlerMixIn, ReportsHandlerMixIn, MSG_TYPES, expand_modules, - WarningScope) -from pylint.interfaces import ILinter, IRawChecker, IASTNGChecker -from pylint.checkers import (BaseRawChecker, EmptyReport, + WarningScope, tokenize_module) +from pylint.interfaces import ILinter, IRawChecker, ITokenChecker, IASTNGChecker +from pylint.checkers import (BaseTokenChecker, EmptyReport, table_lines_from_stats) from pylint.reporters.text import (TextReporter, ParseableTextReporter, VSTextReporter, ColorizedTextReporter) @@ -157,7 +157,7 @@ MSGS = { class PyLinter(OptionsManagerMixIn, MessagesHandlerMixIn, ReportsHandlerMixIn, - BaseRawChecker): + BaseTokenChecker): """lint Python modules using external checkers. This is the main checker controlling the other ones and the reports @@ -171,7 +171,7 @@ class PyLinter(OptionsManagerMixIn, MessagesHandlerMixIn, ReportsHandlerMixIn, to ensure the latest code version is actually checked. """ - __implements__ = (ILinter, IRawChecker) + __implements__ = (ILinter, ITokenChecker) name = 'master' priority = 0 @@ -310,7 +310,7 @@ This is used by the global evaluation report (RP0004).'}), config_file=pylintrc or config.PYLINTRC) MessagesHandlerMixIn.__init__(self) ReportsHandlerMixIn.__init__(self) - BaseRawChecker.__init__(self) + BaseTokenChecker.__init__(self) # provided reports self.reports = (('RP0001', 'Messages by category', report_total_messages_stats), @@ -385,7 +385,7 @@ This is used by the global evaluation report (RP0004).'}), self.set_reporter(reporter_class()) try: - BaseRawChecker.set_option(self, optname, value, action, optdict) + BaseTokenChecker.set_option(self, optname, value, action, optdict) except UnsupportedAction: print >> sys.stderr, 'option %s can\'t be read from config file' % \ optname @@ -565,8 +565,9 @@ This is used by the global evaluation report (RP0004).'}), files_or_modules = (files_or_modules,) walker = PyLintASTWalker(self) checkers = self.prepare_checkers() - rawcheckers = [c for c in checkers if implements(c, IRawChecker) - and c is not self] + tokencheckers = [c for c in checkers if implements(c, ITokenChecker) + and c is not self] + rawcheckers = [c for c in checkers if implements(c, IRawChecker)] # notify global begin for checker in checkers: checker.open() @@ -589,7 +590,7 @@ This is used by the global evaluation report (RP0004).'}), # fix the current file (if the source file was not available or # if it's actually a c extension) self.current_file = astng.file - self.check_astng_module(astng, walker, rawcheckers) + self.check_astng_module(astng, walker, rawcheckers, tokencheckers) self._add_suppression_messages() # notify global end self.set_current_module('') @@ -645,16 +646,18 @@ This is used by the global evaluation report (RP0004).'}), traceback.print_exc() self.add_message('F0002', args=(ex.__class__, ex)) - def check_astng_module(self, astng, walker, rawcheckers): + def check_astng_module(self, astng, walker, rawcheckers, tokencheckers): """check a module from its astng representation, real work""" # call raw checkers if possible + tokens = tokenize_module(astng) + if not astng.pure_python: self.add_message('I0001', args=astng.name) else: #assert astng.file.endswith('.py') - # invoke IRawChecker interface on self to fetch module/block + # invoke ITokenChecker interface on self to fetch module/block # level options - self.process_module(astng) + self.process_tokens(tokens) if self._ignore_file: return False # walk ast to collect line numbers @@ -666,6 +669,8 @@ This is used by the global evaluation report (RP0004).'}), self.collect_block_lines(astng, orig_state) for checker in rawcheckers: checker.process_module(astng) + for checker in tokencheckers: + checker.process_tokens(tokens) # generate events to astng checkers walker.walk(astng) return True diff --git a/test/unittest_lint.py b/test/unittest_lint.py index 2043ca64d..94c0b0343 100644 --- a/test/unittest_lint.py +++ b/test/unittest_lint.py @@ -27,7 +27,7 @@ from pylint import config from pylint.lint import PyLinter, Run, UnknownMessage, preprocess_options, \ ArgumentPreprocessingError from pylint.utils import sort_msgs, PyLintASTWalker, MSG_STATE_SCOPE_CONFIG, \ - MSG_STATE_SCOPE_MODULE + MSG_STATE_SCOPE_MODULE, tokenize_module from pylint import checkers @@ -154,7 +154,7 @@ class PyLinterTC(TestCase): filepath = join(INPUTDIR, 'func_block_disable_msg.py') linter.set_current_module('func_block_disable_msg') astng = linter.get_astng(filepath, 'func_block_disable_msg') - linter.process_module(astng) + linter.process_tokens(tokenize_module(astng)) orig_state = linter._module_msgs_state.copy() linter._module_msgs_state = {} linter._suppression_mapping = {} @@ -18,6 +18,7 @@ main pylint class """ import sys +import tokenize from warnings import warn from os.path import dirname, basename, splitext, exists, isdir, join, normpath @@ -31,7 +32,7 @@ from logilab.common.ureports import Section from logilab.astng import nodes, Module from pylint.checkers import EmptyReport -from pylint.interfaces import IRawChecker +from pylint.interfaces import IRawChecker, ITokenChecker class UnknownMessage(Exception): @@ -104,6 +105,17 @@ def category_id(id): return MSG_TYPES_LONG.get(id) +def tokenize_module(module): + stream = module.file_stream + stream.seek(0) + if sys.version_info < (3, 0) and module.file_encoding is not None: + readline = lambda: stream.readline().decode(module.file_encoding, + 'replace') + else: + readline = stream.readline + return list(tokenize.generate_tokens(readline)) + + class Message: def __init__(self, checker, msgid, msg, descr, symbol, scope): assert len(msgid) == 5, 'Invalid message id %s' % msgid @@ -147,7 +159,7 @@ class MessagesHandlerMixIn: chkid = None for msgid, msg_tuple in msgs_dict.iteritems(): - if implements(checker, IRawChecker): + if implements(checker, (IRawChecker, ITokenChecker)): scope = WarningScope.LINE else: scope = WarningScope.NODE |