diff options
author | Sylvain Thénault <sylvain.thenault@logilab.fr> | 2013-03-29 17:39:14 +0100 |
---|---|---|
committer | Sylvain Thénault <sylvain.thenault@logilab.fr> | 2013-03-29 17:39:14 +0100 |
commit | 5f675eceeac5879994a0e3a55ac1d3e6be0af00f (patch) | |
tree | e4dd6c1a439e79ea04d0c108f4910fbcc01af073 | |
parent | fc58a92c0437f618f8c9038d64e5cd39b5c3d770 (diff) | |
download | pylint-git-5f675eceeac5879994a0e3a55ac1d3e6be0af00f.tar.gz |
move string literal checker from format.py to strings.py
-rw-r--r-- | checkers/__init__.py | 2 | ||||
-rw-r--r-- | checkers/format.py | 105 | ||||
-rw-r--r-- | checkers/strings.py | 107 | ||||
-rw-r--r-- | checkers/utils.py | 3 |
4 files changed, 110 insertions, 107 deletions
diff --git a/checkers/__init__.py b/checkers/__init__.py index 299078af0..700a78e0f 100644 --- a/checkers/__init__.py +++ b/checkers/__init__.py @@ -30,7 +30,7 @@ Base id of standard checkers (used in msg and report ids): 12: logging 13: string_format 14: string_constant -14-50: not yet used: reserved for future internal checkers. +15-50: not yet used: reserved for future internal checkers. 51-99: perhaps used: reserved for external checkers The raw_metrics checker has no number associated since it doesn't emit any diff --git a/checkers/format.py b/checkers/format.py index 82d55feaa..6ae8706fd 100644 --- a/checkers/format.py +++ b/checkers/format.py @@ -1,6 +1,4 @@ -# Copyright (c) 2003-2010 Sylvain Thenault (thenault@gmail.com). -# Copyright (c) 2003-2012 LOGILAB S.A. (Paris, FRANCE). -# Copyright 2012 Google Inc. +# Copyright (c) 2003-2013 LOGILAB S.A. (Paris, FRANCE). # # This program is free software; you can redistribute it and/or modify it under # the terms of the GNU General Public License as published by the Free Software @@ -128,7 +126,6 @@ BAD_CONSTRUCT_RGXS = ( 'C0324'), ) -_PY3K = sys.version_info >= (3, 0) def get_string_coords(line): """return a list of string positions (tuple (start, end)) in the line @@ -371,106 +368,6 @@ class FormatChecker(BaseRawChecker): expected * unit_size)) -class StringConstantChecker(BaseRawChecker): - """Check string literals""" - - msgs = { - 'W1401': ('Anomalous backslash in string: \'%s\'. ' - 'String constant might be missing an r prefix.', - 'anomalous-backslash-in-string', - 'Used when a backslash is in a literal string but not as an ' - 'escape.'), - 'W1402': ('Anomalous Unicode escape in byte string: \'%s\'. ' - 'String constant might be missing an r or u prefix.', - 'anomalous-unicode-escape-in-string', - 'Used when an escape like \\u is encountered in a byte ' - 'string where it has no effect.'), - } - name = 'string_constant' - __implements__ = (IRawChecker, IASTNGChecker) - - # Characters that have a special meaning after a backslash in either - # Unicode or byte strings. - ESCAPE_CHARACTERS = 'abfnrtvx\n\r\t\\\'\"01234567' - - # TODO(mbp): Octal characters are quite an edge case today; people may - # prefer a separate warning where they occur. \0 should be allowed. - - # Characters that have a special meaning after a backslash but only in - # Unicode strings. - UNICODE_ESCAPE_CHARACTERS = 'uUN' - - def process_tokens(self, tokens): - for (tok_type, token, (start_row, start_col), _, _) in tokens: - if tok_type == tokenize.STRING: - # 'token' is the whole un-parsed token; we can look at the start - # of it to see whether it's a raw or unicode string etc. - self.process_string_token(token, start_row, start_col) - - def process_string_token(self, token, start_row, start_col): - for i, c in enumerate(token): - if c in '\'\"': - quote_char = c - break - prefix = token[:i].lower() # markers like u, b, r. - after_prefix = token[i:] - if after_prefix[:3] == after_prefix[-3:] == 3 * quote_char: - string_body = after_prefix[3:-3] - else: - string_body = after_prefix[1:-1] # Chop off quotes - # No special checks on raw strings at the moment. - if 'r' not in prefix: - self.process_non_raw_string_token(prefix, string_body, - start_row, start_col) - - def process_non_raw_string_token(self, prefix, string_body, start_row, - start_col): - """check for bad escapes in a non-raw string. - - prefix: lowercase string of eg 'ur' string prefix markers. - string_body: the un-parsed body of the string, not including the quote - marks. - start_row: integer line number in the source. - start_col: integer column number in the source. - """ - # Walk through the string; if we see a backslash then escape the next - # character, and skip over it. If we see a non-escaped character, - # alert, and continue. - # - # Accept a backslash when it escapes a backslash, or a quote, or - # end-of-line, or one of the letters that introduce a special escape - # sequence <http://docs.python.org/reference/lexical_analysis.html> - # - # TODO(mbp): Maybe give a separate warning about the rarely-used - # \a \b \v \f? - # - # TODO(mbp): We could give the column of the problem character, but - # add_message doesn't seem to have a way to pass it through at present. - i = 0 - while True: - i = string_body.find('\\', i) - if i == -1: - break - # There must be a next character; having a backslash at the end - # of the string would be a SyntaxError. - next_char = string_body[i+1] - match = string_body[i:i+2] - if next_char in self.UNICODE_ESCAPE_CHARACTERS: - if 'u' in prefix: - pass - elif _PY3K and 'b' not in prefix: - pass # unicode by default - else: - self.add_message('W1402', line=start_row, args=(match, )) - elif next_char not in self.ESCAPE_CHARACTERS: - self.add_message('W1401', line=start_row, args=(match, )) - # Whether it was a valid escape or not, backslash followed by - # another character can always be consumed whole: the second - # character can never be the start of a new backslash escape. - i += 2 - - def register(linter): """required method to auto register this checker """ linter.register_checker(FormatChecker(linter)) - linter.register_checker(StringConstantChecker(linter)) diff --git a/checkers/strings.py b/checkers/strings.py index 978609062..5c7d1e797 100644 --- a/checkers/strings.py +++ b/checkers/strings.py @@ -18,12 +18,16 @@ """Checker for string formatting operations. """ +import sys +import tokenize + from logilab import astng -from pylint.interfaces import IASTNGChecker -from pylint.checkers import BaseChecker +from pylint.interfaces import IRawChecker, IASTNGChecker +from pylint.checkers import BaseChecker, BaseRawChecker from pylint.checkers import utils +_PY3K = sys.version_info >= (3, 0) MSGS = { 'E1300': ("Unsupported format character %r (%#02x) at index %d", @@ -186,8 +190,107 @@ class StringMethodsChecker(BaseChecker): args=(func.bound.name, func.name)) +class StringConstantChecker(BaseRawChecker): + """Check string literals""" + __implements__ = (IRawChecker, IASTNGChecker) + name = 'string_constant' + msgs = { + 'W1401': ('Anomalous backslash in string: \'%s\'. ' + 'String constant might be missing an r prefix.', + 'anomalous-backslash-in-string', + 'Used when a backslash is in a literal string but not as an ' + 'escape.'), + 'W1402': ('Anomalous Unicode escape in byte string: \'%s\'. ' + 'String constant might be missing an r or u prefix.', + 'anomalous-unicode-escape-in-string', + 'Used when an escape like \\u is encountered in a byte ' + 'string where it has no effect.'), + } + + # Characters that have a special meaning after a backslash in either + # Unicode or byte strings. + ESCAPE_CHARACTERS = 'abfnrtvx\n\r\t\\\'\"01234567' + + # TODO(mbp): Octal characters are quite an edge case today; people may + # prefer a separate warning where they occur. \0 should be allowed. + + # Characters that have a special meaning after a backslash but only in + # Unicode strings. + UNICODE_ESCAPE_CHARACTERS = 'uUN' + + def process_tokens(self, tokens): + for (tok_type, token, (start_row, start_col), _, _) in tokens: + if tok_type == tokenize.STRING: + # 'token' is the whole un-parsed token; we can look at the start + # of it to see whether it's a raw or unicode string etc. + self.process_string_token(token, start_row, start_col) + + def process_string_token(self, token, start_row, start_col): + for i, c in enumerate(token): + if c in '\'\"': + quote_char = c + break + prefix = token[:i].lower() # markers like u, b, r. + after_prefix = token[i:] + if after_prefix[:3] == after_prefix[-3:] == 3 * quote_char: + string_body = after_prefix[3:-3] + else: + string_body = after_prefix[1:-1] # Chop off quotes + # No special checks on raw strings at the moment. + if 'r' not in prefix: + self.process_non_raw_string_token(prefix, string_body, + start_row, start_col) + + def process_non_raw_string_token(self, prefix, string_body, start_row, + start_col): + """check for bad escapes in a non-raw string. + + prefix: lowercase string of eg 'ur' string prefix markers. + string_body: the un-parsed body of the string, not including the quote + marks. + start_row: integer line number in the source. + start_col: integer column number in the source. + """ + # Walk through the string; if we see a backslash then escape the next + # character, and skip over it. If we see a non-escaped character, + # alert, and continue. + # + # Accept a backslash when it escapes a backslash, or a quote, or + # end-of-line, or one of the letters that introduce a special escape + # sequence <http://docs.python.org/reference/lexical_analysis.html> + # + # TODO(mbp): Maybe give a separate warning about the rarely-used + # \a \b \v \f? + # + # TODO(mbp): We could give the column of the problem character, but + # add_message doesn't seem to have a way to pass it through at present. + i = 0 + while True: + i = string_body.find('\\', i) + if i == -1: + break + # There must be a next character; having a backslash at the end + # of the string would be a SyntaxError. + next_char = string_body[i+1] + match = string_body[i:i+2] + if next_char in self.UNICODE_ESCAPE_CHARACTERS: + if 'u' in prefix: + pass + elif _PY3K and 'b' not in prefix: + pass # unicode by default + else: + self.add_message('W1402', line=start_row, args=(match, )) + elif next_char not in self.ESCAPE_CHARACTERS: + self.add_message('W1401', line=start_row, args=(match, )) + # Whether it was a valid escape or not, backslash followed by + # another character can always be consumed whole: the second + # character can never be the start of a new backslash escape. + i += 2 + + def register(linter): """required method to auto register this checker """ linter.register_checker(StringFormatChecker(linter)) linter.register_checker(StringMethodsChecker(linter)) + linter.register_checker(StringConstantChecker(linter)) diff --git a/checkers/utils.py b/checkers/utils.py index aa4563ee4..da09e86a1 100644 --- a/checkers/utils.py +++ b/checkers/utils.py @@ -20,9 +20,11 @@ import re import string + from logilab import astng from logilab.astng import scoped_nodes from logilab.common.compat import builtins + BUILTINS_NAME = builtins.__name__ COMP_NODE_TYPES = astng.ListComp, astng.SetComp, astng.DictComp, astng.GenExpr @@ -365,6 +367,7 @@ def is_super_call(expr): return (isinstance(expr, astng.CallFunc) and isinstance(expr.func, astng.Name) and expr.func.name == 'super') + def is_attr_private(attrname): """Check that attribute name is private (at least two leading underscores, at most one trailing underscore) |