summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Dieter Verfaillie <dieterv@optionexplicit.be> 2015-04-21 21:53:07 +0200
committer Dieter Verfaillie <dieterv@optionexplicit.be> 2015-06-25 21:29:05 +0200
commit a1fb01e08df6e6f81d76220f5f5e576be4188997 (patch)
tree 6ef80b4bcd2d5b9658702ac5b49f966a55920898
parent bce29907c6dff1ee45e766d0be5dfedbc4589750 (diff)
download gobject-introspection-wip/dieterv/drive-by-review.tar.gz
tests: Update misc/pep8.py to 1.6.2 [wip/dieterv/drive-by-review]
Updated from: https://raw.githubusercontent.com/jcrocholl/pep8/1.6.2/pep8.py
-rw-r--r-- Makefile.am 4
-rw-r--r-- giscanner/ast.py 12
-rw-r--r-- giscanner/gdumpparser.py 4
-rw-r--r-- giscanner/introspectablepass.py 2
-rw-r--r-- giscanner/maintransformer.py 8
-rw-r--r-- giscanner/message.py 2
-rw-r--r-- giscanner/sourcescanner.py 2
-rw-r--r-- giscanner/transformer.py 4
-rw-r--r-- misc/pep8.py 976
9 files changed, 626 insertions, 388 deletions
diff --git a/Makefile.am b/Makefile.am
index cbd57121..9ea81910 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -63,9 +63,9 @@ PEP8_EXCLUDES=--exclude='.svn,CVS,.bzr,.hg,.git,__pycache__,.\#*'
check-local:
@echo "TEST: PEP-8 INQUISITION"
@find $(top_srcdir)/giscanner -name \*.py | sort | uniq | xargs \
- $(PYTHON) $(top_srcdir)/misc/pep8.py --max-line-length=99 --ignore=E128 $(PEP8_EXCLUDES)
+ $(PYTHON) $(top_srcdir)/misc/pep8.py --max-line-length=99 --ignore=E128,W503 $(PEP8_EXCLUDES)
@find $(top_srcdir)/tests -name \*.py | sort | uniq | xargs \
- $(PYTHON) $(top_srcdir)/misc/pep8.py --ignore=E127,E501 $(PEP8_EXCLUDES)
+ $(PYTHON) $(top_srcdir)/misc/pep8.py --ignore=E127,E402,E501,E731 $(PEP8_EXCLUDES)
check-pyflakes:
@echo " CHECK Pyflakes"
diff --git a/giscanner/ast.py b/giscanner/ast.py
index fe3c5672..c7ea2d74 100644
--- a/giscanner/ast.py
+++ b/giscanner/ast.py
@@ -172,13 +172,10 @@ class TypeUnknown(Type):
def __init__(self):
Type.__init__(self, _target_unknown=True)
-######
-## Fundamental types
-######
-# Two special ones
+# Fundamental types, two special ones
TYPE_NONE = Type(target_fundamental='none', ctype='void')
TYPE_ANY = Type(target_fundamental='gpointer', ctype='gpointer')
-# "Basic" types
+# Fundamental types, "Basic" types
TYPE_BOOLEAN = Type(target_fundamental='gboolean', ctype='gboolean')
TYPE_INT8 = Type(target_fundamental='gint8', ctype='gint8')
TYPE_UINT8 = Type(target_fundamental='guint8', ctype='guint8')
@@ -328,10 +325,7 @@ type_names['ssize_t'] = TYPE_LONG
# Obj-C
type_names['id'] = TYPE_ANY
-##
-## Parameters
-##
-
+# Parameters
PARAM_DIRECTION_IN = 'in'
PARAM_DIRECTION_OUT = 'out'
PARAM_DIRECTION_INOUT = 'inout'
diff --git a/giscanner/gdumpparser.py b/giscanner/gdumpparser.py
index e1fc9358..b49ceef1 100644
--- a/giscanner/gdumpparser.py
+++ b/giscanner/gdumpparser.py
@@ -203,7 +203,7 @@ blob containing data gleaned from GObject's primitive introspection."""
def _initparse_gobject_record(self, record):
if (record.name.startswith('ParamSpec')
- and not record.name in ('ParamSpecPool', 'ParamSpecClass', 'ParamSpecTypeInfo')):
+ and record.name not in ('ParamSpecPool', 'ParamSpecClass', 'ParamSpecTypeInfo')):
parent = None
if record.name != 'ParamSpec':
parent = ast.Type(target_giname='GObject.ParamSpec')
@@ -359,7 +359,7 @@ different --identifier-prefix.""" % (xmlnode.attrib['name'], self._namespace.ide
else:
self._namespace.append(node, replace=True)
- ## WORKAROUND ##
+ # WORKAROUND
# https://bugzilla.gnome.org/show_bug.cgi?id=550616
def _introspect_boxed_gstreamer_workaround(self, xmlnode):
node = ast.Boxed('ParamSpecMiniObject', gtype_name='GParamSpecMiniObject',
diff --git a/giscanner/introspectablepass.py b/giscanner/introspectablepass.py
index 3d67c73e..ac68b9f7 100644
--- a/giscanner/introspectablepass.py
+++ b/giscanner/introspectablepass.py
@@ -96,7 +96,7 @@ class IntrospectablePass(object):
if (is_parameter
and isinstance(target, ast.Callback)
- and not node.type.target_giname in ('GLib.DestroyNotify', 'Gio.AsyncReadyCallback')
+ and node.type.target_giname not in ('GLib.DestroyNotify', 'Gio.AsyncReadyCallback')
and node.scope is None):
self._parameter_warning(
parent,
diff --git a/giscanner/maintransformer.py b/giscanner/maintransformer.py
index df598adf..b138a121 100644
--- a/giscanner/maintransformer.py
+++ b/giscanner/maintransformer.py
@@ -349,7 +349,7 @@ class MainTransformer(object):
# (except enums and flags) or basic types that are
# as big as a gpointer
if array_type == ast.Array.GLIB_PTRARRAY:
- if ((element_type in ast.BASIC_GIR_TYPES and not element_type in ast.POINTER_TYPES)
+ if ((element_type in ast.BASIC_GIR_TYPES and element_type not in ast.POINTER_TYPES)
or isinstance(element_type, (ast.Enum, ast.Bitfield))):
message.warn("invalid (element-type) for a GPtrArray, "
"must be a pointer", annotations.position)
@@ -358,7 +358,7 @@ class MainTransformer(object):
if array_type == ast.Array.GLIB_BYTEARRAY:
if element_type == ast.TYPE_ANY:
array.element_type = ast.TYPE_UINT8
- elif not element_type in [ast.TYPE_UINT8, ast.TYPE_INT8, ast.TYPE_CHAR]:
+ elif element_type not in [ast.TYPE_UINT8, ast.TYPE_INT8, ast.TYPE_CHAR]:
message.warn("invalid (element-type) for a GByteArray, "
"must be one of guint8, gint8 or gchar",
annotations.position)
@@ -1250,8 +1250,8 @@ method or constructor of some type."""
"""Look for virtual methods from the class structure."""
if not node.glib_type_struct:
# https://bugzilla.gnome.org/show_bug.cgi?id=629080
- #message.warn_node(node,
- # "Failed to find class structure for %r" % (node.name, ))
+ # message.warn_node(node,
+ # "Failed to find class structure for %r" % (node.name, ))
return
node_type = node.create_type()
diff --git a/giscanner/message.py b/giscanner/message.py
index 34f23f94..6392dfc2 100644
--- a/giscanner/message.py
+++ b/giscanner/message.py
@@ -103,7 +103,7 @@ class MessageLogger(object):
self._warning_count += 1
- if not log_type in self._enable_warnings:
+ if log_type not in self._enable_warnings:
return
if type(positions) == set:
diff --git a/giscanner/sourcescanner.py b/giscanner/sourcescanner.py
index 9a814213..15ced3f5 100644
--- a/giscanner/sourcescanner.py
+++ b/giscanner/sourcescanner.py
@@ -236,7 +236,7 @@ class SourceScanner(object):
('-U', undefines)]:
for arg in (args or []):
opt = prefix + arg
- if not opt in self._cpp_options:
+ if opt not in self._cpp_options:
self._cpp_options.append(opt)
def parse_files(self, filenames):
diff --git a/giscanner/transformer.py b/giscanner/transformer.py
index db8e5a04..7acca226 100644
--- a/giscanner/transformer.py
+++ b/giscanner/transformer.py
@@ -96,7 +96,7 @@ class Transformer(object):
def parse(self, symbols):
for symbol in symbols:
- ## WORKAROUND ##
+ # WORKAROUND
# https://bugzilla.gnome.org/show_bug.cgi?id=550616
if symbol.ident in ['gst_g_error_get_type']:
continue
@@ -159,7 +159,7 @@ namespaces."""
if ns == self._namespace.name:
return self._namespace.get(giname)
# Fallback to the main namespace if not a dependency and matches a prefix
- if ns in self._namespace.identifier_prefixes and not ns in self._parsed_includes:
+ if ns in self._namespace.identifier_prefixes and ns not in self._parsed_includes:
message.warn(("Deprecated reference to identifier " +
"prefix %s in GIName %s") % (ns, name))
return self._namespace.get(giname)
diff --git a/misc/pep8.py b/misc/pep8.py
index 8413270f..d7907e53 100644
--- a/misc/pep8.py
+++ b/misc/pep8.py
@@ -1,7 +1,8 @@
#!/usr/bin/env python
# pep8.py - Check Python source code formatting, according to PEP 8
# Copyright (C) 2006-2009 Johann C. Rocholl <johann@rocholl.net>
-# Copyright (C) 2009-2013 Florent Xicluna <florent.xicluna@gmail.com>
+# Copyright (C) 2009-2014 Florent Xicluna <florent.xicluna@gmail.com>
+# Copyright (C) 2014 Ian Lee <ianlee1521@gmail.com>
#
# Permission is hereby granted, free of charge, to any person
# obtaining a copy of this software and associated documentation files
@@ -24,8 +25,7 @@
# SOFTWARE.
r"""
-Check Python source code formatting, according to PEP 8:
-http://www.python.org/dev/peps/pep-0008/
+Check Python source code formatting, according to PEP 8.
For usage and a list of options, try this:
$ python pep8.py -h
@@ -45,7 +45,7 @@ W warnings
700 statements
900 syntax error
"""
-__version__ = '1.4.6'
+from __future__ import with_statement
import os
import sys
@@ -62,13 +62,21 @@ try:
except ImportError:
from ConfigParser import RawConfigParser
-DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__'
-DEFAULT_IGNORE = 'E123,E226,E24'
-if sys.platform == 'win32':
- DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8')
-else:
- DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or
- os.path.expanduser('~/.config'), 'pep8')
+__version__ = '1.6.2'
+
+DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__,.tox'
+DEFAULT_IGNORE = 'E121,E123,E126,E226,E24,E704'
+try:
+ if sys.platform == 'win32':
+ USER_CONFIG = os.path.expanduser(r'~\.pep8')
+ else:
+ USER_CONFIG = os.path.join(
+ os.getenv('XDG_CONFIG_HOME') or os.path.expanduser('~/.config'),
+ 'pep8'
+ )
+except ImportError:
+ USER_CONFIG = None
+
PROJECT_CONFIG = ('setup.cfg', 'tox.ini', '.pep8')
TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite')
MAX_LINE_LENGTH = 79
@@ -87,18 +95,22 @@ WS_NEEDED_OPERATORS = frozenset([
'**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>',
'%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '='])
WHITESPACE = frozenset(' \t')
-SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.NEWLINE,
- tokenize.INDENT, tokenize.DEDENT])
+NEWLINE = frozenset([tokenize.NL, tokenize.NEWLINE])
+SKIP_TOKENS = NEWLINE.union([tokenize.INDENT, tokenize.DEDENT])
+# ERRORTOKEN is triggered by backticks in Python 3
+SKIP_COMMENTS = SKIP_TOKENS.union([tokenize.COMMENT, tokenize.ERRORTOKEN])
BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines']
INDENT_REGEX = re.compile(r'([ \t]*)')
RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,')
-RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,\s*\w+\s*,\s*\w+')
+RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,.*,\s*\w+\s*$')
ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b')
DOCSTRING_REGEX = re.compile(r'u?r?["\']')
EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]')
WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?: |\t)')
-COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)')
+COMPARE_SINGLETON_REGEX = re.compile(r'\b(None|False|True)?\s*([=!]=)'
+ r'\s*(?(1)|(None|False|True))\b')
+COMPARE_NEGATIVE_REGEX = re.compile(r'\b(not)\s+[^][)(}{ ]+\s+(in|is)\s')
COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type'
r'|\s*\(\s*([^)]*[^ )])\s*\))')
KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS))
@@ -117,8 +129,7 @@ COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n'
def tabs_or_spaces(physical_line, indent_char):
- r"""
- Never mix tabs and spaces.
+ r"""Never mix tabs and spaces.
The most popular way of indenting Python is with spaces only. The
second-most popular way is with tabs only. Code indented with a mixture
@@ -137,9 +148,7 @@ def tabs_or_spaces(physical_line, indent_char):
def tabs_obsolete(physical_line):
- r"""
- For new projects, spaces-only are strongly recommended over tabs. Most
- editors have features that make this easy to do.
+ r"""For new projects, spaces-only are strongly recommended over tabs.
Okay: if True:\n return
W191: if True:\n\treturn
@@ -150,16 +159,7 @@ def tabs_obsolete(physical_line):
def trailing_whitespace(physical_line):
- r"""
- JCR: Trailing whitespace is superfluous.
- FBM: Except when it occurs as part of a blank line (i.e. the line is
- nothing but whitespace). According to Python docs[1] a line with only
- whitespace is considered a blank line, and is to be ignored. However,
- matching a blank line to its indentation level avoids mistakenly
- terminating a multi-line statement (e.g. class declaration) when
- pasting code into the standard Python interpreter.
-
- [1] http://docs.python.org/reference/lexical_analysis.html#blank-lines
+ r"""Trailing whitespace is superfluous.
The warning returned varies on whether the line itself is blank, for easier
filtering for those who want to indent their blank lines.
@@ -179,30 +179,24 @@ def trailing_whitespace(physical_line):
return 0, "W293 blank line contains whitespace"
-def trailing_blank_lines(physical_line, lines, line_number):
- r"""
- JCR: Trailing blank lines are superfluous.
+def trailing_blank_lines(physical_line, lines, line_number, total_lines):
+ r"""Trailing blank lines are superfluous.
Okay: spam(1)
W391: spam(1)\n
- """
- if not physical_line.rstrip() and line_number == len(lines):
- return 0, "W391 blank line at end of file"
-
-def missing_newline(physical_line):
- """
- JCR: The last line should have a newline.
-
- Reports warning W292.
+ However the last line should end with a new line (warning W292).
"""
- if physical_line.rstrip() == physical_line:
- return len(physical_line), "W292 no newline at end of file"
+ if line_number == total_lines:
+ stripped_last_line = physical_line.rstrip()
+ if not stripped_last_line:
+ return 0, "W391 blank line at end of file"
+ if stripped_last_line == physical_line:
+ return len(physical_line), "W292 no newline at end of file"
-def maximum_line_length(physical_line, max_line_length):
- """
- Limit all lines to a maximum of 79 characters.
+def maximum_line_length(physical_line, max_line_length, multiline):
+ r"""Limit all lines to a maximum of 79 characters.
There are still many devices around that are limited to 80 character
lines; plus, limiting windows to 80 characters makes it possible to have
@@ -216,6 +210,13 @@ def maximum_line_length(physical_line, max_line_length):
line = physical_line.rstrip()
length = len(line)
if length > max_line_length and not noqa(line):
+ # Special case for long URLs in multi-line docstrings or comments,
+ # but still report the error when the 72 first chars are whitespaces.
+ chunks = line.split()
+ if ((len(chunks) == 1 and multiline) or
+ (len(chunks) == 2 and chunks[0] == '#')) and \
+ len(line) - len(chunks[-1]) < max_line_length - 7:
+ return
if hasattr(line, 'decode'): # Python 2
# The line could contain multi-byte characters
try:
@@ -233,9 +234,8 @@ def maximum_line_length(physical_line, max_line_length):
def blank_lines(logical_line, blank_lines, indent_level, line_number,
- previous_logical, previous_indent_level):
- r"""
- Separate top-level function and class definitions with two blank lines.
+ blank_before, previous_logical, previous_indent_level):
+ r"""Separate top-level function and class definitions with two blank lines.
Method definitions inside a class are separated by a single blank line.
@@ -263,19 +263,18 @@ def blank_lines(logical_line, blank_lines, indent_level, line_number,
yield 0, "E303 too many blank lines (%d)" % blank_lines
elif logical_line.startswith(('def ', 'class ', '@')):
if indent_level:
- if not (blank_lines or previous_indent_level < indent_level or
+ if not (blank_before or previous_indent_level < indent_level or
DOCSTRING_REGEX.match(previous_logical)):
yield 0, "E301 expected 1 blank line, found 0"
- elif blank_lines != 2:
- yield 0, "E302 expected 2 blank lines, found %d" % blank_lines
+ elif blank_before != 2:
+ yield 0, "E302 expected 2 blank lines, found %d" % blank_before
def extraneous_whitespace(logical_line):
- """
- Avoid extraneous whitespace in the following situations:
+ r"""Avoid extraneous whitespace.
+ Avoid extraneous whitespace in these situations:
- Immediately inside parentheses, brackets or braces.
-
- Immediately before a comma, semicolon, or colon.
Okay: spam(ham[1], {eggs: 2})
@@ -304,8 +303,7 @@ def extraneous_whitespace(logical_line):
def whitespace_around_keywords(logical_line):
- r"""
- Avoid extraneous whitespace around keywords.
+ r"""Avoid extraneous whitespace around keywords.
Okay: True and False
E271: True and False
@@ -328,8 +326,7 @@ def whitespace_around_keywords(logical_line):
def missing_whitespace(logical_line):
- """
- JCR: Each comma, semicolon or colon should be followed by whitespace.
+ r"""Each comma, semicolon or colon should be followed by whitespace.
Okay: [a, b]
Okay: (3,)
@@ -356,8 +353,7 @@ def missing_whitespace(logical_line):
def indentation(logical_line, previous_logical, indent_char,
indent_level, previous_indent_level):
- r"""
- Use 4 spaces per indentation level.
+ r"""Use 4 spaces per indentation level.
For really old code that you don't want to mess up, you can continue to
use 8-space tabs.
@@ -365,33 +361,37 @@ def indentation(logical_line, previous_logical, indent_char,
Okay: a = 1
Okay: if a == 0:\n a = 1
E111: a = 1
+ E114: # a = 1
Okay: for item in items:\n pass
E112: for item in items:\npass
+ E115: for item in items:\n# Hi\n pass
Okay: a = 1\nb = 2
E113: a = 1\n b = 2
+ E116: a = 1\n # b = 2
"""
- if indent_char == ' ' and indent_level % 4:
- yield 0, "E111 indentation is not a multiple of four"
+ c = 0 if logical_line else 3
+ tmpl = "E11%d %s" if logical_line else "E11%d %s (comment)"
+ if indent_level % 4:
+ yield 0, tmpl % (1 + c, "indentation is not a multiple of four")
indent_expect = previous_logical.endswith(':')
if indent_expect and indent_level <= previous_indent_level:
- yield 0, "E112 expected an indented block"
- if indent_level > previous_indent_level and not indent_expect:
- yield 0, "E113 unexpected indentation"
+ yield 0, tmpl % (2 + c, "expected an indented block")
+ elif not indent_expect and indent_level > previous_indent_level:
+ yield 0, tmpl % (3 + c, "unexpected indentation")
def continued_indentation(logical_line, tokens, indent_level, hang_closing,
- noqa, verbose):
- r"""
- Continuation lines should align wrapped elements either vertically using
- Python's implicit line joining inside parentheses, brackets and braces, or
- using a hanging indent.
+ indent_char, noqa, verbose):
+ r"""Continuation lines indentation.
- When using a hanging indent the following considerations should be applied:
+ Continuation lines should align wrapped elements either vertically
+ using Python's implicit line joining inside parentheses, brackets
+ and braces, or using a hanging indent.
+ When using a hanging indent these considerations should be applied:
- there should be no arguments on the first line, and
-
- further indentation should be used to clearly distinguish itself as a
continuation line.
@@ -403,10 +403,12 @@ def continued_indentation(logical_line, tokens, indent_level, hang_closing,
E122: a = (\n42)
E123: a = (\n 42\n )
E124: a = (24,\n 42\n)
- E125: if (a or\n b):\n pass
+ E125: if (\n b):\n pass
E126: a = (\n 42)
E127: a = (24,\n 42)
E128: a = (24,\n 42)
+ E129: if (a or\n b):\n pass
+ E131: a = (\n 42\n 24)
"""
first_row = tokens[0][2][0]
nrows = 1 + tokens[-1][2][0] - first_row
@@ -420,13 +422,21 @@ def continued_indentation(logical_line, tokens, indent_level, hang_closing,
indent_next = logical_line.endswith(':')
row = depth = 0
+ valid_hangs = (4,) if indent_char != '\t' else (4, 8)
# remember how many brackets were opened on each line
parens = [0] * nrows
# relative indents of physical lines
rel_indent = [0] * nrows
+ # for each depth, collect a list of opening rows
+ open_rows = [[0]]
+ # for each depth, memorize the hanging indentation
+ hangs = [None]
# visual indents
indent_chances = {}
last_indent = tokens[0][2]
+ visual_indent = None
+ last_token_multiline = False
+ # for each depth, memorize the visual indent column
indent = [last_indent[1]]
if verbose >= 3:
print(">>> " + tokens[0][4].rstrip())
@@ -436,8 +446,7 @@ def continued_indentation(logical_line, tokens, indent_level, hang_closing,
newline = row < start[0] - first_row
if newline:
row = start[0] - first_row
- newline = (not last_token_multiline and
- token_type not in (tokenize.NL, tokenize.NEWLINE))
+ newline = not last_token_multiline and token_type not in NEWLINE
if newline:
# this is the beginning of a continuation line.
@@ -448,17 +457,18 @@ def continued_indentation(logical_line, tokens, indent_level, hang_closing,
# record the initial indent.
rel_indent[row] = expand_indent(line) - indent_level
- if depth:
- # a bracket expression in a continuation line.
- # find the line that it was opened on
- for open_row in range(row - 1, -1, -1):
- if parens[open_row]:
- break
- else:
- # an unbracketed continuation line (ie, backslash)
- open_row = 0
- hang = rel_indent[row] - rel_indent[open_row]
+ # identify closing bracket
close_bracket = (token_type == tokenize.OP and text in ']})')
+
+ # is the indent relative to an opening bracket line?
+ for open_row in reversed(open_rows[depth]):
+ hang = rel_indent[row] - rel_indent[open_row]
+ hanging_indent = hang in valid_hangs
+ if hanging_indent:
+ break
+ if hangs[depth]:
+ hanging_indent = (hang == hangs[depth])
+ # is there any chance of visual indent?
visual_indent = (not close_bracket and hang > 0 and
indent_chances.get(start[1]))
@@ -471,37 +481,43 @@ def continued_indentation(logical_line, tokens, indent_level, hang_closing,
# closing bracket matches indentation of opening bracket's line
if hang_closing:
yield start, "E133 closing bracket is missing indentation"
- elif visual_indent is True:
- # visual indent is verified
- if not indent[depth]:
- indent[depth] = start[1]
- elif visual_indent in (text, str):
- # ignore token lined up with matching one from a previous line
- pass
elif indent[depth] and start[1] < indent[depth]:
- # visual indent is broken
- yield (start, "E128 continuation line "
- "under-indented for visual indent")
- elif hang == 4 or (indent_next and rel_indent[row] == 8):
+ if visual_indent is not True:
+ # visual indent is broken
+ yield (start, "E128 continuation line "
+ "under-indented for visual indent")
+ elif hanging_indent or (indent_next and rel_indent[row] == 8):
# hanging indent is verified
if close_bracket and not hang_closing:
yield (start, "E123 closing bracket does not match "
"indentation of opening bracket's line")
+ hangs[depth] = hang
+ elif visual_indent is True:
+ # visual indent is verified
+ indent[depth] = start[1]
+ elif visual_indent in (text, str):
+ # ignore token lined up with matching one from a previous line
+ pass
else:
# indent is broken
if hang <= 0:
error = "E122", "missing indentation or outdented"
elif indent[depth]:
error = "E127", "over-indented for visual indent"
- elif hang % 4:
- error = "E121", "indentation is not a multiple of four"
+ elif not close_bracket and hangs[depth]:
+ error = "E131", "unaligned for hanging indent"
else:
- error = "E126", "over-indented for hanging indent"
+ hangs[depth] = hang
+ if hang > 4:
+ error = "E126", "over-indented for hanging indent"
+ else:
+ error = "E121", "under-indented for hanging indent"
yield start, "%s continuation line %s" % error
# look for visual indenting
- if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT)
- and not indent[depth]):
+ if (parens[row] and
+ token_type not in (tokenize.NL, tokenize.COMMENT) and
+ not indent[depth]):
indent[depth] = start[1]
indent_chances[start[1]] = True
if verbose >= 4:
@@ -513,12 +529,18 @@ def continued_indentation(logical_line, tokens, indent_level, hang_closing,
# special case for the "if" statement because len("if (") == 4
elif not indent_chances and not row and not depth and text == 'if':
indent_chances[end[1] + 1] = True
+ elif text == ':' and line[end[1]:].isspace():
+ open_rows[depth].append(row)
# keep track of bracket depth
if token_type == tokenize.OP:
if text in '([{':
depth += 1
indent.append(0)
+ hangs.append(None)
+ if len(open_rows) == depth:
+ open_rows.append([])
+ open_rows[depth].append(row)
parens[row] += 1
if verbose >= 4:
print("bracket depth %s seen, col %s, visual min = %s" %
@@ -526,19 +548,20 @@ def continued_indentation(logical_line, tokens, indent_level, hang_closing,
elif text in ')]}' and depth > 0:
# parent indents should not be more than this one
prev_indent = indent.pop() or last_indent[1]
+ hangs.pop()
for d in range(depth):
if indent[d] > prev_indent:
indent[d] = 0
for ind in list(indent_chances):
if ind >= prev_indent:
del indent_chances[ind]
+ del open_rows[depth + 1:]
depth -= 1
if depth:
indent_chances[indent[depth]] = True
for idx in range(row, -1, -1):
if parens[idx]:
parens[idx] -= 1
- rel_indent[row] = rel_indent[idx]
break
assert len(indent) == depth + 1
if start[1] not in indent_chances:
@@ -546,21 +569,25 @@ def continued_indentation(logical_line, tokens, indent_level, hang_closing,
indent_chances[start[1]] = text
last_token_multiline = (start[0] != end[0])
+ if last_token_multiline:
+ rel_indent[end[0] - first_row] = rel_indent[row]
if indent_next and expand_indent(line) == indent_level + 4:
- yield (last_indent, "E125 continuation line does not distinguish "
- "itself from next logical line")
+ pos = (start[0], indent[0] + 4)
+ if visual_indent:
+ code = "E129 visually indented line"
+ else:
+ code = "E125 continuation line"
+ yield pos, "%s with same indent as next logical line" % code
def whitespace_before_parameters(logical_line, tokens):
- """
- Avoid extraneous whitespace in the following situations:
+ r"""Avoid extraneous whitespace.
- - Immediately before the open parenthesis that starts the argument
- list of a function call.
-
- - Immediately before the open parenthesis that starts an indexing or
- slicing.
+ Avoid extraneous whitespace in the following situations:
+ - before the open parenthesis that starts the argument list of a
+ function call.
+ - before the open parenthesis that starts an indexing or slicing.
Okay: spam(1)
E211: spam (1)
@@ -587,11 +614,7 @@ def whitespace_before_parameters(logical_line, tokens):
def whitespace_around_operator(logical_line):
- r"""
- Avoid extraneous whitespace in the following situations:
-
- - More than one space around an assignment (or other) operator to
- align it with another.
+ r"""Avoid extraneous whitespace around an operator.
Okay: a = 12 + 3
E221: a = 4 + 5
@@ -614,13 +637,15 @@ def whitespace_around_operator(logical_line):
def missing_whitespace_around_operator(logical_line, tokens):
- r"""
+ r"""Surround operators with a single space on either side.
+
- Always surround these binary operators with a single space on
either side: assignment (=), augmented assignment (+=, -= etc.),
- comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not),
+ comparisons (==, <, >, !=, <=, >=, in, not in, is, is not),
Booleans (and, or, not).
- - Use spaces around arithmetic operators.
+ - If operators with different priorities are used, consider adding
+ whitespace around the operators with the lowest priorities.
Okay: i = i + 1
Okay: submitted += 1
@@ -644,8 +669,7 @@ def missing_whitespace_around_operator(logical_line, tokens):
prev_type = tokenize.OP
prev_text = prev_end = None
for token_type, text, start, end, line in tokens:
- if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN):
- # ERRORTOKEN is triggered by backticks in Python 3
+ if token_type in SKIP_COMMENTS:
continue
if text in ('(', 'lambda'):
parens += 1
@@ -666,7 +690,7 @@ def missing_whitespace_around_operator(logical_line, tokens):
if need_space is True or need_space[1]:
# A needed trailing space was not found
yield prev_end, "E225 missing whitespace around operator"
- else:
+ elif prev_text != '**':
code, optype = 'E226', 'arithmetic'
if prev_text == '%':
code, optype = 'E228', 'modulo'
@@ -685,14 +709,8 @@ def missing_whitespace_around_operator(logical_line, tokens):
# Check if the operator is being used as a binary operator
# Allow unary operators: -123, -x, +1.
# Allow argument unpacking: foo(*args, **kwargs).
- if prev_type == tokenize.OP:
- binary_usage = (prev_text in '}])')
- elif prev_type == tokenize.NAME:
- binary_usage = (prev_text not in KEYWORDS)
- else:
- binary_usage = (prev_type not in SKIP_TOKENS)
-
- if binary_usage:
+ if (prev_text in '}])' if prev_type == tokenize.OP
+ else prev_text not in KEYWORDS):
need_space = None
elif text in WS_OPTIONAL_OPERATORS:
need_space = None
@@ -711,11 +729,7 @@ def missing_whitespace_around_operator(logical_line, tokens):
def whitespace_around_comma(logical_line):
- r"""
- Avoid extraneous whitespace in the following situations:
-
- - More than one space around an assignment (or other) operator to
- align it with another.
+ r"""Avoid extraneous whitespace after a comma or a colon.
Note: these checks are disabled by default
@@ -733,7 +747,8 @@ def whitespace_around_comma(logical_line):
def whitespace_around_named_parameter_equals(logical_line, tokens):
- """
+ r"""Don't use spaces around the '=' sign in function arguments.
+
Don't use spaces around the '=' sign when used to indicate a
keyword argument or a default parameter value.
@@ -743,6 +758,7 @@ def whitespace_around_named_parameter_equals(logical_line, tokens):
Okay: boolean(a != b)
Okay: boolean(a <= b)
Okay: boolean(a >= b)
+ Okay: def foo(arg: int = 42):
E251: def complex(real, imag = 0.0):
E251: return magic(r = real, i = imag)
@@ -750,56 +766,78 @@ def whitespace_around_named_parameter_equals(logical_line, tokens):
parens = 0
no_space = False
prev_end = None
+ annotated_func_arg = False
+ in_def = logical_line.startswith('def')
message = "E251 unexpected spaces around keyword / parameter equals"
for token_type, text, start, end, line in tokens:
+ if token_type == tokenize.NL:
+ continue
if no_space:
no_space = False
if start != prev_end:
yield (prev_end, message)
- elif token_type == tokenize.OP:
+ if token_type == tokenize.OP:
if text == '(':
parens += 1
elif text == ')':
parens -= 1
- elif parens and text == '=':
+ elif in_def and text == ':' and parens == 1:
+ annotated_func_arg = True
+ elif parens and text == ',' and parens == 1:
+ annotated_func_arg = False
+ elif parens and text == '=' and not annotated_func_arg:
no_space = True
if start != prev_end:
yield (prev_end, message)
+ if not parens:
+ annotated_func_arg = False
+
prev_end = end
-def whitespace_before_inline_comment(logical_line, tokens):
- """
- Separate inline comments by at least two spaces.
+def whitespace_before_comment(logical_line, tokens):
+ r"""Separate inline comments by at least two spaces.
An inline comment is a comment on the same line as a statement. Inline
comments should be separated by at least two spaces from the statement.
They should start with a # and a single space.
+ Each line of a block comment starts with a # and a single space
+ (unless it is indented text inside the comment).
+
Okay: x = x + 1 # Increment x
Okay: x = x + 1 # Increment x
+ Okay: # Block comment
E261: x = x + 1 # Increment x
E262: x = x + 1 #Increment x
E262: x = x + 1 # Increment x
+ E265: #Block comment
+ E266: ### Block comment
"""
prev_end = (0, 0)
for token_type, text, start, end, line in tokens:
if token_type == tokenize.COMMENT:
- if not line[:start[1]].strip():
- continue
- if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
- yield (prev_end,
- "E261 at least two spaces before inline comment")
+ inline_comment = line[:start[1]].strip()
+ if inline_comment:
+ if prev_end[0] == start[0] and start[1] < prev_end[1] + 2:
+ yield (prev_end,
+ "E261 at least two spaces before inline comment")
symbol, sp, comment = text.partition(' ')
- if symbol not in ('#', '#:') or comment[:1].isspace():
- yield start, "E262 inline comment should start with '# '"
+ bad_prefix = symbol not in '#:' and (symbol.lstrip('#')[:1] or '#')
+ if inline_comment:
+ if bad_prefix or comment[:1] in WHITESPACE:
+ yield start, "E262 inline comment should start with '# '"
+ elif bad_prefix and (bad_prefix != '!' or start[0] > 1):
+ if bad_prefix != '#':
+ yield start, "E265 block comment should start with '# '"
+ elif comment:
+ yield start, "E266 too many leading '#' for block comment"
elif token_type != tokenize.NL:
prev_end = end
def imports_on_separate_lines(logical_line):
- r"""
- Imports should usually be on separate lines.
+ r"""Imports should usually be on separate lines.
Okay: import os\nimport sys
E401: import sys, os
@@ -817,14 +855,65 @@ def imports_on_separate_lines(logical_line):
yield found, "E401 multiple imports on one line"
+def module_imports_on_top_of_file(
+ logical_line, indent_level, checker_state, noqa):
+ r"""Imports are always put at the top of the file, just after any module
+ comments and docstrings, and before module globals and constants.
+
+ Okay: import os
+ Okay: # this is a comment\nimport os
+ Okay: '''this is a module docstring'''\nimport os
+ Okay: r'''this is a module docstring'''\nimport os
+ Okay: try:\n import x\nexcept:\n pass\nelse:\n pass\nimport y
+ Okay: try:\n import x\nexcept:\n pass\nfinally:\n pass\nimport y
+ E402: a=1\nimport os
+ E402: 'One string'\n"Two string"\nimport os
+ E402: a=1\nfrom sys import x
+
+ Okay: if x:\n import os
+ """
+ def is_string_literal(line):
+ if line[0] in 'uUbB':
+ line = line[1:]
+ if line and line[0] in 'rR':
+ line = line[1:]
+ return line and (line[0] == '"' or line[0] == "'")
+
+ allowed_try_keywords = ('try', 'except', 'else', 'finally')
+
+ if indent_level: # Allow imports in conditional statements or functions
+ return
+ if not logical_line: # Allow empty lines or comments
+ return
+ if noqa:
+ return
+ line = logical_line
+ if line.startswith('import ') or line.startswith('from '):
+ if checker_state.get('seen_non_imports', False):
+ yield 0, "E402 module level import not at top of file"
+ elif any(line.startswith(kw) for kw in allowed_try_keywords):
+ # Allow try, except, else, finally keywords intermixed with imports in
+ # order to support conditional importing
+ return
+ elif is_string_literal(line):
+ # The first literal is a docstring, allow it. Otherwise, report error.
+ if checker_state.get('seen_docstring', False):
+ checker_state['seen_non_imports'] = True
+ else:
+ checker_state['seen_docstring'] = True
+ else:
+ checker_state['seen_non_imports'] = True
+
+
def compound_statements(logical_line):
- r"""
- Compound statements (multiple statements on the same line) are
- generally discouraged.
+ r"""Compound statements (on the same line) are generally discouraged.
While sometimes it's okay to put an if/for/while with a small body
- on the same line, never do this for multi-clause statements. Also
- avoid folding such long lines!
+ on the same line, never do this for multi-clause statements.
+ Also avoid folding such long lines!
+
+ Always use a def statement instead of an assignment statement that
+ binds a lambda expression directly to a name.
Okay: if foo == 'blah':\n do_blah_thing()
Okay: do_one()
@@ -839,20 +928,30 @@ def compound_statements(logical_line):
E701: try: something()
E701: finally: cleanup()
E701: if foo == 'blah': one(); two(); three()
-
E702: do_one(); do_two(); do_three()
E703: do_four(); # useless semicolon
+ E704: def f(x): return 2*x
+ E731: f = lambda x: 2*x
"""
line = logical_line
last_char = len(line) - 1
found = line.find(':')
while -1 < found < last_char:
before = line[:found]
- if (before.count('{') <= before.count('}') and # {'a': 1} (dict)
- before.count('[') <= before.count(']') and # [1:2] (slice)
- before.count('(') <= before.count(')') and # (Python 3 annotation)
- not LAMBDA_REGEX.search(before)): # lambda x: x
- yield found, "E701 multiple statements on one line (colon)"
+ if ((before.count('{') <= before.count('}') and # {'a': 1} (dict)
+ before.count('[') <= before.count(']') and # [1:2] (slice)
+ before.count('(') <= before.count(')'))): # (annotation)
+ lambda_kw = LAMBDA_REGEX.search(before)
+ if lambda_kw:
+ before = line[:lambda_kw.start()].rstrip()
+ if before[-1:] == '=' and isidentifier(before[:-1].strip()):
+ yield 0, ("E731 do not assign a lambda expression, use a "
+ "def")
+ break
+ if before.startswith('def '):
+ yield 0, "E704 multiple statements on one line (def)"
+ else:
+ yield found, "E701 multiple statements on one line (colon)"
found = line.find(':', found + 1)
found = line.find(';')
while -1 < found:
@@ -864,8 +963,7 @@ def compound_statements(logical_line):
def explicit_line_join(logical_line, tokens):
- r"""
- Avoid explicit line join between brackets.
+ r"""Avoid explicit line join between brackets.
The preferred way of wrapping long lines is by using Python's implied line
continuation inside parentheses, brackets and braces. Long lines can be
@@ -878,10 +976,15 @@ def explicit_line_join(logical_line, tokens):
Okay: aaa = [123,\n 123]
Okay: aaa = ("bbb "\n "ccc")
Okay: aaa = "bbb " \\n "ccc"
+ Okay: aaa = 123 # \\
"""
prev_start = prev_end = parens = 0
+ comment = False
+ backslash = None
for token_type, text, start, end, line in tokens:
- if start[0] != prev_start and parens and backslash:
+ if token_type == tokenize.COMMENT:
+ comment = True
+ if start[0] != prev_start and parens and backslash and not comment:
yield backslash, "E502 the backslash is redundant between brackets"
if end[0] != prev_end:
if line.rstrip('\r\n').endswith('\\'):
@@ -898,14 +1001,56 @@ def explicit_line_join(logical_line, tokens):
parens -= 1
-def comparison_to_singleton(logical_line, noqa):
+def break_around_binary_operator(logical_line, tokens):
+ r"""
+ Avoid breaks before binary operators.
+
+ The preferred place to break around a binary operator is after the
+ operator, not before it.
+
+ W503: (width == 0\n + height == 0)
+ W503: (width == 0\n and height == 0)
+
+ Okay: (width == 0 +\n height == 0)
+ Okay: foo(\n -x)
+ Okay: foo(x\n [])
+ Okay: x = '''\n''' + ''
+ Okay: foo(x,\n -y)
+ Okay: foo(x, # comment\n -y)
"""
+ def is_binary_operator(token_type, text):
+ # The % character is strictly speaking a binary operator, but the
+ # common usage seems to be to put it next to the format parameters,
+ # after a line break.
+ return ((token_type == tokenize.OP or text in ['and', 'or']) and
+ text not in "()[]{},:.;@=%")
+
+ line_break = False
+ unary_context = True
+ for token_type, text, start, end, line in tokens:
+ if token_type == tokenize.COMMENT:
+ continue
+ if ('\n' in text or '\r' in text) and token_type != tokenize.STRING:
+ line_break = True
+ else:
+ if (is_binary_operator(token_type, text) and line_break and
+ not unary_context):
+ yield start, "W503 line break before binary operator"
+ unary_context = text in '([{,;'
+ line_break = False
+
+
+def comparison_to_singleton(logical_line, noqa):
+ r"""Comparison to singletons should use "is" or "is not".
+
Comparisons to singletons like None should always be done
with "is" or "is not", never the equality operators.
Okay: if arg is not None:
E711: if arg != None:
+ E711: if None == arg:
E712: if arg == True:
+ E712: if False == arg:
Also, beware of writing if x when you really mean if x is not None --
e.g. when testing whether a variable or argument that defaults to None was
@@ -914,8 +1059,9 @@ def comparison_to_singleton(logical_line, noqa):
"""
match = not noqa and COMPARE_SINGLETON_REGEX.search(logical_line)
if match:
- same = (match.group(1) == '==')
- singleton = match.group(2)
+ singleton = match.group(1) or match.group(3)
+ same = (match.group(2) == '==')
+
msg = "'if cond is %s:'" % (('' if same else 'not ') + singleton)
if singleton in ('None',):
code = 'E711'
@@ -924,14 +1070,35 @@ def comparison_to_singleton(logical_line, noqa):
nonzero = ((singleton == 'True' and same) or
(singleton == 'False' and not same))
msg += " or 'if %scond:'" % ('' if nonzero else 'not ')
- yield match.start(1), ("%s comparison to %s should be %s" %
+ yield match.start(2), ("%s comparison to %s should be %s" %
(code, singleton, msg))
-def comparison_type(logical_line):
+def comparison_negative(logical_line):
+ r"""Negative comparison should be done using "not in" and "is not".
+
+ Okay: if x not in y:\n pass
+ Okay: assert (X in Y or X is Z)
+ Okay: if not (X in Y):\n pass
+ Okay: zz = x is not y
+ E713: Z = not X in Y
+ E713: if not X.B in Y:\n pass
+ E714: if not X is Y:\n pass
+ E714: Z = not X.B is Y
"""
- Object type comparisons should always use isinstance() instead of
- comparing types directly.
+ match = COMPARE_NEGATIVE_REGEX.search(logical_line)
+ if match:
+ pos = match.start(1)
+ if match.group(2) == 'in':
+ yield pos, "E713 test for membership should be 'not in'"
+ else:
+ yield pos, "E714 test for object identity should be 'is not'"
+
+
+def comparison_type(logical_line, noqa):
+ r"""Object type comparisons should always use isinstance().
+
+ Do not compare types directly.
Okay: if isinstance(obj, int):
E721: if type(obj) is type(1):
@@ -944,35 +1111,28 @@ def comparison_type(logical_line):
Okay: if type(a1) is type(b1):
"""
match = COMPARE_TYPE_REGEX.search(logical_line)
- if match:
+ if match and not noqa:
inst = match.group(1)
if inst and isidentifier(inst) and inst not in SINGLETONS:
return # Allow comparison for types which are not obvious
yield match.start(), "E721 do not compare types, use 'isinstance()'"
-def python_3000_has_key(logical_line):
- r"""
- The {}.has_key() method is removed in the Python 3.
- Use the 'in' operation instead.
+def python_3000_has_key(logical_line, noqa):
+ r"""The {}.has_key() method is removed in Python 3: use the 'in' operator.
Okay: if "alph" in d:\n print d["alph"]
W601: assert d.has_key('alph')
"""
pos = logical_line.find('.has_key(')
- if pos > -1:
+ if pos > -1 and not noqa:
yield pos, "W601 .has_key() is deprecated, use 'in'"
def python_3000_raise_comma(logical_line):
- """
- When raising an exception, use "raise ValueError('message')"
- instead of the older form "raise ValueError, 'message'".
+ r"""When raising an exception, use "raise ValueError('message')".
- The paren-using form is preferred because when the exception arguments
- are long or include string formatting, you don't need to use line
- continuation characters thanks to the containing parentheses. The older
- form is removed in Python 3.
+ The older form is removed in Python 3.
Okay: raise DummyError("Message")
W602: raise DummyError, "Message"
@@ -983,9 +1143,8 @@ def python_3000_raise_comma(logical_line):
def python_3000_not_equal(logical_line):
- """
- != can also be written <>, but this is an obsolete usage kept for
- backwards compatibility only. New code should always use !=.
+ r"""New code should always use != instead of <>.
+
The older syntax is removed in Python 3.
Okay: if a != 'no':
@@ -997,9 +1156,7 @@ def python_3000_not_equal(logical_line):
def python_3000_backticks(logical_line):
- """
- Backticks are removed in Python 3.
- Use repr() instead.
+ r"""Backticks are removed in Python 3: use repr() instead.
Okay: val = repr(1 + 2)
W604: val = `1 + 2`
@@ -1017,47 +1174,40 @@ def python_3000_backticks(logical_line):
if '' == ''.encode():
# Python 2: implicit encoding.
def readlines(filename):
- f = open(filename)
- try:
+ """Read the source code."""
+ with open(filename, 'rU') as f:
return f.readlines()
- finally:
- f.close()
- isidentifier = re.compile(r'[a-zA-Z_]\w*').match
+ isidentifier = re.compile(r'[a-zA-Z_]\w*$').match
stdin_get_value = sys.stdin.read
else:
# Python 3
def readlines(filename):
- f = open(filename, 'rb')
+ """Read the source code."""
try:
- coding, lines = tokenize.detect_encoding(f.readline)
- f = TextIOWrapper(f, coding, line_buffering=True)
- return [l.decode(coding) for l in lines] + f.readlines()
+ with open(filename, 'rb') as f:
+ (coding, lines) = tokenize.detect_encoding(f.readline)
+ f = TextIOWrapper(f, coding, line_buffering=True)
+ return [l.decode(coding) for l in lines] + f.readlines()
except (LookupError, SyntaxError, UnicodeError):
- f.close()
- # Fall back if files are improperly declared
- f = open(filename, encoding='latin-1')
- return f.readlines()
- finally:
- f.close()
+ # Fall back if file encoding is improperly declared
+ with open(filename, encoding='latin-1') as f:
+ return f.readlines()
isidentifier = str.isidentifier
def stdin_get_value():
return TextIOWrapper(sys.stdin.buffer, errors='ignore').read()
-readlines.__doc__ = " Read the source code."
noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search
def expand_indent(line):
- r"""
- Return the amount of indentation.
+ r"""Return the amount of indentation.
+
Tabs are expanded to the next multiple of 8.
>>> expand_indent(' ')
4
>>> expand_indent('\t')
8
- >>> expand_indent(' \t')
- 8
>>> expand_indent(' \t')
8
>>> expand_indent(' \t')
@@ -1077,8 +1227,7 @@ def expand_indent(line):
def mute_string(text):
- """
- Replace contents with 'xxx' to prevent syntax matching.
+ """Replace contents with 'xxx' to prevent syntax matching.
>>> mute_string('"abc"')
'"xxx"'
@@ -1110,7 +1259,7 @@ def parse_udiff(diff, patterns=None, parent='.'):
continue
if line[:3] == '@@ ':
hunk_match = HUNK_REGEX.match(line)
- row, nrows = [int(g or '1') for g in hunk_match.groups()]
+ (row, nrows) = [int(g or '1') for g in hunk_match.groups()]
rv[path].update(range(row, row + nrows))
elif line[:3] == '+++':
path = line[4:].split('\t', 1)[0]
@@ -1122,9 +1271,27 @@ def parse_udiff(diff, patterns=None, parent='.'):
if rows and filename_match(path, patterns)])
-def filename_match(filename, patterns, default=True):
+def normalize_paths(value, parent=os.curdir):
+ """Parse a comma-separated list of paths.
+
+ Return a list of absolute paths.
"""
- Check if patterns contains a pattern that matches filename.
+ if not value:
+ return []
+ if isinstance(value, list):
+ return value
+ paths = []
+ for path in value.split(','):
+ path = path.strip()
+ if '/' in path:
+ path = os.path.abspath(os.path.join(parent, path))
+ paths.append(path.rstrip('/'))
+ return paths
+
+
+def filename_match(filename, patterns, default=True):
+ """Check if patterns contains a pattern that matches filename.
+
If patterns is unspecified, this always returns True.
"""
if not patterns:
@@ -1132,6 +1299,13 @@ def filename_match(filename, patterns, default=True):
return any(fnmatch(filename, pattern) for pattern in patterns)
+def _is_eol_token(token):
+ return token[0] in NEWLINE or token[4][token[3][1]:].lstrip() == '\\\n'
+if COMMENT_WITH_NL:
+ def _is_eol_token(token, _eol_token=_is_eol_token):
+ return _eol_token(token) or (token[0] == tokenize.COMMENT and
+ token[1] == token[4])
+
##############################################################################
# Framework to run all checks
##############################################################################
@@ -1141,9 +1315,7 @@ _checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}}
def register_check(check, codes=None):
- """
- Register a new check object.
- """
+ """Register a new check object."""
def _add_check(check, kind, codes, args):
if check in _checks[kind]:
_checks[kind][check][0].extend(codes or [])
@@ -1161,9 +1333,9 @@ def register_check(check, codes=None):
def init_checks_registry():
- """
- Register all globally visible functions where the first argument name
- is 'physical_line' or 'logical_line'.
+ """Register all globally visible functions.
+
+ The first argument name is either 'physical_line' or 'logical_line'.
"""
mod = inspect.getmodule(register_check)
for (name, function) in inspect.getmembers(mod, inspect.isfunction):
@@ -1172,9 +1344,7 @@ init_checks_registry()
class Checker(object):
- """
- Load a Python source file, tokenize it, check coding style.
- """
+ """Load a Python source file, tokenize it, check coding style."""
def __init__(self, filename=None, lines=None,
options=None, report=None, **kwargs):
@@ -1187,9 +1357,12 @@ class Checker(object):
self._logical_checks = options.logical_checks
self._ast_checks = options.ast_checks
self.max_line_length = options.max_line_length
+ self.multiline = False # in a multiline string?
self.hang_closing = options.hang_closing
self.verbose = options.verbose
self.filename = filename
+ # Dictionary where a checker can store its custom state.
+ self._checker_states = {}
if filename is None:
self.filename = 'stdin'
self.lines = lines or []
@@ -1200,7 +1373,7 @@ class Checker(object):
try:
self.lines = readlines(filename)
except IOError:
- exc_type, exc = sys.exc_info()[:2]
+ (exc_type, exc) = sys.exc_info()[:2]
self._io_error = '%s: %s' % (exc_type.__name__, exc)
self.lines = []
else:
@@ -1216,7 +1389,8 @@ class Checker(object):
self.report_error = self.report.error
def report_invalid_syntax(self):
- exc_type, exc = sys.exc_info()[:2]
+ """Check if the syntax is valid."""
+ (exc_type, exc) = sys.exc_info()[:2]
if len(exc.args) > 1:
offset = exc.args[1]
if len(offset) > 2:
@@ -1226,150 +1400,174 @@ class Checker(object):
self.report_error(offset[0], offset[1] or 0,
'E901 %s: %s' % (exc_type.__name__, exc.args[0]),
self.report_invalid_syntax)
- report_invalid_syntax.__doc__ = " Check if the syntax is valid."
def readline(self):
- """
- Get the next line from the input buffer.
- """
- self.line_number += 1
- if self.line_number > len(self.lines):
+ """Get the next line from the input buffer."""
+ if self.line_number >= self.total_lines:
return ''
- return self.lines[self.line_number - 1]
-
- def readline_check_physical(self):
- """
- Check and return the next physical line. This method can be
- used to feed tokenize.generate_tokens.
- """
- line = self.readline()
- if line:
- self.check_physical(line)
+ line = self.lines[self.line_number]
+ self.line_number += 1
+ if self.indent_char is None and line[:1] in WHITESPACE:
+ self.indent_char = line[0]
return line
def run_check(self, check, argument_names):
- """
- Run a check plugin.
- """
+ """Run a check plugin."""
arguments = []
for name in argument_names:
arguments.append(getattr(self, name))
return check(*arguments)
+ def init_checker_state(self, name, argument_names):
+ """ Prepares a custom state for the specific checker plugin."""
+ if 'checker_state' in argument_names:
+ self.checker_state = self._checker_states.setdefault(name, {})
+
def check_physical(self, line):
- """
- Run all physical checks on a raw input line.
- """
+ """Run all physical checks on a raw input line."""
self.physical_line = line
- if self.indent_char is None and line[:1] in WHITESPACE:
- self.indent_char = line[0]
for name, check, argument_names in self._physical_checks:
+ self.init_checker_state(name, argument_names)
result = self.run_check(check, argument_names)
if result is not None:
- offset, text = result
+ (offset, text) = result
self.report_error(self.line_number, offset, text, check)
+ if text[:4] == 'E101':
+ self.indent_char = line[0]
def build_tokens_line(self):
- """
- Build a logical line from tokens.
- """
- self.mapping = []
+ """Build a logical line from tokens."""
logical = []
comments = []
length = 0
- previous = None
- for token in self.tokens:
- token_type, text = token[0:2]
+ prev_row = prev_col = mapping = None
+ for token_type, text, start, end, line in self.tokens:
+ if token_type in SKIP_TOKENS:
+ continue
+ if not mapping:
+ mapping = [(0, start)]
if token_type == tokenize.COMMENT:
comments.append(text)
continue
- if token_type in SKIP_TOKENS:
- continue
if token_type == tokenize.STRING:
text = mute_string(text)
- if previous:
- end_row, end = previous[3]
- start_row, start = token[2]
- if end_row != start_row: # different row
- prev_text = self.lines[end_row - 1][end - 1]
- if prev_text == ',' or (prev_text not in '{[('
- and text not in '}])'):
- logical.append(' ')
- length += 1
- elif end != start: # different column
- fill = self.lines[end_row - 1][end:start]
- logical.append(fill)
- length += len(fill)
- self.mapping.append((length, token))
+ if prev_row:
+ (start_row, start_col) = start
+ if prev_row != start_row: # different row
+ prev_text = self.lines[prev_row - 1][prev_col - 1]
+ if prev_text == ',' or (prev_text not in '{[(' and
+ text not in '}])'):
+ text = ' ' + text
+ elif prev_col != start_col: # different column
+ text = line[prev_col:start_col] + text
logical.append(text)
length += len(text)
- previous = token
+ mapping.append((length, end))
+ (prev_row, prev_col) = end
self.logical_line = ''.join(logical)
self.noqa = comments and noqa(''.join(comments))
- # With Python 2, if the line ends with '\r\r\n' the assertion fails
- # assert self.logical_line.strip() == self.logical_line
+ return mapping
def check_logical(self):
- """
- Build a line from tokens and run all logical checks on it.
- """
- self.build_tokens_line()
+ """Build a line from tokens and run all logical checks on it."""
self.report.increment_logical_line()
- first_line = self.lines[self.mapping[0][1][2][0] - 1]
- indent = first_line[:self.mapping[0][1][2][1]]
- self.previous_indent_level = self.indent_level
- self.indent_level = expand_indent(indent)
+ mapping = self.build_tokens_line()
+
+ if not mapping:
+ return
+
+ (start_row, start_col) = mapping[0][1]
+ start_line = self.lines[start_row - 1]
+ self.indent_level = expand_indent(start_line[:start_col])
+ if self.blank_before < self.blank_lines:
+ self.blank_before = self.blank_lines
if self.verbose >= 2:
print(self.logical_line[:80].rstrip())
for name, check, argument_names in self._logical_checks:
if self.verbose >= 4:
print(' ' + name)
- for result in self.run_check(check, argument_names):
- offset, text = result
- if isinstance(offset, tuple):
- orig_number, orig_offset = offset
- else:
- for token_offset, token in self.mapping:
- if offset >= token_offset:
- orig_number = token[2][0]
- orig_offset = (token[2][1] + offset - token_offset)
- self.report_error(orig_number, orig_offset, text, check)
- self.previous_logical = self.logical_line
+ self.init_checker_state(name, argument_names)
+ for offset, text in self.run_check(check, argument_names) or ():
+ if not isinstance(offset, tuple):
+ for token_offset, pos in mapping:
+ if offset <= token_offset:
+ break
+ offset = (pos[0], pos[1] + offset - token_offset)
+ self.report_error(offset[0], offset[1], text, check)
+ if self.logical_line:
+ self.previous_indent_level = self.indent_level
+ self.previous_logical = self.logical_line
+ self.blank_lines = 0
+ self.tokens = []
def check_ast(self):
+ """Build the file's AST and run all AST checks."""
try:
tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST)
except (SyntaxError, TypeError):
return self.report_invalid_syntax()
- for name, cls, _ in self._ast_checks:
+ for name, cls, __ in self._ast_checks:
checker = cls(tree, self.filename)
for lineno, offset, text, check in checker.run():
- if not noqa(self.lines[lineno - 1]):
+ if not self.lines or not noqa(self.lines[lineno - 1]):
self.report_error(lineno, offset, text, check)
def generate_tokens(self):
+ """Tokenize the file, run physical line checks and yield tokens."""
if self._io_error:
self.report_error(1, 0, 'E902 %s' % self._io_error, readlines)
- tokengen = tokenize.generate_tokens(self.readline_check_physical)
+ tokengen = tokenize.generate_tokens(self.readline)
try:
for token in tokengen:
+ if token[2][0] > self.total_lines:
+ return
+ self.maybe_check_physical(token)
yield token
except (SyntaxError, tokenize.TokenError):
self.report_invalid_syntax()
+ def maybe_check_physical(self, token):
+ """If appropriate (based on token), check current physical line(s)."""
+ # Called after every token, but act only on end of line.
+ if _is_eol_token(token):
+ # Obviously, a newline token ends a single physical line.
+ self.check_physical(token[4])
+ elif token[0] == tokenize.STRING and '\n' in token[1]:
+ # Less obviously, a string that contains newlines is a
+ # multiline string, either triple-quoted or with internal
+ # newlines backslash-escaped. Check every physical line in the
+ # string *except* for the last one: its newline is outside of
+ # the multiline string, so we consider it a regular physical
+ # line, and will check it like any other physical line.
+ #
+ # Subtleties:
+ # - we don't *completely* ignore the last line; if it contains
+ # the magical "# noqa" comment, we disable all physical
+ # checks for the entire multiline string
+ # - have to wind self.line_number back because initially it
+ # points to the last line of the string, and we want
+ # check_physical() to give accurate feedback
+ if noqa(token[4]):
+ return
+ self.multiline = True
+ self.line_number = token[2][0]
+ for line in token[1].split('\n')[:-1]:
+ self.check_physical(line + '\n')
+ self.line_number += 1
+ self.multiline = False
+
def check_all(self, expected=None, line_offset=0):
- """
- Run all checks on the input file.
- """
+ """Run all checks on the input file."""
self.report.init_file(self.filename, self.lines, expected, line_offset)
+ self.total_lines = len(self.lines)
if self._ast_checks:
self.check_ast()
self.line_number = 0
self.indent_char = None
- self.indent_level = 0
+ self.indent_level = self.previous_indent_level = 0
self.previous_logical = ''
self.tokens = []
- self.blank_lines = blank_lines_before_comment = 0
+ self.blank_lines = self.blank_before = 0
parens = 0
for token in self.generate_tokens():
self.tokens.append(token)
@@ -1387,29 +1585,33 @@ class Checker(object):
elif text in '}])':
parens -= 1
elif not parens:
- if token_type == tokenize.NEWLINE:
- if self.blank_lines < blank_lines_before_comment:
- self.blank_lines = blank_lines_before_comment
- self.check_logical()
- self.tokens = []
- self.blank_lines = blank_lines_before_comment = 0
- elif token_type == tokenize.NL:
- if len(self.tokens) == 1:
+ if token_type in NEWLINE:
+ if token_type == tokenize.NEWLINE:
+ self.check_logical()
+ self.blank_before = 0
+ elif len(self.tokens) == 1:
# The physical line contains only this token.
self.blank_lines += 1
- self.tokens = []
- elif token_type == tokenize.COMMENT and len(self.tokens) == 1:
- if blank_lines_before_comment < self.blank_lines:
- blank_lines_before_comment = self.blank_lines
- self.blank_lines = 0
- if COMMENT_WITH_NL:
+ del self.tokens[0]
+ else:
+ self.check_logical()
+ elif COMMENT_WITH_NL and token_type == tokenize.COMMENT:
+ if len(self.tokens) == 1:
# The comment also ends a physical line
- self.tokens = []
+ token = list(token)
+ token[1] = text.rstrip('\r\n')
+ token[3] = (token[2][0], token[2][1] + len(token[1]))
+ self.tokens = [tuple(token)]
+ self.check_logical()
+ if self.tokens:
+ self.check_physical(self.lines[-1])
+ self.check_logical()
return self.report.get_file_results()
class BaseReport(object):
"""Collect the results of the checks."""
+
print_filename = False
def __init__(self, options):
@@ -1472,8 +1674,7 @@ class BaseReport(object):
for key in self.messages if key.startswith(prefix)])
def get_statistics(self, prefix=''):
- """
- Get statistics for message codes that start with the prefix.
+ """Get statistics for message codes that start with the prefix.
prefix='' matches all errors and warnings
prefix='E' matches all errors
@@ -1544,9 +1745,17 @@ class StandardReport(BaseReport):
else:
line = self.lines[line_number - 1]
print(line.rstrip())
- print(' ' * offset + '^')
+ print(re.sub(r'\S', ' ', line[:offset]) + '^')
if self._show_pep8 and doc:
- print(doc.lstrip('\n').rstrip())
+ print(' ' + doc.strip())
+
+ # stdout is block buffered when not stdout.isatty().
+ # line can be broken where buffer boundary since other processes
+ # write to same file.
+ # flush() after print() to avoid buffer boundary.
+ # Typical buffer size is 8192. line written safely when
+ # len(line) < 8192.
+ sys.stdout.flush()
return self.file_errors
@@ -1570,13 +1779,14 @@ class StyleGuide(object):
# build options from the command line
self.checker_class = kwargs.pop('checker_class', Checker)
parse_argv = kwargs.pop('parse_argv', False)
- config_file = kwargs.pop('config_file', None)
+ config_file = kwargs.pop('config_file', False)
parser = kwargs.pop('parser', None)
+ # build options from dict
+ options_dict = dict(*args, **kwargs)
+ arglist = None if parse_argv else options_dict.get('paths', None)
options, self.paths = process_options(
- parse_argv=parse_argv, config_file=config_file, parser=parser)
- if args or kwargs:
- # build options from dict
- options_dict = dict(*args, **kwargs)
+ arglist, parse_argv, config_file, parser)
+ if options_dict:
options.__dict__.update(options_dict)
if 'paths' in options_dict:
self.paths = options_dict['paths']
@@ -1587,8 +1797,6 @@ class StyleGuide(object):
if not options.reporter:
options.reporter = BaseReport if options.quiet else StandardReport
- for index, value in enumerate(options.exclude):
- options.exclude[index] = value.rstrip('/')
options.select = tuple(options.select or ())
if not (options.select or options.ignore or
options.testsuite or options.doctest) and DEFAULT_IGNORE:
@@ -1658,8 +1866,9 @@ class StyleGuide(object):
runner(os.path.join(root, filename))
def excluded(self, filename, parent=None):
- """
- Check if options.exclude contains a pattern that matches filename.
+ """Check if the file should be excluded.
+
+ Check if 'options.exclude' contains a pattern that matches filename.
"""
if not self.options.exclude:
return False
@@ -1668,21 +1877,25 @@ class StyleGuide(object):
return True
if parent:
filename = os.path.join(parent, filename)
+ filename = os.path.abspath(filename)
return filename_match(filename, self.options.exclude)
def ignore_code(self, code):
- """
- Check if the error code should be ignored.
+ """Check if the error code should be ignored.
If 'options.select' contains a prefix of the error code,
return False. Else, if 'options.ignore' contains a prefix of
the error code, return True.
"""
+ if len(code) < 4 and any(s.startswith(code)
+ for s in self.options.select):
+ return False
return (code.startswith(self.options.ignore) and
not code.startswith(self.options.select))
def get_checks(self, argument_name):
- """
+ """Get all the checks for this category.
+
Find all globally visible functions where the first argument name
starts with argument_name and which contain selected tests.
"""
@@ -1719,7 +1932,8 @@ def get_parser(prog='pep8', version=__version__):
parser.add_option('--select', metavar='errors', default='',
help="select errors and warnings (e.g. E,W6)")
parser.add_option('--ignore', metavar='errors', default='',
- help="skip errors and warnings (e.g. E4,W)")
+ help="skip errors and warnings (e.g. E4,W) "
+ "(default: %s)" % DEFAULT_IGNORE)
parser.add_option('--show-source', action='store_true',
help="show source code for each error")
parser.add_option('--show-pep8', action='store_true',
@@ -1755,22 +1969,39 @@ def get_parser(prog='pep8', version=__version__):
def read_config(options, args, arglist, parser):
- """Read both user configuration and local configuration."""
+ """Read and parse configurations
+
+ If a config file is specified on the command line with the "--config"
+ option, then only it is used for configuration.
+
+ Otherwise, the user configuration (~/.config/pep8) and any local
+ configurations in the current directory or above will be merged together
+ (in that order) using the read method of ConfigParser.
+ """
config = RawConfigParser()
- user_conf = options.config
- if user_conf and os.path.isfile(user_conf):
- if options.verbose:
- print('user configuration: %s' % user_conf)
- config.read(user_conf)
+ cli_conf = options.config
- parent = tail = args and os.path.abspath(os.path.commonprefix(args))
- while tail:
- if config.read([os.path.join(parent, fn) for fn in PROJECT_CONFIG]):
+ local_dir = os.curdir
+
+ if cli_conf and os.path.isfile(cli_conf):
+ if options.verbose:
+ print('cli configuration: %s' % cli_conf)
+ config.read(cli_conf)
+ else:
+ if USER_CONFIG and os.path.isfile(USER_CONFIG):
if options.verbose:
- print('local configuration: in %s' % parent)
- break
- parent, tail = os.path.split(parent)
+ print('user configuration: %s' % USER_CONFIG)
+ config.read(USER_CONFIG)
+
+ parent = tail = args and os.path.abspath(os.path.commonprefix(args))
+ while tail:
+ if config.read(os.path.join(parent, fn) for fn in PROJECT_CONFIG):
+ local_dir = parent
+ if options.verbose:
+ print('local configuration: in %s' % parent)
+ break
+ (parent, tail) = os.path.split(parent)
pep8_section = parser.prog
if config.has_section(pep8_section):
@@ -1778,52 +2009,57 @@ def read_config(options, args, arglist, parser):
for o in parser.option_list])
# First, read the default values
- new_options, _ = parser.parse_args([])
+ (new_options, __) = parser.parse_args([])
# Second, parse the configuration
for opt in config.options(pep8_section):
+ if opt.replace('_', '-') not in parser.config_options:
+ print(" unknown option '%s' ignored" % opt)
+ continue
if options.verbose > 1:
print(" %s = %s" % (opt, config.get(pep8_section, opt)))
- if opt.replace('_', '-') not in parser.config_options:
- print("Unknown option: '%s'\n not in [%s]" %
- (opt, ' '.join(parser.config_options)))
- sys.exit(1)
normalized_opt = opt.replace('-', '_')
opt_type = option_list[normalized_opt]
if opt_type in ('int', 'count'):
value = config.getint(pep8_section, opt)
elif opt_type == 'string':
value = config.get(pep8_section, opt)
+ if normalized_opt == 'exclude':
+ value = normalize_paths(value, local_dir)
else:
assert opt_type in ('store_true', 'store_false')
value = config.getboolean(pep8_section, opt)
setattr(new_options, normalized_opt, value)
# Third, overwrite with the command-line options
- options, _ = parser.parse_args(arglist, values=new_options)
+ (options, __) = parser.parse_args(arglist, values=new_options)
options.doctest = options.testsuite = False
return options
def process_options(arglist=None, parse_argv=False, config_file=None,
parser=None):
- """Process options passed either via arglist or via command line args."""
- if not arglist and not parse_argv:
- # Don't read the command line if the module is used as a library.
- arglist = []
+ """Process options passed either via arglist or via command line args.
+
+ Passing in the ``config_file`` parameter allows other tools, such as flake8,
+ to specify their own options to be processed in pep8.
+ """
if not parser:
parser = get_parser()
if not parser.has_option('--config'):
- if config_file is True:
- config_file = DEFAULT_CONFIG
group = parser.add_option_group("Configuration", description=(
"The project options are read from the [%s] section of the "
"tox.ini file or the setup.cfg file located in any parent folder "
"of the path(s) being processed. Allowed options are: %s." %
(parser.prog, ', '.join(parser.config_options))))
group.add_option('--config', metavar='path', default=config_file,
- help="user config file location (default: %default)")
- options, args = parser.parse_args(arglist)
+ help="user config file location")
+ # Don't read the command line if the module is used as a library.
+ if not arglist and not parse_argv:
+ arglist = []
+ # If parse_argv is True and arglist is None, arguments are
+ # parsed from the command line (sys.argv)
+ (options, args) = parser.parse_args(arglist)
options.reporter = None
if options.ensure_value('testsuite', False):
@@ -1839,7 +2075,7 @@ def process_options(arglist=None, parse_argv=False, config_file=None,
options.reporter = parse_argv and options.quiet == 1 and FileReport
options.filename = options.filename and options.filename.split(',')
- options.exclude = options.exclude.split(',')
+ options.exclude = normalize_paths(options.exclude)
options.select = options.select and options.select.split(',')
options.ignore = options.ignore and options.ignore.split(',')
@@ -1854,7 +2090,15 @@ def process_options(arglist=None, parse_argv=False, config_file=None,
def _main():
"""Parse options and run checks on Python source."""
- pep8style = StyleGuide(parse_argv=True, config_file=True)
+ import signal
+
+ # Handle "Broken pipe" gracefully
+ try:
+ signal.signal(signal.SIGPIPE, lambda signum, frame: sys.exit(1))
+ except AttributeError:
+ pass # not supported on Windows
+
+ pep8style = StyleGuide(parse_argv=True)
options = pep8style.options
if options.doctest or options.testsuite:
from testsuite.support import run_tests