From c9e2d880011c530ff1454fe31a2d40d189860be5 Mon Sep 17 00:00:00 2001 From: Dieter Verfaillie Date: Wed, 24 Apr 2013 14:06:18 +0200 Subject: tests: Update misc/pep8.py to 1.4.5 Version in our tree is a wee bit outdated. For example, later work will introduce an utf8 encoded python source file which our old pep8.py does not yet understand (yeah, it really was *that* ancient)... Updated from: https://raw.github.com/jcrocholl/pep8/1.4.5/pep8.py Takes 552c1f1525e37a30376790151c1ba437776682c5, f941537d1c0a40f0906490ed160db6c79af572d3, 5a4afe2a77d0ff7d9fea13dd93c3304a6ca993de and a17f157e19bd6792c00321c8020dca5e5a281f45 into account... https://bugzilla.gnome.org/show_bug.cgi?id=699535 --- Makefile.am | 8 +- giscanner/annotationmain.py | 1 + giscanner/annotationparser.py | 65 +- giscanner/ast.py | 88 +- giscanner/cachestore.py | 5 +- giscanner/codegen.py | 12 +- giscanner/docmain.py | 1 + giscanner/docwriter.py | 23 +- giscanner/dumper.py | 13 +- giscanner/gdumpparser.py | 9 +- giscanner/girparser.py | 54 +- giscanner/girwriter.py | 21 +- giscanner/introspectablepass.py | 44 +- giscanner/maintransformer.py | 61 +- giscanner/message.py | 18 +- giscanner/scannermain.py | 30 +- giscanner/sectionparser.py | 4 + giscanner/shlibs.py | 4 + giscanner/sourcescanner.py | 8 +- giscanner/testcodegen.py | 3 + giscanner/transformer.py | 45 +- giscanner/utils.py | 9 + giscanner/xmlwriter.py | 18 +- misc/pep8.py | 1884 +++++++++++++++++++------ tests/scanner/annotationparser/test_parser.py | 3 +- tests/warn/warningtester.py | 16 +- 26 files changed, 1765 insertions(+), 682 deletions(-) diff --git a/Makefile.am b/Makefile.am index 202464fa..c04c05a0 100644 --- a/Makefile.am +++ b/Makefile.am @@ -55,10 +55,16 @@ EXTRA_DIST += \ misc/pep8.py \ misc/pyflakes.py +# Default pep8.py --exclude + emacs backup files +PEP8_EXCLUDES=--exclude='.svn,CVS,.bzr,.hg,.git,__pycache__,.\#*' + check-local: gtester --verbose $(GTESTER_PROGS) @echo "TEST: PEP-8 INQUISITION" - @find $(top_srcdir)/giscanner -name \*.py | sort | uniq | xargs $(PYTHON) $(top_srcdir)/misc/pep8.py --repeat --exclude=config.py + @find $(top_srcdir)/giscanner -name \*.py | sort | uniq | xargs \ + $(PYTHON) $(top_srcdir)/misc/pep8.py --max-line-length=99 --ignore=E128 $(PEP8_EXCLUDES) + @find $(top_srcdir)/tests -name \*.py | sort | uniq | xargs \ + $(PYTHON) $(top_srcdir)/misc/pep8.py --ignore=E127,E501 $(PEP8_EXCLUDES) @echo "TEST: Annotation pattern programs" PYTHONPATH="$(top_builddir):$(top_srcdir)" $(PYTHON) $(top_srcdir)/tests/scanner/annotationparser/test_patterns.py @echo "TEST: GTK-Doc Annotation Parser" diff --git a/giscanner/annotationmain.py b/giscanner/annotationmain.py index 4df6e831..304f5a32 100644 --- a/giscanner/annotationmain.py +++ b/giscanner/annotationmain.py @@ -26,6 +26,7 @@ from giscanner.scannermain import (get_preprocessor_option_group, create_source_scanner, process_packages) + def annotation_main(args): parser = optparse.OptionParser('%prog [options] sources') diff --git a/giscanner/annotationparser.py b/giscanner/annotationparser.py index c0b439a1..a0657dc4 100644 --- a/giscanner/annotationparser.py +++ b/giscanner/annotationparser.py @@ -141,7 +141,8 @@ OPT_TRANSFER_FLOATING = 'floating' # Program matching the start of a comment block. # # Results in 0 symbolic groups. -COMMENT_START_RE = re.compile(r''' +COMMENT_START_RE = re.compile( + r''' ^ # start [^\S\n\r]* # 0 or more whitespace characters / # 1 forward slash character @@ -157,7 +158,8 @@ COMMENT_START_RE = re.compile(r''' # # Results in 1 symbolic group: # - group 1 = description -COMMENT_END_RE = re.compile(r''' +COMMENT_END_RE = re.compile( + r''' ^ # start [^\S\n\r]* # 0 or more whitespace characters (?P.*?) # description text @@ -173,7 +175,8 @@ COMMENT_END_RE = re.compile(r''' # line inside a comment block. # # Results in 0 symbolic groups. -COMMENT_ASTERISK_RE = re.compile(r''' +COMMENT_ASTERISK_RE = re.compile( + r''' ^ # start [^\S\n\r]* # 0 or more whitespace characters \* # 1 asterisk character @@ -189,7 +192,8 @@ COMMENT_ASTERISK_RE = re.compile(r''' # # Results in 1 symbolic group: # - group 1 = indentation -COMMENT_INDENTATION_RE = re.compile(r''' +COMMENT_INDENTATION_RE = re.compile( + r''' ^ (?P[^\S\n\r]*) # 0 or more whitespace characters .* @@ -200,7 +204,8 @@ COMMENT_INDENTATION_RE = re.compile(r''' # Program matching an empty line. # # Results in 0 symbolic groups. -EMPTY_LINE_RE = re.compile(r''' +EMPTY_LINE_RE = re.compile( + r''' ^ # start [^\S\n\r]* # 0 or more whitespace characters $ # end @@ -212,7 +217,8 @@ EMPTY_LINE_RE = re.compile(r''' # Results in 2 symbolic groups: # - group 1 = colon # - group 2 = section_name -SECTION_RE = re.compile(r''' +SECTION_RE = re.compile( + r''' ^ # start [^\S\n\r]* # 0 or more whitespace characters SECTION # SECTION @@ -231,7 +237,8 @@ SECTION_RE = re.compile(r''' # - group 1 = symbol_name # - group 2 = colon # - group 3 = annotations -SYMBOL_RE = re.compile(r''' +SYMBOL_RE = re.compile( + r''' ^ # start [^\S\n\r]* # 0 or more whitespace characters (?P[\w-]*\w) # symbol name @@ -251,7 +258,8 @@ SYMBOL_RE = re.compile(r''' # - group 2 = property_name # - group 3 = colon # - group 4 = annotations -PROPERTY_RE = re.compile(r''' +PROPERTY_RE = re.compile( + r''' ^ # start [^\S\n\r]* # 0 or more whitespace characters (?P[\w]+) # class name @@ -275,7 +283,8 @@ PROPERTY_RE = re.compile(r''' # - group 2 = signal_name # - group 3 = colon # - group 4 = annotations -SIGNAL_RE = re.compile(r''' +SIGNAL_RE = re.compile( + r''' ^ # start [^\S\n\r]* # 0 or more whitespace characters (?P[\w]+) # class name @@ -299,7 +308,8 @@ SIGNAL_RE = re.compile(r''' # - group 2 = annotations # - group 3 = colon # - group 4 = description -PARAMETER_RE = re.compile(r''' +PARAMETER_RE = re.compile( + r''' ^ # start [^\S\n\r]* # 0 or more whitespace characters @ # @ character @@ -324,7 +334,8 @@ PARAMETER_RE = re.compile(r''' # - group 3 = colon # - group 4 = description _all_tags = '|'.join(_ALL_TAGS).replace(' ', '\\ ') -TAG_RE = re.compile(r''' +TAG_RE = re.compile( + r''' ^ # start [^\S\n\r]* # 0 or more whitespace characters (?P''' + _all_tags + r''') # tag name @@ -348,7 +359,8 @@ TAG_RE = re.compile(r''' # - group 2 = annotations # - group 3 = colon # - group 4 = description -MULTILINE_ANNOTATION_CONTINUATION_RE = re.compile(r''' +MULTILINE_ANNOTATION_CONTINUATION_RE = re.compile( + r''' ^ # start [^\S\n\r]* # 0 or more whitespace characters (?P(?:\(.*?\)[^\S\n\r]*)*) # annotations @@ -444,8 +456,8 @@ class DocTag(object): s = 'one value' else: s = '%d values' % (n_params, ) - if ((n_params > 0 and (value is None or value.length() != n_params)) or - n_params == 0 and value is not None): + if ((n_params > 0 and (value is None or value.length() != n_params)) + or n_params == 0 and value is not None): if value is None: length = 0 else: @@ -814,7 +826,7 @@ class AnnotationParser(object): if description: comment_lines[-1] = (line_offset, description) position = message.Position(filename, lineno + line_offset) - marker = ' '*result.end('description') + '^' + marker = ' ' * result.end('description') + '^' message.warn("Comments should end with */ on a new line:\n%s\n%s" % (line, marker), position) @@ -922,7 +934,7 @@ class AnnotationParser(object): if 'colon' in result.groupdict() and result.group('colon') != ':': colon_start = result.start('colon') colon_column = column_offset + colon_start - marker = ' '*colon_column + '^' + marker = ' ' * colon_column + '^' message.warn("missing ':' at column %s:\n%s\n%s" % (colon_column + 1, original_line, marker), position) @@ -942,7 +954,7 @@ class AnnotationParser(object): # right thing to do because sooner or later some long # descriptions will contain something matching an identifier # pattern by accident. - marker = ' '*column_offset + '^' + marker = ' ' * column_offset + '^' message.warn('ignoring unrecognized GTK-Doc comment block, identifier not ' 'found:\n%s\n%s' % (original_line, marker), position) @@ -965,7 +977,7 @@ class AnnotationParser(object): if in_part != PART_PARAMETERS: column = result.start('parameter_name') + column_offset - marker = ' '*column + '^' + marker = ' ' * column + '^' message.warn("'@%s' parameter unexpected at this location:\n%s\n%s" % (param_name, original_line, marker), position) @@ -983,7 +995,7 @@ class AnnotationParser(object): position) elif param_name in comment_block.params.keys(): column = result.start('parameter_name') + column_offset - marker = ' '*column + '^' + marker = ' ' * column + '^' message.warn("multiple '@%s' parameters for identifier '%s':\n%s\n%s" % (param_name, comment_block.name, original_line, marker), position) @@ -1007,8 +1019,7 @@ class AnnotationParser(object): # identifier (when there are no parameters) and encounter an empty # line, we must be parsing the comment block description. #################################################################### - if (EMPTY_LINE_RE.match(line) - and in_part in [PART_IDENTIFIER, PART_PARAMETERS]): + if (EMPTY_LINE_RE.match(line) and in_part in [PART_IDENTIFIER, PART_PARAMETERS]): in_part = PART_DESCRIPTION part_indent = line_indent continue @@ -1022,7 +1033,7 @@ class AnnotationParser(object): tag_annotations = result.group('annotations') tag_description = result.group('description') - marker = ' '*(result.start('tag_name') + column_offset) + '^' + marker = ' ' * (result.start('tag_name') + column_offset) + '^' # Deprecated GTK-Doc Description: tag if tag_name.lower() == TAG_DESCRIPTION: @@ -1047,7 +1058,7 @@ class AnnotationParser(object): if in_part != PART_TAGS: column = result.start('tag_name') + column_offset - marker = ' '*column + '^' + marker = ' ' * column + '^' message.warn("'%s:' tag unexpected at this location:\n%s\n%s" % (tag_name, original_line, marker), position) @@ -1071,7 +1082,7 @@ class AnnotationParser(object): else: if tag_name.lower() in comment_block.tags.keys(): column = result.start('tag_name') + column_offset - marker = ' '*column + '^' + marker = ' ' * column + '^' message.warn("multiple '%s:' tags for identifier '%s':\n%s\n%s" % (tag_name, comment_block.name, original_line, marker), position) @@ -1149,7 +1160,7 @@ class AnnotationParser(object): part.value = '' def _validate_multiline_annotation_continuation(self, line, original_line, - column_offset, position): + column_offset, position): ''' Validate parameters and tags (except the first line) and generate warnings about invalid annotations spanning multiple lines. @@ -1163,7 +1174,7 @@ class AnnotationParser(object): result = MULTILINE_ANNOTATION_CONTINUATION_RE.match(line) if result: column = result.start('annotations') + column_offset - marker = ' '*column + '^' + marker = ' ' * column + '^' message.warn('ignoring invalid multiline annotation continuation:\n' '%s\n%s' % (original_line, marker), position) @@ -1179,7 +1190,7 @@ class AnnotationParser(object): for i, c in enumerate(value): if c == '(' and opened == -1: - opened = i+1 + opened = i + 1 if c == ')' and opened != -1: segment = value[opened:i] parts = segment.split(' ', 1) diff --git a/giscanner/ast.py b/giscanner/ast.py index 25184886..4c54b548 100644 --- a/giscanner/ast.py +++ b/giscanner/ast.py @@ -28,15 +28,17 @@ from .collections import OrderedDict from .message import Position from .utils import to_underscores + class Type(object): - """A Type can be either: -* A reference to a node (target_giname) -* A reference to a "fundamental" type like 'utf8' -* A "foreign" type - this can be any string." -If none are specified, then it's in an "unresolved" state. An -unresolved type can have two data sources; a "ctype" which comes -from a C type string, or a gtype_name (from g_type_name()). -""" # ''' + """ + A Type can be either: + * A reference to a node (target_giname) + * A reference to a "fundamental" type like 'utf8' + * A "foreign" type - this can be any string." + If none are specified, then it's in an "unresolved" state. An + unresolved type can have two data sources; a "ctype" which comes + from a C type string, or a gtype_name (from g_type_name()). + """ def __init__(self, ctype=None, @@ -125,11 +127,12 @@ in contrast to the other create_type() functions.""" def __cmp__(self, other): if self.target_fundamental: return cmp(self.target_fundamental, other.target_fundamental) - if self.target_giname: + elif self.target_giname: return cmp(self.target_giname, other.target_giname) - if self.target_foreign: + elif self.target_foreign: return cmp(self.target_foreign, other.target_foreign) - return cmp(self.ctype, other.ctype) + else: + return cmp(self.ctype, other.ctype) def is_equiv(self, typeval): """Return True if the specified types are compatible at @@ -170,6 +173,7 @@ in contrast to the other create_type() functions.""" data = '' return '%s(%sctype=%s)' % (self.__class__.__name__, data, self.ctype) + class TypeUnknown(Type): def __init__(self): Type.__init__(self, _target_unknown=True) @@ -351,9 +355,7 @@ SIGNAL_MUST_COLLECT = 'must-collect' class Namespace(object): - def __init__(self, name, version, - identifier_prefixes=None, - symbol_prefixes=None): + def __init__(self, name, version, identifier_prefixes=None, symbol_prefixes=None): self.name = name self.version = version if identifier_prefixes is not None: @@ -367,15 +369,15 @@ class Namespace(object): self.symbol_prefixes = [to_underscores(p).lower() for p in ps] # cache upper-cased versions self._ucase_symbol_prefixes = [p.upper() for p in self.symbol_prefixes] - self.names = OrderedDict() # Maps from GIName -> node - self.aliases = {} # Maps from GIName -> GIName - self.type_names = {} # Maps from GTName -> node - self.ctypes = {} # Maps from CType -> node - self.symbols = {} # Maps from function symbols -> Function - self.includes = set() # Include - self.shared_libraries = [] # str - self.c_includes = [] # str - self.exported_packages = [] # str + self.names = OrderedDict() # Maps from GIName -> node + self.aliases = {} # Maps from GIName -> GIName + self.type_names = {} # Maps from GTName -> node + self.ctypes = {} # Maps from CType -> node + self.symbols = {} # Maps from function symbols -> Function + self.includes = set() # Include + self.shared_libraries = [] # str + self.c_includes = [] # str + self.exported_packages = [] # str def type_from_name(self, name, ctype=None): """Backwards compatibility method for older .gir files, which @@ -416,7 +418,7 @@ but adds it to things like ctypes, symbols, and type_names. if isinstance(node, (Class, Interface)): for m in chain(node.signals, node.properties): m.namespace = self - if isinstance(node, Enum) or isinstance(node, Bitfield): + if isinstance(node, (Enum, Bitfield)): for fn in node.static_methods: if not isinstance(fn, Function): continue @@ -482,6 +484,7 @@ functions via get_by_symbol().""" for node in self.itervalues(): node.walk(callback, []) + class Include(object): def __init__(self, name, version): @@ -504,6 +507,7 @@ class Include(object): def __str__(self): return '%s-%s' % (self.name, self.version) + class Annotated(object): """An object which has a few generic metadata properties.""" @@ -511,12 +515,13 @@ properties.""" self.version = None self.skip = False self.introspectable = True - self.attributes = [] # (key, value)* + self.attributes = [] # (key, value)* self.stability = None self.deprecated = None self.deprecated_version = None self.doc = None + class Node(Annotated): """A node is a type of object which is uniquely identified by its (namespace, name) pair. When combined with a ., this is called a @@ -527,7 +532,7 @@ GIName. It's possible for nodes to contain or point to other nodes.""" def __init__(self, name=None): Annotated.__init__(self) - self.namespace = None # Should be set later by Namespace.append() + self.namespace = None # Should be set later by Namespace.append() self.name = name self.foreign = False self.file_positions = set() @@ -596,8 +601,8 @@ class Callable(Node): self.retval = retval self.parameters = parameters self.throws = not not throws - self.instance_parameter = None # Parameter - self.parent = None # A Class or Interface + self.instance_parameter = None # Parameter + self.parent = None # A Class or Interface # Returns all parameters, including the instance parameter @property @@ -627,10 +632,10 @@ class Function(Callable): self.symbol = symbol self.is_method = False self.is_constructor = False - self.shadowed_by = None # C symbol string - self.shadows = None # C symbol string - self.moved_to = None # namespaced function name string - self.internal_skipped = False # if True, this func will not be written to GIR + self.shadowed_by = None # C symbol string + self.shadows = None # C symbol string + self.moved_to = None # namespaced function name string + self.internal_skipped = False # if True, this func will not be written to GIR def clone(self): clone = copy.copy(self) @@ -641,8 +646,7 @@ class Function(Callable): def is_type_meta_function(self): # Named correctly - if not (self.name.endswith('_get_type') or - self.name.endswith('_get_gtype')): + if not (self.name.endswith('_get_type') or self.name.endswith('_get_gtype')): return False # Doesn't have any parameters @@ -651,14 +655,13 @@ class Function(Callable): # Returns GType rettype = self.retval.type - if (not rettype.is_equiv(TYPE_GTYPE) and - rettype.target_giname != 'Gtk.Type'): - message.warn("function '%s' returns '%r', not a GType" % - (self.name, rettype)) + if (not rettype.is_equiv(TYPE_GTYPE) and rettype.target_giname != 'Gtk.Type'): + message.warn("function '%s' returns '%r', not a GType" % (self.name, rettype)) return False return True + class ErrorQuarkFunction(Function): def __init__(self, name, retval, parameters, throws, symbol, error_domain): @@ -679,7 +682,6 @@ class VFunction(Callable): return obj - class Varargs(Type): def __init__(self): @@ -715,6 +717,7 @@ class Array(Type): arr.size = self.size return arr + class List(Type): def __init__(self, name, element_type, **kwargs): @@ -727,6 +730,7 @@ class List(Type): def clone(self): return List(self.name, self.element_type) + class Map(Type): def __init__(self, key_type, value_type, **kwargs): @@ -739,6 +743,7 @@ class Map(Type): def clone(self): return Map(self.key_type, self.value_type) + class Alias(Node): def __init__(self, name, target, ctype=None): @@ -881,6 +886,7 @@ class Compound(Node, Registered): if field.anonymous_node is not None: field.anonymous_node.walk(callback, chain) + class Field(Annotated): def __init__(self, name, typenode, readable, writable, bits=None, @@ -894,7 +900,7 @@ class Field(Annotated): self.bits = bits self.anonymous_node = anonymous_node self.private = False - self.parent = None # a compound + self.parent = None # a compound def __cmp__(self, other): return cmp(self.name, other.name) @@ -1090,7 +1096,7 @@ class Property(Node): self.transfer = PARAM_TRANSFER_NONE else: self.transfer = transfer - self.parent = None # A Class or Interface + self.parent = None # A Class or Interface class Callback(Callable): diff --git a/giscanner/cachestore.py b/giscanner/cachestore.py index 5f66b66a..ad4c7a36 100644 --- a/giscanner/cachestore.py +++ b/giscanner/cachestore.py @@ -31,6 +31,7 @@ import giscanner _CACHE_VERSION_FILENAME = '.cache-version' + def _get_versionhash(): toplevel = os.path.dirname(giscanner.__file__) # Use pyc instead of py to avoid extra IO @@ -40,6 +41,7 @@ def _get_versionhash(): mtimes = (str(os.stat(source).st_mtime) for source in sources) return hashlib.sha1(''.join(mtimes)).hexdigest() + def _get_cachedir(): if 'GI_SCANNER_DISABLE_CACHE' in os.environ: return None @@ -150,8 +152,7 @@ class CacheStore(object): if store_filename is None: return - if (os.path.exists(store_filename) and - self._cache_is_valid(store_filename, filename)): + if (os.path.exists(store_filename) and self._cache_is_valid(store_filename, filename)): return None tmp_fd, tmp_filename = tempfile.mkstemp(prefix='g-ir-scanner-cache-') diff --git a/giscanner/codegen.py b/giscanner/codegen.py index b73a7da3..e9ed9415 100644 --- a/giscanner/codegen.py +++ b/giscanner/codegen.py @@ -24,6 +24,7 @@ from contextlib import contextmanager from . import ast + class CCodeGenerator(object): def __init__(self, namespace, out_h_filename, out_c_filename): self.out_h_filename = out_h_filename @@ -36,15 +37,16 @@ class CCodeGenerator(object): return '%s_%s' % (self.namespace.symbol_prefixes[0], name) def _typecontainer_to_ctype(self, param): - if (isinstance(param, ast.Parameter) and - param.direction in (ast.PARAM_DIRECTION_OUT, - ast.PARAM_DIRECTION_INOUT)): + if (isinstance(param, ast.Parameter) + and param.direction in (ast.PARAM_DIRECTION_OUT, ast.PARAM_DIRECTION_INOUT)): suffix = '*' else: suffix = '' - if (param.type.is_equiv((ast.TYPE_STRING, ast.TYPE_FILENAME)) and - param.transfer == ast.PARAM_TRANSFER_NONE): + + if (param.type.is_equiv((ast.TYPE_STRING, ast.TYPE_FILENAME)) + and param.transfer == ast.PARAM_TRANSFER_NONE): return "const gchar*" + suffix + return param.type.ctype + suffix def _write_prelude(self, out, func): diff --git a/giscanner/docmain.py b/giscanner/docmain.py index afd509ff..3287dcc6 100644 --- a/giscanner/docmain.py +++ b/giscanner/docmain.py @@ -24,6 +24,7 @@ import optparse from .docwriter import DocWriter from .transformer import Transformer + def doc_main(args): parser = optparse.OptionParser('%prog [options] GIR-file') diff --git a/giscanner/docwriter.py b/giscanner/docwriter.py index b3716ec6..982ab37c 100644 --- a/giscanner/docwriter.py +++ b/giscanner/docwriter.py @@ -31,6 +31,7 @@ from mako.lookup import TemplateLookup from . import ast, xmlwriter from .utils import to_underscores + def make_page_id(node, recursive=False): if isinstance(node, ast.Namespace): if recursive: @@ -51,6 +52,7 @@ def make_page_id(node, recursive=False): else: return '%s.%s' % (make_page_id(parent, recursive=True), node.name) + def get_node_kind(node): if isinstance(node, ast.Namespace): node_kind = 'namespace' @@ -78,6 +80,7 @@ def get_node_kind(node): return node_kind + class TemplatedScanner(object): def __init__(self, specs): self.specs = self.unmangle_specs(specs) @@ -141,6 +144,7 @@ class TemplatedScanner(object): if pos < len(text): yield ('other', text[pos:], None) + class DocstringScanner(TemplatedScanner): def __init__(self): specs = [ @@ -156,6 +160,7 @@ class DocstringScanner(TemplatedScanner): super(DocstringScanner, self).__init__(specs) + class DocFormatter(object): def __init__(self, transformer): self._transformer = transformer @@ -358,6 +363,7 @@ class DocFormatter(object): parent_chain.reverse() return parent_chain + class DocFormatterC(DocFormatter): language = "C" mime_type = "text/x-csrc" @@ -380,7 +386,7 @@ class DocFormatterC(DocFormatter): return getattr(node, 'ctype') def format_function_name(self, func): - if isinstance(func, (ast.Function)): + if isinstance(func, ast.Function): return func.symbol else: return func.name @@ -388,6 +394,7 @@ class DocFormatterC(DocFormatter): def get_parameters(self, node): return node.all_parameters + class DocFormatterIntrospectableBase(DocFormatter): def should_render_node(self, node): if isinstance(node, ast.Record) and node.is_gtype_struct_for is not None: @@ -398,6 +405,7 @@ class DocFormatterIntrospectableBase(DocFormatter): return super(DocFormatterIntrospectableBase, self).should_render_node(node) + class DocFormatterPython(DocFormatterIntrospectableBase): language = "Python" mime_type = "text/python" @@ -418,7 +426,7 @@ class DocFormatterPython(DocFormatterIntrospectableBase): if getattr(node, "is_method", False): return True - if isinstance(node, (ast.VFunction)): + if isinstance(node, ast.VFunction): return True return False @@ -451,8 +459,7 @@ class DocFormatterPython(DocFormatterIntrospectableBase): "GParam": "GLib.Param", "PyObject": "object", "GStrv": "[str]", - "GVariant": "GLib.Variant", - } + "GVariant": "GLib.Variant"} return fundamental_types.get(name, name) @@ -476,6 +483,7 @@ class DocFormatterPython(DocFormatterIntrospectableBase): def get_parameters(self, node): return node.all_parameters + class DocFormatterGjs(DocFormatterIntrospectableBase): language = "Gjs" mime_type = "text/x-gjs" @@ -490,7 +498,7 @@ class DocFormatterGjs(DocFormatterIntrospectableBase): if getattr(node, "is_method", False): return True - if isinstance(node, (ast.VFunction)): + if isinstance(node, ast.VFunction): return True return False @@ -514,8 +522,7 @@ class DocFormatterGjs(DocFormatterIntrospectableBase): "GParam": "GLib.Param", "PyObject": "Object", "GStrv": "[String]", - "GVariant": "GLib.Variant", - } + "GVariant": "GLib.Variant"} return fundamental_types.get(name, name) @@ -558,12 +565,14 @@ class DocFormatterGjs(DocFormatterIntrospectableBase): params.append(param) return params + LANGUAGES = { "c": DocFormatterC, "python": DocFormatterPython, "gjs": DocFormatterGjs, } + class DocWriter(object): def __init__(self, transformer, language): self._transformer = transformer diff --git a/giscanner/dumper.py b/giscanner/dumper.py index b9f529ea..157b24da 100644 --- a/giscanner/dumper.py +++ b/giscanner/dumper.py @@ -223,7 +223,7 @@ class DumpCompiler(object): # The Microsoft compiler uses different option flags for # compilation result output if self._pkgconfig_msvc_flags: - args.extend(['-c', '-Fe'+output, '-Fo'+output]) + args.extend(['-c', '-Fe' + output, '-Fo' + output]) else: args.extend(['-c', '-o', output]) for source in sources: @@ -254,7 +254,7 @@ class DumpCompiler(object): # We can use -o for the Microsoft compiler/linker, # but it is considered deprecated usage with that if self._pkgconfig_msvc_flags: - args.extend(['-Fe'+output]) + args.extend(['-Fe' + output]) else: args.extend(['-o', output]) if libtool: @@ -313,9 +313,9 @@ class DumpCompiler(object): if self._options.namespace_version: args.append(str.lower(self._options.namespace_name) + '-' + - self._options.namespace_version+'.lib') + self._options.namespace_version + '.lib') else: - args.append(str.lower(self._options.namespace_name)+'.lib') + args.append(str.lower(self._options.namespace_name) + '.lib') else: args.append('-Wl,-rpath=.') @@ -329,7 +329,7 @@ class DumpCompiler(object): # to .lib files, not the .dll as the --library option specifies the # .dll(s) the .gir file refers to if self._pkgconfig_msvc_flags == '': - if library.endswith(".la"): # explicitly specified libtool library + if library.endswith(".la"): # explicitly specified libtool library args.append(library) else: args.append('-l' + library) @@ -358,11 +358,12 @@ class DumpCompiler(object): # The --library option on Windows pass in the .dll file(s) the # .gir files refer to, so don't link to them on Visual C++ if self._pkgconfig_msvc_flags == '': - if library.endswith(".la"): # explicitly specified libtool library + if library.endswith(".la"): # explicitly specified libtool library args.append(library) else: args.append('-l' + library) + def compile_introspection_binary(options, get_type_functions, error_quark_functions): dc = DumpCompiler(options, get_type_functions, error_quark_functions) diff --git a/giscanner/gdumpparser.py b/giscanner/gdumpparser.py index 79525030..568777bd 100644 --- a/giscanner/gdumpparser.py +++ b/giscanner/gdumpparser.py @@ -203,8 +203,7 @@ blob containing data gleaned from GObject's primitive introspection.""" def _initparse_gobject_record(self, record): if (record.name.startswith('ParamSpec') - and not record.name in ('ParamSpecPool', 'ParamSpecClass', - 'ParamSpecTypeInfo')): + and not record.name in ('ParamSpecPool', 'ParamSpecClass', 'ParamSpecTypeInfo')): parent = None if record.name != 'ParamSpec': parent = ast.Type(target_giname='GObject.ParamSpec') @@ -280,7 +279,6 @@ blob containing data gleaned from GObject's primitive introspection.""" member.attrib['name'], member.attrib['nick'])) - if xmlnode.tag == 'flags': klass = ast.Bitfield else: @@ -437,7 +435,7 @@ different --identifier-prefix.""" % (xmlnode.attrib['name'], self._namespace.ide if i == 0: argname = 'object' else: - argname = 'p%s' % (i-1, ) + argname = 'p%s' % (i - 1, ) pctype = parameter.attrib['type'] ptype = ast.Type.create_from_gtype_name(pctype) param = ast.Parameter(argname, ptype) @@ -526,8 +524,7 @@ different --identifier-prefix.""" % (xmlnode.attrib['name'], self._namespace.ide return False def _strip_class_suffix(self, name): - if (name.endswith('Class') or - name.endswith('Iface')): + if (name.endswith('Class') or name.endswith('Iface')): return name[:-5] elif name.endswith('Interface'): return name[:-9] diff --git a/giscanner/girparser.py b/giscanner/girparser.py index 63a3fd02..2538036a 100644 --- a/giscanner/girparser.py +++ b/giscanner/girparser.py @@ -101,9 +101,8 @@ class GIRParser(object): assert root.tag == _corens('repository') version = root.attrib['version'] if version != COMPATIBLE_GIR_VERSION: - raise SystemExit("%s: Incompatible version %s (supported: %s)" \ - % (self._get_current_file(), - version, COMPATIBLE_GIR_VERSION)) + raise SystemExit("%s: Incompatible version %s (supported: %s)" % + (self._get_current_file(), version, COMPATIBLE_GIR_VERSION)) for node in root.getchildren(): if node.tag == _corens('include'): @@ -122,9 +121,9 @@ class GIRParser(object): if symbol_prefixes: symbol_prefixes = symbol_prefixes.split(',') self._namespace = ast.Namespace(ns.attrib['name'], - ns.attrib['version'], - identifier_prefixes=identifier_prefixes, - symbol_prefixes=symbol_prefixes) + ns.attrib['version'], + identifier_prefixes=identifier_prefixes, + symbol_prefixes=symbol_prefixes) if 'shared-library' in ns.attrib: self._namespace.shared_libraries = ns.attrib['shared-library'].split(',') self._namespace.includes = self._includes @@ -140,8 +139,7 @@ class GIRParser(object): _corens('interface'): self._parse_object_interface, _corens('record'): self._parse_record, _corens('union'): self._parse_union, - _glibns('boxed'): self._parse_boxed, - } + _glibns('boxed'): self._parse_boxed} if not self._types_only: parser_methods[_corens('constant')] = self._parse_constant @@ -153,8 +151,7 @@ class GIRParser(object): method(node) def _parse_include(self, node): - include = ast.Include(node.attrib['name'], - node.attrib['version']) + include = ast.Include(node.attrib['name'], node.attrib['version']) self._includes.add(include) def _parse_pkgconfig_package(self, node): @@ -165,9 +162,7 @@ class GIRParser(object): def _parse_alias(self, node): typeval = self._parse_type(node) - alias = ast.Alias(node.attrib['name'], - typeval, - node.attrib.get(_cns('type'))) + alias = ast.Alias(node.attrib['name'], typeval, node.attrib.get(_cns('type'))) self._parse_generic_attribs(node, alias) self._namespace.append(alias) @@ -427,7 +422,8 @@ class GIRParser(object): return ast.Type(ctype=ctype) elif name in ['GLib.List', 'GLib.SList']: subchild = self._find_first_child(typenode, - map(_corens, ('callback', 'array', 'varargs', 'type'))) + map(_corens, ('callback', 'array', + 'varargs', 'type'))) if subchild is not None: element_type = self._parse_type(typenode) else: @@ -438,9 +434,7 @@ class GIRParser(object): subchildren_types = map(self._parse_type_simple, subchildren) while len(subchildren_types) < 2: subchildren_types.append(ast.TYPE_ANY) - return ast.Map(subchildren_types[0], - subchildren_types[1], - ctype=ctype) + return ast.Map(subchildren_types[0], subchildren_types[1], ctype=ctype) else: return self._namespace.type_from_name(name, ctype) else: @@ -462,8 +456,8 @@ class GIRParser(object): lenidx = typenode.attrib.get('length') if lenidx is not None: idx = int(lenidx) - assert idx < len(parent.parameters), "%r %d >= %d" \ - % (parent, idx, len(parent.parameters)) + assert idx < len(parent.parameters), "%r %d >= %d" % (parent, idx, + len(parent.parameters)) typeval.length_param_name = parent.parameters[idx].argname def _parse_boxed(self, node): @@ -509,11 +503,11 @@ class GIRParser(object): assert node.tag == _corens('field'), node.tag type_node = self._parse_type(node) field = ast.Field(node.attrib.get('name'), - type_node, - node.attrib.get('readable') != '0', - node.attrib.get('writable') == '1', - node.attrib.get('bits'), - anonymous_node=anonymous_node) + type_node, + node.attrib.get('readable') != '0', + node.attrib.get('writable') == '1', + node.attrib.get('bits'), + anonymous_node=anonymous_node) field.private = node.attrib.get('private') == '1' field.parent = parent self._parse_generic_attribs(node, field) @@ -521,12 +515,12 @@ class GIRParser(object): def _parse_property(self, node, parent): prop = ast.Property(node.attrib['name'], - self._parse_type(node), - node.attrib.get('readable') != '0', - node.attrib.get('writable') == '1', - node.attrib.get('construct') == '1', - node.attrib.get('construct-only') == '1', - node.attrib.get('transfer-ownership')) + self._parse_type(node), + node.attrib.get('readable') != '0', + node.attrib.get('writable') == '1', + node.attrib.get('construct') == '1', + node.attrib.get('construct-only') == '1', + node.attrib.get('transfer-ownership')) self._parse_generic_attribs(node, prop) prop.parent = parent return prop diff --git a/giscanner/girwriter.py b/giscanner/girwriter.py index 2578a331..e7af2533 100644 --- a/giscanner/girwriter.py +++ b/giscanner/girwriter.py @@ -28,14 +28,15 @@ from .xmlwriter import XMLWriter # Compatible changes we just make inline COMPATIBLE_GIR_VERSION = '1.2' + class GIRWriter(XMLWriter): def __init__(self, namespace): super(GIRWriter, self).__init__() self.write_comment( -'''This file was automatically generated from C sources - DO NOT EDIT! -To affect the contents of this file, edit the original C definitions, -and/or use gtk-doc annotations. ''') + 'This file was automatically generated from C sources - DO NOT EDIT!\n' + 'To affect the contents of this file, edit the original C definitions,\n' + 'and/or use gtk-doc annotations. ') self._write_repository(namespace) def _write_repository(self, namespace): @@ -43,8 +44,7 @@ and/or use gtk-doc annotations. ''') ('version', COMPATIBLE_GIR_VERSION), ('xmlns', 'http://www.gtk.org/introspection/core/1.0'), ('xmlns:c', 'http://www.gtk.org/introspection/c/1.0'), - ('xmlns:glib', 'http://www.gtk.org/introspection/glib/1.0'), - ] + ('xmlns:glib', 'http://www.gtk.org/introspection/glib/1.0')] with self.tagcontext('repository', attrs): for include in sorted(namespace.includes): self._write_include(include) @@ -292,8 +292,8 @@ and/or use gtk-doc annotations. ''') attrs.append(('fixed-size', '%d' % (ntype.size, ))) if ntype.length_param_name is not None: assert function - attrs.insert(0, ('length', '%d' - % (function.get_parameter_index(ntype.length_param_name, )))) + length = function.get_parameter_index(ntype.length_param_name) + attrs.insert(0, ('length', '%d' % (length, ))) with self.tagcontext('array', attrs): self._write_type(ntype.element_type) @@ -482,7 +482,7 @@ and/or use gtk-doc annotations. ''') attrs = list(extra_attrs) if record.name is not None: attrs.append(('name', record.name)) - if record.ctype is not None: # the record might be anonymous + if record.ctype is not None: # the record might be anonymous attrs.append(('c:type', record.ctype)) if record.disguised: attrs.append(('disguised', '1')) @@ -513,7 +513,7 @@ and/or use gtk-doc annotations. ''') attrs = [] if union.name is not None: attrs.append(('name', union.name)) - if union.ctype is not None: # the union might be anonymous + if union.ctype is not None: # the union might be anonymous attrs.append(('c:type', union.ctype)) self._append_version(union, attrs) self._append_node_generic(union, attrs) @@ -544,8 +544,7 @@ and/or use gtk-doc annotations. ''') elif isinstance(field.anonymous_node, ast.Union): self._write_union(field.anonymous_node) else: - raise AssertionError("Unknown field anonymous: %r" \ - % (field.anonymous_node, )) + raise AssertionError("Unknown field anonymous: %r" % (field.anonymous_node, )) else: attrs = [('name', field.name)] self._append_node_generic(field, attrs) diff --git a/giscanner/introspectablepass.py b/giscanner/introspectablepass.py index 3e295391..3d67c73e 100644 --- a/giscanner/introspectablepass.py +++ b/giscanner/introspectablepass.py @@ -21,6 +21,7 @@ from . import ast from . import message from .annotationparser import TAG_RETURNS + class IntrospectablePass(object): def __init__(self, transformer, blocks): @@ -79,7 +80,7 @@ class IntrospectablePass(object): if not node.type.resolved: self._parameter_warning(parent, node, -"Unresolved type: %r" % (node.type.unresolved_string, )) + "Unresolved type: %r" % (node.type.unresolved_string, )) parent.introspectable = False return @@ -88,22 +89,23 @@ class IntrospectablePass(object): return if (isinstance(node.type, (ast.List, ast.Array)) - and node.type.element_type == ast.TYPE_ANY): + and node.type.element_type == ast.TYPE_ANY): self._parameter_warning(parent, node, "Missing (element-type) annotation") parent.introspectable = False return if (is_parameter - and isinstance(target, ast.Callback) - and not node.type.target_giname in ('GLib.DestroyNotify', - 'Gio.AsyncReadyCallback') - and node.scope is None): - self._parameter_warning(parent, node, - ("Missing (scope) annotation for callback" + - " without GDestroyNotify (valid: %s, %s)") - % (ast.PARAM_SCOPE_CALL, ast.PARAM_SCOPE_ASYNC)) - parent.introspectable = False - return + and isinstance(target, ast.Callback) + and not node.type.target_giname in ('GLib.DestroyNotify', 'Gio.AsyncReadyCallback') + and node.scope is None): + self._parameter_warning( + parent, + node, + "Missing (scope) annotation for callback without " + "GDestroyNotify (valid: %s, %s)" % (ast.PARAM_SCOPE_CALL, ast.PARAM_SCOPE_ASYNC)) + + parent.introspectable = False + return if is_return and isinstance(target, ast.Callback): self._parameter_warning(parent, node, "Callbacks cannot be return values; use (skip)") @@ -111,12 +113,14 @@ class IntrospectablePass(object): return if (is_return - and isinstance(target, (ast.Record, ast.Union)) - and target.get_type is None - and not target.foreign): + and isinstance(target, (ast.Record, ast.Union)) + and target.get_type is None + and not target.foreign): if node.transfer != ast.PARAM_TRANSFER_NONE: - self._parameter_warning(parent, node, -"Invalid non-constant return of bare structure or union; register as boxed type or (skip)") + self._parameter_warning( + parent, node, + "Invalid non-constant return of bare structure or union; " + "register as boxed type or (skip)") parent.introspectable = False return @@ -143,10 +147,10 @@ class IntrospectablePass(object): # These are not introspectable pending us adding # larger type tags to the typelib (in theory these could # be 128 bit or larger) - if typeval.is_equiv((ast.TYPE_LONG_LONG, ast.TYPE_LONG_ULONG, - ast.TYPE_LONG_DOUBLE)): + elif typeval.is_equiv((ast.TYPE_LONG_LONG, ast.TYPE_LONG_ULONG, ast.TYPE_LONG_DOUBLE)): return False - return True + else: + return True target = self._transformer.lookup_typenode(typeval) if not target: return False diff --git a/giscanner/maintransformer.py b/giscanner/maintransformer.py index 8292f8eb..6acc5947 100644 --- a/giscanner/maintransformer.py +++ b/giscanner/maintransformer.py @@ -37,6 +37,7 @@ from .annotationparser import (OPT_ALLOW_NONE, OPT_ARRAY, OPT_ATTRIBUTE, OPT_TRANSFER_NONE, OPT_TRANSFER_FLOATING) from .utils import to_underscores_noprefix + class MainTransformer(object): def __init__(self, transformer, blocks): @@ -105,7 +106,7 @@ class MainTransformer(object): def _pass_fixup_hidden_fields(self, node, chain): """Hide all callbacks starting with _; the typical -usage is void (*_gtk_reserved1)(void);""" + usage is void (*_gtk_reserved1)(void);""" if isinstance(node, (ast.Class, ast.Interface, ast.Record, ast.Union)): for field in node.fields: if (field @@ -204,7 +205,7 @@ usage is void (*_gtk_reserved1)(void);""" if isinstance(node, ast.Function): self._apply_annotations_function(node, chain) if isinstance(node, ast.Callback): - self._apply_annotations_callable(node, chain, block = self._get_block(node)) + self._apply_annotations_callable(node, chain, block=self._get_block(node)) if isinstance(node, (ast.Class, ast.Interface, ast.Union, ast.Enum, ast.Bitfield, ast.Callback)): self._apply_annotations_annotated(node, self._get_block(node)) @@ -257,7 +258,7 @@ usage is void (*_gtk_reserved1)(void);""" Use resolver() on each identifier, and combiner() on the parts of each complete type. (top_combiner is used on the top-most type.)""" bits = re.split(r'([,<>()])', type_str, 1) - first, sep, rest = [bits[0], '', ''] if (len(bits)==1) else bits + first, sep, rest = [bits[0], '', ''] if (len(bits) == 1) else bits args = [resolver(first)] if sep == '<' or sep == '(': lastsep = '>' if (sep == '<') else ')' @@ -268,9 +269,11 @@ usage is void (*_gtk_reserved1)(void);""" else: rest = sep + rest return top_combiner(*args), rest + def resolver(ident): res = self._transformer.create_type_from_user_string(ident) return res + def combiner(base, *rest): if not rest: return base @@ -281,6 +284,7 @@ usage is void (*_gtk_reserved1)(void);""" message.warn( "Too many parameters in type specification %r" % (type_str, )) return base + def top_combiner(base, *rest): if type_node is not None and isinstance(type_node, ast.Type): base.is_const = type_node.is_const @@ -327,24 +331,23 @@ usage is void (*_gtk_reserved1)(void);""" return block.position def _check_array_element_type(self, array, options): + array_type = array.array_type + element_type = array.element_type + # GPtrArrays are allowed to contain non basic types # (except enums and flags) or basic types that are # as big as a gpointer - if array.array_type == ast.Array.GLIB_PTRARRAY and \ - ((array.element_type in ast.BASIC_GIR_TYPES - and not array.element_type in ast.POINTER_TYPES) or - isinstance(array.element_type, ast.Enum) or - isinstance(array.element_type, ast.Bitfield)): - message.warn("invalid (element-type) for a GPtrArray, " - "must be a pointer", options.position) + if array_type == ast.Array.GLIB_PTRARRAY: + if ((element_type in ast.BASIC_GIR_TYPES and not element_type in ast.POINTER_TYPES) + or isinstance(element_type, (ast.Enum, ast.Bitfield))): + message.warn("invalid (element-type) for a GPtrArray, " + "must be a pointer", options.position) # GByteArrays have (element-type) guint8 by default - if array.array_type == ast.Array.GLIB_BYTEARRAY: - if array.element_type == ast.TYPE_ANY: + if array_type == ast.Array.GLIB_BYTEARRAY: + if element_type == ast.TYPE_ANY: array.element_type = ast.TYPE_UINT8 - elif not array.element_type in [ast.TYPE_UINT8, - ast.TYPE_INT8, - ast.TYPE_CHAR]: + elif not element_type in [ast.TYPE_UINT8, ast.TYPE_INT8, ast.TYPE_CHAR]: message.warn("invalid (element-type) for a GByteArray, " "must be one of guint8, gint8 or gchar", options.position) @@ -454,8 +457,8 @@ usage is void (*_gtk_reserved1)(void);""" def _get_transfer_default_returntype_basic(self, typeval): if (typeval.is_equiv(ast.BASIC_GIR_TYPES) - or typeval.is_const - or typeval.is_equiv(ast.TYPE_NONE)): + or typeval.is_const + or typeval.is_equiv(ast.TYPE_NONE)): return ast.PARAM_TRANSFER_NONE elif typeval.is_equiv(ast.TYPE_STRING): # Non-const strings default to FULL @@ -535,8 +538,7 @@ usage is void (*_gtk_reserved1)(void);""" caller_allocates = False annotated_direction = None - if (OPT_INOUT in options or - OPT_INOUT_ALT in options): + if (OPT_INOUT in options or OPT_INOUT_ALT in options): annotated_direction = ast.PARAM_DIRECTION_INOUT elif OPT_OUT in options: subtype = options[OPT_OUT] @@ -574,9 +576,9 @@ usage is void (*_gtk_reserved1)(void);""" self._adjust_container_type(parent, node, options) - if (OPT_ALLOW_NONE in options or - node.type.target_giname == 'Gio.AsyncReadyCallback' or - node.type.target_giname == 'Gio.Cancellable'): + if (OPT_ALLOW_NONE in options + or node.type.target_giname == 'Gio.AsyncReadyCallback' + or node.type.target_giname == 'Gio.Cancellable'): node.allow_none = True if tag is not None and tag.comment is not None: @@ -605,7 +607,7 @@ usage is void (*_gtk_reserved1)(void);""" if ': ' in value: colon = value.find(': ') version = value[:colon] - desc = value[colon+2:] + desc = value[colon + 2:] else: desc = value version = None @@ -718,8 +720,7 @@ usage is void (*_gtk_reserved1)(void);""" (param, ) = unused text = ', should be %r' % (param, ) else: - text = ', should be one of %s' % ( - ', '.join(repr(p) for p in unused), ) + text = ', should be one of %s' % (', '.join(repr(p) for p in unused), ) tag = block.params.get(doc_name) message.warn( @@ -795,14 +796,14 @@ usage is void (*_gtk_reserved1)(void);""" # Resolve real parameter names early, so that in later # phase we can refer to them while resolving annotations. for i, param in enumerate(signal.parameters): - param.argname, tag = names[i+1] + param.argname, tag = names[i + 1] else: message.warn("incorrect number of parameters in comment block, " "parameter annotations will be ignored.", block.position) for i, param in enumerate(signal.parameters): if names: - name, tag = names[i+1] + name, tag = names[i + 1] options = getattr(tag, 'options', {}) param_type = options.get(OPT_TYPE) if param_type: @@ -1164,9 +1165,9 @@ method or constructor of some type.""" origin_node = self._get_constructor_class(func, subsymbol) if origin_node is None: if func.is_constructor: - message.warn_node(func, - "Can't find matching type for constructor; symbol=%r" \ - % (func.symbol, )) + message.warn_node( + func, + "Can't find matching type for constructor; symbol=%r" % (func.symbol, )) return False # Some sanity checks; only objects and boxeds can have ctors diff --git a/giscanner/message.py b/giscanner/message.py index 8a948cd3..3a330afe 100644 --- a/giscanner/message.py +++ b/giscanner/message.py @@ -61,7 +61,7 @@ class Position(object): return '%s:' % (filename, ) def offset(self, offset): - return Position(self.filename, self.line+offset, self.column) + return Position(self.filename, self.line + offset, self.column) class MessageLogger(object): @@ -119,16 +119,14 @@ If the warning is related to a ast.Node type, see log_node().""" elif log_type == FATAL: error_type = "Fatal" if prefix: - text = ( -'''%s: %s: %s: %s: %s\n''' % (last_position, error_type, self._namespace.name, - prefix, text)) + text = ('%s: %s: %s: %s: %s\n' % (last_position, error_type, + self._namespace.name, prefix, text)) else: if self._namespace: - text = ( -'''%s: %s: %s: %s\n''' % (last_position, error_type, self._namespace.name, text)) + text = ('%s: %s: %s: %s\n' % (last_position, error_type, + self._namespace.name, text)) else: - text = ( -'''%s: %s: %s\n''' % (last_position, error_type, text)) + text = ('%s: %s: %s\n' % (last_position, error_type, text)) self._output.write(text) if log_type == FATAL: @@ -169,17 +167,21 @@ def log_node(log_type, node, text, context=None, positions=None): ml = MessageLogger.get() ml.log_node(log_type, node, text, context=context, positions=positions) + def warn(text, positions=None, prefix=None): ml = MessageLogger.get() ml.log(WARNING, text, positions, prefix) + def warn_node(node, text, context=None, positions=None): log_node(WARNING, node, text, context=context, positions=positions) + def warn_symbol(symbol, text): ml = MessageLogger.get() ml.log_symbol(WARNING, symbol, text) + def fatal(text, positions=None, prefix=None): ml = MessageLogger.get() ml.log(FATAL, text, positions, prefix) diff --git a/giscanner/scannermain.py b/giscanner/scannermain.py index 8ccd1f09..00dc30d6 100755 --- a/giscanner/scannermain.py +++ b/giscanner/scannermain.py @@ -42,14 +42,17 @@ from giscanner.sourcescanner import SourceScanner, ALL_EXTS from giscanner.transformer import Transformer from . import utils + def process_cflags_begin(option, opt, value, parser): cflags = getattr(parser.values, option.dest) while len(parser.rargs) > 0 and parser.rargs[0] != '--cflags-end': cflags.append(parser.rargs.pop(0)) + def process_cflags_end(option, opt, value, parser): pass + def get_preprocessor_option_group(parser): group = optparse.OptionGroup(parser, "Preprocessor options") group.add_option("", "--cflags-begin", @@ -71,6 +74,7 @@ def get_preprocessor_option_group(parser): group.add_option("-p", dest="", help="Ignored") return group + def get_windows_option_group(parser): group = optparse.OptionGroup(parser, "Machine Dependent Options") group.add_option("-m", help="some machine dependent option", @@ -79,13 +83,13 @@ def get_windows_option_group(parser): return group + def _get_option_parser(): parser = optparse.OptionParser('%prog [options] sources') parser.add_option('', "--quiet", action="store_true", dest="quiet", default=False, - help="If passed, do not print details of normal" \ - + " operation") + help="If passed, do not print details of normal operation") parser.add_option("", "--format", action="store", dest="format", default="gir", @@ -204,6 +208,7 @@ match the namespace prefix.""") def _error(msg): raise SystemExit('ERROR: %s' % (msg, )) + def passthrough_gir(path, f): parser = GIRParser() parser.parse(path) @@ -211,6 +216,7 @@ def passthrough_gir(path, f): writer = GIRWriter(parser.get_namespace()) f.write(writer.get_xml()) + def test_codegen(optstring): (namespace, out_h_filename, out_c_filename) = optstring.split(',') if namespace == 'Everything': @@ -221,6 +227,7 @@ def test_codegen(optstring): _error("Invaild namespace %r" % (namespace, )) return 0 + def process_options(output, allowed_flags): for option in output.split(): for flag in allowed_flags: @@ -229,6 +236,7 @@ def process_options(output, allowed_flags): yield option break + def process_packages(options, packages): args = ['pkg-config', '--cflags'] args.extend(packages) @@ -248,6 +256,7 @@ def process_packages(options, packages): options.cpp_defines.extend(pkg_options.cpp_defines) options.cpp_undefines.extend(pkg_options.cpp_undefines) + def extract_filenames(args): filenames = [] for arg in args: @@ -261,6 +270,7 @@ def extract_filenames(args): filenames.append(os.path.abspath(arg)) return filenames + def extract_filelist(options): filenames = [] if not os.path.exists(options.filelist): @@ -271,10 +281,10 @@ def extract_filelist(options): # We don't support real C++ parsing yet, but we should be able # to understand C API implemented in C++ files. filename = line.strip() - if (filename.endswith('.c') or filename.endswith('.cpp') or - filename.endswith('.cc') or filename.endswith('.cxx') or - filename.endswith('.h') or filename.endswith('.hpp') or - filename.endswith('.hxx')): + if (filename.endswith('.c') or filename.endswith('.cpp') + or filename.endswith('.cc') or filename.endswith('.cxx') + or filename.endswith('.h') or filename.endswith('.hpp') + or filename.endswith('.hxx')): if not os.path.exists(filename): _error('%s: Invalid filelist entry-no such file or directory' % (line, )) # Make absolute, because we do comparisons inside scannerparser.c @@ -282,6 +292,7 @@ def extract_filelist(options): filenames.append(os.path.abspath(filename)) return filenames + def create_namespace(options): if options.strip_prefix: print """g-ir-scanner: warning: Option --strip-prefix has been deprecated; @@ -310,6 +321,7 @@ see --identifier-prefix and --symbol-prefix.""" identifier_prefixes=identifier_prefixes, symbol_prefixes=symbol_prefixes) + def create_transformer(namespace, options): transformer = Transformer(namespace, accept_unprefixed=options.accept_unprefixed) @@ -331,6 +343,7 @@ def create_transformer(namespace, options): return transformer + def create_binary(transformer, options, args): # Transform the C AST nodes into higher level # GLib/GObject nodes @@ -341,7 +354,7 @@ def create_binary(transformer, options, args): gdump_parser.init_parse() if options.program: - args=[options.program] + args = [options.program] args.extend(options.program_args) binary = IntrospectionBinary(args) else: @@ -354,6 +367,7 @@ def create_binary(transformer, options, args): gdump_parser.parse() return shlibs + def create_source_scanner(options, args): if hasattr(options, 'filelist') and options.filelist: filenames = extract_filelist(options) @@ -371,6 +385,7 @@ def create_source_scanner(options, args): ss.parse_macros(filenames) return ss + def write_output(data, options): if options.output == "-": output = sys.stdout @@ -407,6 +422,7 @@ def write_output(data, options): except IOError as e: _error("while writing output: %s" % (e.strerror, )) + def scanner_main(args): parser = _get_option_parser() (options, args) = parser.parse_args(args) diff --git a/giscanner/sectionparser.py b/giscanner/sectionparser.py index 0b013be1..61763e45 100644 --- a/giscanner/sectionparser.py +++ b/giscanner/sectionparser.py @@ -19,10 +19,12 @@ import re + class SectionsFile(object): def __init__(self, sections): self.sections = sections + class Section(object): def __init__(self): self.file = None @@ -30,11 +32,13 @@ class Section(object): self.includes = None self.subsections = [] + class Subsection(object): def __init__(self, name): self.name = name self.symbols = [] + def parse_sections_file(lines): sections = [] current_section = None diff --git a/giscanner/shlibs.py b/giscanner/shlibs.py index 4f622db3..1241827d 100644 --- a/giscanner/shlibs.py +++ b/giscanner/shlibs.py @@ -26,6 +26,7 @@ import subprocess from .utils import get_libtool_command, extract_libtool_shlib + # For .la files, the situation is easy. def _resolve_libtool(options, binary, libraries): shlibs = [] @@ -36,6 +37,7 @@ def _resolve_libtool(options, binary, libraries): return shlibs + # Assume ldd output is something vaguely like # # libpangoft2-1.0.so.0 => /usr/lib/libpangoft2-1.0.so.0 (0x006c1000) @@ -51,6 +53,7 @@ def _ldd_library_pattern(library_name): return re.compile("(? of -l) # against a library to find the shared library name. The shared # library name is suppose to be what you pass to dlopen() (or diff --git a/giscanner/sourcescanner.py b/giscanner/sourcescanner.py index 736ddbd4..d5c43926 100644 --- a/giscanner/sourcescanner.py +++ b/giscanner/sourcescanner.py @@ -93,8 +93,7 @@ def symbol_type_name(symbol_type): CSYMBOL_TYPE_UNION: 'union', CSYMBOL_TYPE_ENUM: 'enum', CSYMBOL_TYPE_TYPEDEF: 'typedef', - CSYMBOL_TYPE_MEMBER: 'member', - }.get(symbol_type) + CSYMBOL_TYPE_MEMBER: 'member'}.get(symbol_type) def ctype_name(ctype): @@ -108,8 +107,7 @@ def ctype_name(ctype): CTYPE_ENUM: 'enum', CTYPE_POINTER: 'pointer', CTYPE_ARRAY: 'array', - CTYPE_FUNCTION: 'function', - }.get(ctype) + CTYPE_FUNCTION: 'function'}.get(ctype) class SourceType(object): @@ -266,7 +264,7 @@ class SourceScanner(object): return self._scanner.get_comments() def dump(self): - print '-'*30 + print '-' * 30 for symbol in self._scanner.get_symbols(): print symbol.ident, symbol.base_type.name, symbol.type diff --git a/giscanner/testcodegen.py b/giscanner/testcodegen.py index f304dc7a..1ed247c7 100644 --- a/giscanner/testcodegen.py +++ b/giscanner/testcodegen.py @@ -27,12 +27,14 @@ DEFAULT_C_VALUES = {ast.TYPE_ANY: 'NULL', ast.TYPE_FILENAME: '""', ast.TYPE_GTYPE: 'g_object_get_type ()'} + def get_default_for_typeval(typeval): default = DEFAULT_C_VALUES.get(typeval) if default: return default return "0" + def uscore_from_type(typeval): if typeval.target_fundamental: return typeval.target_fundamental.replace(' ', '_') @@ -41,6 +43,7 @@ def uscore_from_type(typeval): else: assert False, typeval + class EverythingCodeGenerator(object): def __init__(self, out_h_filename, out_c_filename): diff --git a/giscanner/transformer.py b/giscanner/transformer.py index f1aa165d..f70d756a 100644 --- a/giscanner/transformer.py +++ b/giscanner/transformer.py @@ -34,6 +34,7 @@ from .sourcescanner import ( CSYMBOL_TYPE_MEMBER, CSYMBOL_TYPE_ELLIPSIS, CSYMBOL_TYPE_CONST, TYPE_QUALIFIER_CONST, TYPE_QUALIFIER_VOLATILE) + class TransformerException(Exception): pass @@ -54,7 +55,7 @@ class Transformer(object): self._namespace = namespace self._pkg_config_packages = set() self._typedefs_ns = {} - self._parsed_includes = {} # Namespace> + self._parsed_includes = {} # Namespace> self._includepaths = [] self._passthrough_mode = False @@ -104,7 +105,7 @@ class Transformer(object): if not ns_compound: ns_compound = self._namespace.get('_' + compound.name) if (not ns_compound and isinstance(compound, (ast.Record, ast.Union)) - and len(compound.fields) == 0): + and len(compound.fields) == 0): disguised = ast.Record(compound.name, typedef, disguised=True) self._namespace.append(disguised) elif not ns_compound: @@ -126,8 +127,8 @@ class Transformer(object): def register_include_uninstalled(self, include_path): basename = os.path.basename(include_path) if not basename.endswith('.gir'): - raise SystemExit( -"Include path %r must be a filename path ending in .gir" % (include_path, )) + raise SystemExit("Include path %r must be a filename path " + "ending in .gir" % (include_path, )) girname = basename[:-4] include = ast.Include.from_string(girname) if include in self._namespace.includes: @@ -175,8 +176,7 @@ None.""" path = os.path.join(d, girname) if os.path.exists(path): return path - sys.stderr.write("Couldn't find include %r (search path: %r)\n"\ - % (girname, searchdirs)) + sys.stderr.write("Couldn't find include %r (search path: %r)\n" % (girname, searchdirs)) sys.exit(1) @classmethod @@ -228,7 +228,7 @@ currently-scanned namespace is first.""" def _split_c_string_for_namespace_matches(self, name, is_identifier=False): matches = [] # Namespaces which might contain this name - unprefixed_namespaces = [] # Namespaces with no prefix, last resort + unprefixed_namespaces = [] # Namespaces with no prefix, last resort for ns in self._iter_namespaces(): if is_identifier: prefixes = ns.identifier_prefixes @@ -502,8 +502,8 @@ raise ValueError.""" def _create_member(self, symbol, parent_symbol=None): source_type = symbol.base_type - if (source_type.type == CTYPE_POINTER and - symbol.base_type.base_type.type == CTYPE_FUNCTION): + if (source_type.type == CTYPE_POINTER + and symbol.base_type.base_type.type == CTYPE_FUNCTION): node = self._create_callback(symbol, member=True) elif source_type.type == CTYPE_STRUCT and source_type.name is None: node = self._create_struct(symbol, anonymous=True) @@ -519,8 +519,8 @@ raise ValueError.""" # to be able to properly calculate the size of the compound # type (e.g. GValue) that contains this array, see # . - if (source_type.base_type.type == CTYPE_UNION and - source_type.base_type.name is None): + if (source_type.base_type.type == CTYPE_UNION + and source_type.base_type.name is None): synthesized_type = self._synthesize_union_type(symbol, parent_symbol) ftype = ast.Array(None, synthesized_type, complete_ctype=complete_ctype) else: @@ -558,11 +558,9 @@ raise ValueError.""" def _create_typedef(self, symbol): ctype = symbol.base_type.type - if (ctype == CTYPE_POINTER and - symbol.base_type.base_type.type == CTYPE_FUNCTION): + if (ctype == CTYPE_POINTER and symbol.base_type.base_type.type == CTYPE_FUNCTION): node = self._create_typedef_callback(symbol) - elif (ctype == CTYPE_POINTER and - symbol.base_type.base_type.type == CTYPE_STRUCT): + elif (ctype == CTYPE_POINTER and symbol.base_type.base_type.type == CTYPE_STRUCT): node = self._create_typedef_struct(symbol, disguised=True) elif ctype == CTYPE_STRUCT: node = self._create_typedef_struct(symbol) @@ -627,8 +625,7 @@ raise ValueError.""" derefed_typename = canonical.replace('*', '') # Preserve "pointerness" of struct/union members - if (is_member and canonical.endswith('*') and - derefed_typename in ast.basic_type_names): + if (is_member and canonical.endswith('*') and derefed_typename in ast.basic_type_names): return 'gpointer' else: return derefed_typename @@ -714,8 +711,7 @@ raise ValueError.""" # Don't create constants for non-public things # http://bugzilla.gnome.org/show_bug.cgi?id=572790 - if (symbol.source_filename is None or - not symbol.source_filename.endswith('.h')): + if (symbol.source_filename is None or not symbol.source_filename.endswith('.h')): return None try: name = self._strip_symbol(symbol) @@ -738,13 +734,13 @@ raise ValueError.""" if isinstance(target, ast.Type): unaliased = target if unaliased == ast.TYPE_UINT64: - value = str(symbol.const_int % 2**64) + value = str(symbol.const_int % 2 ** 64) elif unaliased == ast.TYPE_UINT32: - value = str(symbol.const_int % 2**32) + value = str(symbol.const_int % 2 ** 32) elif unaliased == ast.TYPE_UINT16: - value = str(symbol.const_int % 2**16) + value = str(symbol.const_int % 2 ** 16) elif unaliased == ast.TYPE_UINT8: - value = str(symbol.const_int % 2**16) + value = str(symbol.const_int % 2 ** 16) else: value = str(symbol.const_int) elif symbol.const_double is not None: @@ -842,8 +838,7 @@ raise ValueError.""" # Mark the 'user_data' arguments for i, param in enumerate(parameters): - if (param.type.target_fundamental == 'gpointer' and - param.argname == 'user_data'): + if (param.type.target_fundamental == 'gpointer' and param.argname == 'user_data'): param.closure_name = param.argname if member: diff --git a/giscanner/utils.py b/giscanner/utils.py index 9adf6d93..77d05b9e 100644 --- a/giscanner/utils.py +++ b/giscanner/utils.py @@ -22,7 +22,10 @@ import re import os import subprocess + _debugflags = None + + def have_debug_flag(flag): """Check for whether a specific debugging feature is enabled. Well-known flags: @@ -38,6 +41,7 @@ Well-known flags: _debugflags.remove('') return flag in _debugflags + def break_on_debug_flag(flag): if have_debug_flag(flag): import pdb @@ -69,8 +73,10 @@ def to_underscores_noprefix(name): name = _upperstr_pat2.sub(r'\1_\2', name) return name + _libtool_pat = re.compile("dlname='([A-z0-9\.\-\+]+)'\n") + def _extract_dlname_field(la_file): f = open(la_file) data = f.read() @@ -81,6 +87,7 @@ def _extract_dlname_field(la_file): else: return None + # Returns the name that we would pass to dlopen() the library # corresponding to this .la file def extract_libtool_shlib(la_file): @@ -92,6 +99,7 @@ def extract_libtool_shlib(la_file): # a path rather than the raw dlname return os.path.basename(dlname) + def extract_libtool(la_file): dlname = _extract_dlname_field(la_file) if dlname is None: @@ -104,6 +112,7 @@ def extract_libtool(la_file): libname = libname.replace('.libs/.libs', '.libs') return libname + # Returns arguments for invoking libtool, if applicable, otherwise None def get_libtool_command(options): libtool_infection = not options.nolibtool diff --git a/giscanner/xmlwriter.py b/giscanner/xmlwriter.py index 4a1dc9fb..6efe684d 100755 --- a/giscanner/xmlwriter.py +++ b/giscanner/xmlwriter.py @@ -44,8 +44,7 @@ def _calc_attrs_length(attributes, indent, self_indent): return attr_length + indent + self_indent -def collect_attributes(tag_name, attributes, self_indent, - self_indent_char, indent=-1): +def collect_attributes(tag_name, attributes, self_indent, self_indent_char, indent=-1): if not attributes: return '' if _calc_attrs_length(attributes, indent, self_indent) > 79: @@ -68,6 +67,7 @@ def collect_attributes(tag_name, attributes, self_indent, first = False return attr_value + def build_xml_tag(tag_name, attributes=None, data=None, self_indent=0, self_indent_char=' '): if attributes is None: @@ -86,6 +86,7 @@ def build_xml_tag(tag_name, attributes=None, data=None, self_indent=0, len(prefix) + len(suffix)) return prefix + attrs + suffix + with LibtoolImporter(None, None): if 'UNINSTALLED_INTROSPECTION_SRCDIR' in os.environ: from _giscanner import collect_attributes @@ -108,10 +109,8 @@ class XMLWriter(object): def _open_tag(self, tag_name, attributes=None): if attributes is None: attributes = [] - attrs = collect_attributes( - tag_name, attributes, self._indent, - self._indent_char, - len(tag_name) + 2) + attrs = collect_attributes(tag_name, attributes, + self._indent, self._indent_char, len(tag_name) + 2) self.write_line(u'<%s%s>' % (tag_name, attrs)) def _close_tag(self, tag_name): @@ -137,10 +136,9 @@ class XMLWriter(object): if do_escape: line = escape(line) if indent: - self._data.write('%s%s%s' % ( - self._indent_char * self._indent, - line.encode('utf-8'), - self._newline_char)) + self._data.write('%s%s%s' % (self._indent_char * self._indent, + line.encode('utf-8'), + self._newline_char)) else: self._data.write('%s%s' % (line.encode('utf-8'), self._newline_char)) diff --git a/misc/pep8.py b/misc/pep8.py index 38906294..f99ae3a3 100644 --- a/misc/pep8.py +++ b/misc/pep8.py @@ -1,6 +1,7 @@ -#!/usr/bin/python +#!/usr/bin/env python # pep8.py - Check Python source code formatting, according to PEP 8 -# Copyright (C) 2006 Johann C. Rocholl +# Copyright (C) 2006-2009 Johann C. Rocholl +# Copyright (C) 2009-2013 Florent Xicluna # # Permission is hereby granted, free of charge, to any person # obtaining a copy of this software and associated documentation files @@ -22,7 +23,7 @@ # CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE # SOFTWARE. -""" +r""" Check Python source code formatting, according to PEP 8: http://www.python.org/dev/peps/pep-0008/ @@ -30,8 +31,7 @@ For usage and a list of options, try this: $ python pep8.py -h This program and its regression test suite live here: -http://svn.browsershots.org/trunk/devtools/pep8/ -http://trac.browsershots.org/browser/trunk/devtools/pep8/ +http://github.com/jcrocholl/pep8 Groups of errors and warnings: E errors @@ -43,72 +43,72 @@ W warnings 500 line length 600 deprecation 700 statements - -You can add checks to this program by writing plugins. Each plugin is -a simple function that is called for each line of source code, either -physical or logical. - -Physical line: -- Raw line of text from the input file. - -Logical line: -- Multi-line statements converted to a single line. -- Stripped left and right. -- Contents of strings replaced with 'xxx' of same length. -- Comments removed. - -The check function requests physical or logical lines by the name of -the first argument: - -def maximum_line_length(physical_line) -def extraneous_whitespace(logical_line) -def blank_lines(logical_line, blank_lines, indent_level, line_number) - -The last example above demonstrates how check plugins can request -additional information with extra arguments. All attributes of the -Checker object are available. Some examples: - -lines: a list of the raw lines from the input file -tokens: the tokens that contribute to this logical line -line_number: line number in the input file -blank_lines: blank lines before this one -indent_char: first indentation character in this file (' ' or '\t') -indent_level: indentation (with tabs expanded to multiples of 8) -previous_indent_level: indentation on previous line -previous_logical: previous logical line - -The docstring of each check function shall be the relevant part of -text from PEP 8. It is printed if the user enables --show-pep8. - +900 syntax error """ +__version__ = '1.4.5' import os import sys import re import time import inspect +import keyword import tokenize from optparse import OptionParser -from keyword import iskeyword from fnmatch import fnmatch - -__version__ = '0.2.0' -__revision__ = '$Rev$' - -default_exclude = '.svn,CVS,*.pyc,*.pyo' - -indent_match = re.compile(r'([ \t]*)').match -raise_comma_match = re.compile(r'raise\s+\w+\s*(,)').match - -operators = """ -+ - * / % ^ & | = < > >> << -+= -= *= /= %= ^= &= |= == <= >= >>= <<= -!= <> : -in is or not and -""".split() - -options = None -args = None +try: + from configparser import RawConfigParser + from io import TextIOWrapper +except ImportError: + from ConfigParser import RawConfigParser + +DEFAULT_EXCLUDE = '.svn,CVS,.bzr,.hg,.git,__pycache__' +DEFAULT_IGNORE = 'E226,E24' +if sys.platform == 'win32': + DEFAULT_CONFIG = os.path.expanduser(r'~\.pep8') +else: + DEFAULT_CONFIG = os.path.join(os.getenv('XDG_CONFIG_HOME') or + os.path.expanduser('~/.config'), 'pep8') +PROJECT_CONFIG = ('.pep8', 'tox.ini', 'setup.cfg') +TESTSUITE_PATH = os.path.join(os.path.dirname(__file__), 'testsuite') +MAX_LINE_LENGTH = 79 +REPORT_FORMAT = { + 'default': '%(path)s:%(row)d:%(col)d: %(code)s %(text)s', + 'pylint': '%(path)s:%(row)d: [%(code)s] %(text)s', +} + +PyCF_ONLY_AST = 1024 +SINGLETONS = frozenset(['False', 'None', 'True']) +KEYWORDS = frozenset(keyword.kwlist + ['print']) - SINGLETONS +UNARY_OPERATORS = frozenset(['>>', '**', '*', '+', '-']) +ARITHMETIC_OP = frozenset(['**', '*', '/', '//', '+', '-']) +WS_OPTIONAL_OPERATORS = ARITHMETIC_OP.union(['^', '&', '|', '<<', '>>', '%']) +WS_NEEDED_OPERATORS = frozenset([ + '**=', '*=', '/=', '//=', '+=', '-=', '!=', '<>', '<', '>', + '%=', '^=', '&=', '|=', '==', '<=', '>=', '<<=', '>>=', '=']) +WHITESPACE = frozenset(' \t') +SKIP_TOKENS = frozenset([tokenize.COMMENT, tokenize.NL, tokenize.NEWLINE, + tokenize.INDENT, tokenize.DEDENT]) +BENCHMARK_KEYS = ['directories', 'files', 'logical lines', 'physical lines'] + +INDENT_REGEX = re.compile(r'([ \t]*)') +RAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,') +RERAISE_COMMA_REGEX = re.compile(r'raise\s+\w+\s*,\s*\w+\s*,\s*\w+') +ERRORCODE_REGEX = re.compile(r'\b[A-Z]\d{3}\b') +DOCSTRING_REGEX = re.compile(r'u?r?["\']') +EXTRANEOUS_WHITESPACE_REGEX = re.compile(r'[[({] | []}),;:]') +WHITESPACE_AFTER_COMMA_REGEX = re.compile(r'[,;:]\s*(?: |\t)') +COMPARE_SINGLETON_REGEX = re.compile(r'([=!]=)\s*(None|False|True)') +COMPARE_TYPE_REGEX = re.compile(r'(?:[=!]=|is(?:\s+not)?)\s*type(?:s.\w+Type' + r'|\s*\(\s*([^)]*[^ )])\s*\))') +KEYWORD_REGEX = re.compile(r'(\s*)\b(?:%s)\b(\s*)' % r'|'.join(KEYWORDS)) +OPERATOR_REGEX = re.compile(r'(?:[^,\s])(\s*)(?:[-+*/|!<=>%&^]+)(\s*)') +LAMBDA_REGEX = re.compile(r'\blambda\b') +HUNK_REGEX = re.compile(r'^@@ -\d+(?:,\d+)? \+(\d+)(?:,(\d+))? @@.*$') + +# Work around Python < 2.6 behaviour, which does not generate NL after +# a comment which is on a line by itself. +COMMENT_WITH_NL = tokenize.generate_tokens(['#\n'].pop).send(None)[1] == '#\n' ############################################################################## @@ -117,7 +117,7 @@ args = None def tabs_or_spaces(physical_line, indent_char): - """ + r""" Never mix tabs and spaces. The most popular way of indenting Python is with spaces only. The @@ -126,52 +126,81 @@ def tabs_or_spaces(physical_line, indent_char): invoking the Python command line interpreter with the -t option, it issues warnings about code that illegally mixes tabs and spaces. When using -tt these warnings become errors. These options are highly recommended! + + Okay: if a == 0:\n a = 1\n b = 1 + E101: if a == 0:\n a = 1\n\tb = 1 """ - indent = indent_match(physical_line).group(1) + indent = INDENT_REGEX.match(physical_line).group(1) for offset, char in enumerate(indent): if char != indent_char: return offset, "E101 indentation contains mixed spaces and tabs" def tabs_obsolete(physical_line): - """ + r""" For new projects, spaces-only are strongly recommended over tabs. Most editors have features that make this easy to do. + + Okay: if True:\n return + W191: if True:\n\treturn """ - indent = indent_match(physical_line).group(1) - if indent.count('\t'): + indent = INDENT_REGEX.match(physical_line).group(1) + if '\t' in indent: return indent.index('\t'), "W191 indentation contains tabs" def trailing_whitespace(physical_line): - """ + r""" JCR: Trailing whitespace is superfluous. + FBM: Except when it occurs as part of a blank line (i.e. the line is + nothing but whitespace). According to Python docs[1] a line with only + whitespace is considered a blank line, and is to be ignored. However, + matching a blank line to its indentation level avoids mistakenly + terminating a multi-line statement (e.g. class declaration) when + pasting code into the standard Python interpreter. + + [1] http://docs.python.org/reference/lexical_analysis.html#blank-lines + + The warning returned varies on whether the line itself is blank, for easier + filtering for those who want to indent their blank lines. + + Okay: spam(1)\n# + W291: spam(1) \n# + W293: class Foo(object):\n \n bang = 12 """ - physical_line = physical_line.rstrip('\n') # chr(10), newline - physical_line = physical_line.rstrip('\r') # chr(13), carriage return - physical_line = physical_line.rstrip('\x0c') # chr(12), form feed, ^L - stripped = physical_line.rstrip() + physical_line = physical_line.rstrip('\n') # chr(10), newline + physical_line = physical_line.rstrip('\r') # chr(13), carriage return + physical_line = physical_line.rstrip('\x0c') # chr(12), form feed, ^L + stripped = physical_line.rstrip(' \t\v') if physical_line != stripped: - return len(stripped), "W291 trailing whitespace" + if stripped: + return len(stripped), "W291 trailing whitespace" + else: + return 0, "W293 blank line contains whitespace" def trailing_blank_lines(physical_line, lines, line_number): - """ + r""" JCR: Trailing blank lines are superfluous. + + Okay: spam(1) + W391: spam(1)\n """ - if physical_line.strip() == '' and line_number == len(lines): + if not physical_line.rstrip() and line_number == len(lines): return 0, "W391 blank line at end of file" def missing_newline(physical_line): """ JCR: The last line should have a newline. + + Reports warning W292. """ if physical_line.rstrip() == physical_line: return len(physical_line), "W292 no newline at end of file" -def maximum_line_length(physical_line): +def maximum_line_length(physical_line, max_line_length): """ Limit all lines to a maximum of 79 characters. @@ -181,10 +210,23 @@ def maximum_line_length(physical_line): ugly. Therefore, please limit all lines to a maximum of 79 characters. For flowing long blocks of text (docstrings or comments), limiting the length to 72 characters is recommended. + + Reports error E501. """ - length = len(physical_line.rstrip()) - if length > 99: - return 79, "E501 line too long (%d characters)" % length + line = physical_line.rstrip() + length = len(line) + if length > max_line_length: + if noqa(line): + return + if hasattr(line, 'decode'): # Python 2 + # The line could contain multi-byte characters + try: + length = len(line.decode('utf-8')) + except UnicodeError: + pass + if length > max_line_length: + return (max_line_length, "E501 line too long " + "(%d > %d characters)" % (length, max_line_length)) ############################################################################## @@ -192,6 +234,44 @@ def maximum_line_length(physical_line): ############################################################################## +def blank_lines(logical_line, blank_lines, indent_level, line_number, + previous_logical, previous_indent_level): + r""" + Separate top-level function and class definitions with two blank lines. + + Method definitions inside a class are separated by a single blank line. + + Extra blank lines may be used (sparingly) to separate groups of related + functions. Blank lines may be omitted between a bunch of related + one-liners (e.g. a set of dummy implementations). + + Use blank lines in functions, sparingly, to indicate logical sections. + + Okay: def a():\n pass\n\n\ndef b():\n pass + Okay: def a():\n pass\n\n\n# Foo\n# Bar\n\ndef b():\n pass + + E301: class Foo:\n b = 0\n def bar():\n pass + E302: def a():\n pass\n\ndef b(n):\n pass + E303: def a():\n pass\n\n\n\ndef b(n):\n pass + E303: def a():\n\n\n\n pass + E304: @decorator\n\ndef a():\n pass + """ + if line_number < 3 and not previous_logical: + return # Don't expect blank lines before the first line + if previous_logical.startswith('@'): + if blank_lines: + yield 0, "E304 blank lines found after function decorator" + elif blank_lines > 2 or (indent_level and blank_lines == 2): + yield 0, "E303 too many blank lines (%d)" % blank_lines + elif logical_line.startswith(('def ', 'class ', '@')): + if indent_level: + if not (blank_lines or previous_indent_level < indent_level or + DOCSTRING_REGEX.match(previous_logical)): + yield 0, "E301 expected 1 blank line, found 0" + elif blank_lines != 2: + yield 0, "E302 expected 2 blank lines, found %d" % blank_lines + + def extraneous_whitespace(logical_line): """ Avoid extraneous whitespace in the following situations: @@ -199,51 +279,273 @@ def extraneous_whitespace(logical_line): - Immediately inside parentheses, brackets or braces. - Immediately before a comma, semicolon, or colon. + + Okay: spam(ham[1], {eggs: 2}) + E201: spam( ham[1], {eggs: 2}) + E201: spam(ham[ 1], {eggs: 2}) + E201: spam(ham[1], { eggs: 2}) + E202: spam(ham[1], {eggs: 2} ) + E202: spam(ham[1 ], {eggs: 2}) + E202: spam(ham[1], {eggs: 2 }) + + E203: if x == 4: print x, y; x, y = y , x + E203: if x == 4: print x, y ; x, y = y, x + E203: if x == 4 : print x, y; x, y = y, x """ line = logical_line - for char in '([{': - found = line.find(char + ' ') - if found > -1: - return found + 1, "E201 whitespace after '%s'" % char - for char in '}])': - found = line.find(' ' + char) - if found > -1 and line[found - 1] != ',': - return found, "E202 whitespace before '%s'" % char - for char in ',;:': - found = line.find(' ' + char) - if found > -1: - return found, "E203 whitespace before '%s'" % char + for match in EXTRANEOUS_WHITESPACE_REGEX.finditer(line): + text = match.group() + char = text.strip() + found = match.start() + if text == char + ' ': + # assert char in '([{' + yield found + 1, "E201 whitespace after '%s'" % char + elif line[found - 1] != ',': + code = ('E202' if char in '}])' else 'E203') # if char in ',;:' + yield found, "%s whitespace before '%s'" % (code, char) + + +def whitespace_around_keywords(logical_line): + r""" + Avoid extraneous whitespace around keywords. + + Okay: True and False + E271: True and False + E272: True and False + E273: True and\tFalse + E274: True\tand False + """ + for match in KEYWORD_REGEX.finditer(logical_line): + before, after = match.groups() + + if '\t' in before: + yield match.start(1), "E274 tab before keyword" + elif len(before) > 1: + yield match.start(1), "E272 multiple spaces before keyword" + + if '\t' in after: + yield match.start(2), "E273 tab after keyword" + elif len(after) > 1: + yield match.start(2), "E271 multiple spaces after keyword" def missing_whitespace(logical_line): """ JCR: Each comma, semicolon or colon should be followed by whitespace. + + Okay: [a, b] + Okay: (3,) + Okay: a[1:4] + Okay: a[:4] + Okay: a[1:] + Okay: a[1:4:2] + E231: ['a','b'] + E231: foo(bar,baz) + E231: [{'a':'b'}] """ line = logical_line for index in range(len(line) - 1): char = line[index] - if char in ',;:' and line[index + 1] != ' ': + if char in ',;:' and line[index + 1] not in WHITESPACE: before = line[:index] - if char == ':' and before.count('[') > before.count(']'): - continue # Slice syntax, no space required - return index, "E231 missing whitespace after '%s'" % char + if char == ':' and before.count('[') > before.count(']') and \ + before.rfind('{') < before.rfind('['): + continue # Slice syntax, no space required + if char == ',' and line[index + 1] == ')': + continue # Allow tuple with only one element: (3,) + yield index, "E231 missing whitespace after '%s'" % char def indentation(logical_line, previous_logical, indent_char, indent_level, previous_indent_level): - """ + r""" Use 4 spaces per indentation level. For really old code that you don't want to mess up, you can continue to use 8-space tabs. + + Okay: a = 1 + Okay: if a == 0:\n a = 1 + E111: a = 1 + + Okay: for item in items:\n pass + E112: for item in items:\npass + + Okay: a = 1\nb = 2 + E113: a = 1\n b = 2 """ if indent_char == ' ' and indent_level % 4: - return 0, "E111 indentation is not a multiple of four" + yield 0, "E111 indentation is not a multiple of four" indent_expect = previous_logical.endswith(':') if indent_expect and indent_level <= previous_indent_level: - return 0, "E112 expected an indented block" + yield 0, "E112 expected an indented block" if indent_level > previous_indent_level and not indent_expect: - return 0, "E113 unexpected indentation" + yield 0, "E113 unexpected indentation" + + +def continuation_line_indentation(logical_line, tokens, indent_level, verbose): + r""" + Continuation lines should align wrapped elements either vertically using + Python's implicit line joining inside parentheses, brackets and braces, or + using a hanging indent. + + When using a hanging indent the following considerations should be applied: + + - there should be no arguments on the first line, and + + - further indentation should be used to clearly distinguish itself as a + continuation line. + + Okay: a = (\n) + E123: a = (\n ) + + Okay: a = (\n 42) + E121: a = (\n 42) + E122: a = (\n42) + E123: a = (\n 42\n ) + E124: a = (24,\n 42\n) + E125: if (a or\n b):\n pass + E126: a = (\n 42) + E127: a = (24,\n 42) + E128: a = (24,\n 42) + """ + first_row = tokens[0][2][0] + nrows = 1 + tokens[-1][2][0] - first_row + if nrows == 1 or noqa(tokens[0][4]): + return + + # indent_next tells us whether the next block is indented; assuming + # that it is indented by 4 spaces, then we should not allow 4-space + # indents on the final continuation line; in turn, some other + # indents are allowed to have an extra 4 spaces. + indent_next = logical_line.endswith(':') + + row = depth = 0 + # remember how many brackets were opened on each line + parens = [0] * nrows + # relative indents of physical lines + rel_indent = [0] * nrows + # visual indents + indent_chances = {} + last_indent = tokens[0][2] + indent = [last_indent[1]] + if verbose >= 3: + print(">>> " + tokens[0][4].rstrip()) + + for token_type, text, start, end, line in tokens: + + newline = row < start[0] - first_row + if newline: + row = start[0] - first_row + newline = (not last_token_multiline and + token_type not in (tokenize.NL, tokenize.NEWLINE)) + + if newline: + # this is the beginning of a continuation line. + last_indent = start + if verbose >= 3: + print("... " + line.rstrip()) + + # record the initial indent. + rel_indent[row] = expand_indent(line) - indent_level + + if depth: + # a bracket expression in a continuation line. + # find the line that it was opened on + for open_row in range(row - 1, -1, -1): + if parens[open_row]: + break + else: + # an unbracketed continuation line (ie, backslash) + open_row = 0 + hang = rel_indent[row] - rel_indent[open_row] + visual_indent = indent_chances.get(start[1]) + + if token_type == tokenize.OP and text in ']})': + # this line starts with a closing bracket + if indent[depth]: + if start[1] != indent[depth]: + yield (start, "E124 closing bracket does not match " + "visual indentation") + elif hang: + yield (start, "E123 closing bracket does not match " + "indentation of opening bracket's line") + elif visual_indent is True: + # visual indent is verified + if not indent[depth]: + indent[depth] = start[1] + elif visual_indent in (text, str): + # ignore token lined up with matching one from a previous line + pass + elif indent[depth] and start[1] < indent[depth]: + # visual indent is broken + yield (start, "E128 continuation line " + "under-indented for visual indent") + elif hang == 4 or (indent_next and rel_indent[row] == 8): + # hanging indent is verified + pass + else: + # indent is broken + if hang <= 0: + error = "E122", "missing indentation or outdented" + elif indent[depth]: + error = "E127", "over-indented for visual indent" + elif hang % 4: + error = "E121", "indentation is not a multiple of four" + else: + error = "E126", "over-indented for hanging indent" + yield start, "%s continuation line %s" % error + + # look for visual indenting + if (parens[row] and token_type not in (tokenize.NL, tokenize.COMMENT) + and not indent[depth]): + indent[depth] = start[1] + indent_chances[start[1]] = True + if verbose >= 4: + print("bracket depth %s indent to %s" % (depth, start[1])) + # deal with implicit string concatenation + elif (token_type in (tokenize.STRING, tokenize.COMMENT) or + text in ('u', 'ur', 'b', 'br')): + indent_chances[start[1]] = str + # special case for the "if" statement because len("if (") == 4 + elif not indent_chances and not row and not depth and text == 'if': + indent_chances[end[1] + 1] = True + + # keep track of bracket depth + if token_type == tokenize.OP: + if text in '([{': + depth += 1 + indent.append(0) + parens[row] += 1 + if verbose >= 4: + print("bracket depth %s seen, col %s, visual min = %s" % + (depth, start[1], indent[depth])) + elif text in ')]}' and depth > 0: + # parent indents should not be more than this one + prev_indent = indent.pop() or last_indent[1] + for d in range(depth): + if indent[d] > prev_indent: + indent[d] = 0 + for ind in list(indent_chances): + if ind >= prev_indent: + del indent_chances[ind] + depth -= 1 + if depth: + indent_chances[indent[depth]] = True + for idx in range(row, -1, -1): + if parens[idx]: + parens[idx] -= 1 + break + assert len(indent) == depth + 1 + if start[1] not in indent_chances: + # allow to line up tokens + indent_chances[start[1]] = text + + last_token_multiline = (start[0] != end[0]) + + if indent_next and rel_indent[-1] == 4: + yield (last_indent, "E125 continuation line does not distinguish " + "itself from next logical line") def whitespace_before_parameters(logical_line, tokens): @@ -255,6 +557,13 @@ def whitespace_before_parameters(logical_line, tokens): - Immediately before the open parenthesis that starts an indexing or slicing. + + Okay: spam(1) + E211: spam (1) + + Okay: dict['key'] = list[index] + E211: dict ['key'] = list[index] + E211: dict['key'] = list [index] """ prev_type = tokens[0][0] prev_text = tokens[0][1] @@ -264,97 +573,391 @@ def whitespace_before_parameters(logical_line, tokens): if (token_type == tokenize.OP and text in '([' and start != prev_end and - prev_type == tokenize.NAME and + (prev_type == tokenize.NAME or prev_text in '}])') and + # Syntax "class A (B):" is allowed, but avoid it (index < 2 or tokens[index - 2][1] != 'class') and - (not iskeyword(prev_text))): - return prev_end, "E211 whitespace before '%s'" % text + # Allow "return (a.foo for a in range(5))" + not keyword.iskeyword(prev_text)): + yield prev_end, "E211 whitespace before '%s'" % text prev_type = token_type prev_text = text prev_end = end def whitespace_around_operator(logical_line): - """ + r""" Avoid extraneous whitespace in the following situations: - More than one space around an assignment (or other) operator to align it with another. + + Okay: a = 12 + 3 + E221: a = 4 + 5 + E222: a = 4 + 5 + E223: a = 4\t+ 5 + E224: a = 4 +\t5 """ - line = logical_line - for operator in operators: - found = line.find(' ' + operator) - if found > -1: - return found, "E221 multiple spaces before operator" - found = line.find(operator + ' ') - if found > -1: - return found, "E222 multiple spaces after operator" - found = line.find('\t' + operator) - if found > -1: - return found, "E223 tab before operator" - found = line.find(operator + '\t') - if found > -1: - return found, "E224 tab after operator" + for match in OPERATOR_REGEX.finditer(logical_line): + before, after = match.groups() + + if '\t' in before: + yield match.start(1), "E223 tab before operator" + elif len(before) > 1: + yield match.start(1), "E221 multiple spaces before operator" + + if '\t' in after: + yield match.start(2), "E224 tab after operator" + elif len(after) > 1: + yield match.start(2), "E222 multiple spaces after operator" + + +def missing_whitespace_around_operator(logical_line, tokens): + r""" + - Always surround these binary operators with a single space on + either side: assignment (=), augmented assignment (+=, -= etc.), + comparisons (==, <, >, !=, <>, <=, >=, in, not in, is, is not), + Booleans (and, or, not). + + - Use spaces around arithmetic operators. + + Okay: i = i + 1 + Okay: submitted += 1 + Okay: x = x * 2 - 1 + Okay: hypot2 = x * x + y * y + Okay: c = (a + b) * (a - b) + Okay: foo(bar, key='word', *args, **kwargs) + Okay: alpha[:-i] + + E225: i=i+1 + E225: submitted +=1 + E225: x = x /2 - 1 + E225: z = x **y + E226: c = (a+b) * (a-b) + E226: hypot2 = x*x + y*y + E227: c = a|b + E228: msg = fmt%(errno, errmsg) + """ + parens = 0 + need_space = False + prev_type = tokenize.OP + prev_text = prev_end = None + for token_type, text, start, end, line in tokens: + if token_type in (tokenize.NL, tokenize.NEWLINE, tokenize.ERRORTOKEN): + # ERRORTOKEN is triggered by backticks in Python 3 + continue + if text in ('(', 'lambda'): + parens += 1 + elif text == ')': + parens -= 1 + if need_space: + if start != prev_end: + # Found a (probably) needed space + if need_space is not True and not need_space[1]: + yield (need_space[0], + "E225 missing whitespace around operator") + need_space = False + elif text == '>' and prev_text in ('<', '-'): + # Tolerate the "<>" operator, even if running Python 3 + # Deal with Python 3's annotated return value "->" + pass + else: + if need_space is True or need_space[1]: + # A needed trailing space was not found + yield prev_end, "E225 missing whitespace around operator" + else: + code, optype = 'E226', 'arithmetic' + if prev_text == '%': + code, optype = 'E228', 'modulo' + elif prev_text not in ARITHMETIC_OP: + code, optype = 'E227', 'bitwise or shift' + yield (need_space[0], "%s missing whitespace " + "around %s operator" % (code, optype)) + need_space = False + elif token_type == tokenize.OP and prev_end is not None: + if text == '=' and parens: + # Allow keyword args or defaults: foo(bar=None). + pass + elif text in WS_NEEDED_OPERATORS: + need_space = True + elif text in UNARY_OPERATORS: + # Check if the operator is being used as a binary operator + # Allow unary operators: -123, -x, +1. + # Allow argument unpacking: foo(*args, **kwargs). + if prev_type == tokenize.OP: + binary_usage = (prev_text in '}])') + elif prev_type == tokenize.NAME: + binary_usage = (prev_text not in KEYWORDS) + else: + binary_usage = (prev_type not in SKIP_TOKENS) + + if binary_usage: + need_space = None + elif text in WS_OPTIONAL_OPERATORS: + need_space = None + + if need_space is None: + # Surrounding space is optional, but ensure that + # trailing space matches opening space + need_space = (prev_end, start != prev_end) + elif need_space and start == prev_end: + # A needed opening space was not found + yield prev_end, "E225 missing whitespace around operator" + need_space = False + prev_type = token_type + prev_text = text + prev_end = end def whitespace_around_comma(logical_line): - """ + r""" Avoid extraneous whitespace in the following situations: - More than one space around an assignment (or other) operator to align it with another. - JCR: This should also be applied around comma etc. + Note: these checks are disabled by default + + Okay: a = (1, 2) + E241: a = (1, 2) + E242: a = (1,\t2) """ line = logical_line - for separator in ',;:': - found = line.find(separator + ' ') - if found > -1: - return found + 1, "E241 multiple spaces after '%s'" % separator - found = line.find(separator + '\t') - if found > -1: - return found + 1, "E242 tab after '%s'" % separator + for m in WHITESPACE_AFTER_COMMA_REGEX.finditer(line): + found = m.start() + 1 + if '\t' in m.group(): + yield found, "E242 tab after '%s'" % m.group()[0] + else: + yield found, "E241 multiple spaces after '%s'" % m.group()[0] -def imports_on_separate_lines(logical_line): +def whitespace_around_named_parameter_equals(logical_line, tokens): + """ + Don't use spaces around the '=' sign when used to indicate a + keyword argument or a default parameter value. + + Okay: def complex(real, imag=0.0): + Okay: return magic(r=real, i=imag) + Okay: boolean(a == b) + Okay: boolean(a != b) + Okay: boolean(a <= b) + Okay: boolean(a >= b) + + E251: def complex(real, imag = 0.0): + E251: return magic(r = real, i = imag) + """ + parens = 0 + no_space = False + prev_end = None + message = "E251 unexpected spaces around keyword / parameter equals" + for token_type, text, start, end, line in tokens: + if no_space: + no_space = False + if start != prev_end: + yield (prev_end, message) + elif token_type == tokenize.OP: + if text == '(': + parens += 1 + elif text == ')': + parens -= 1 + elif parens and text == '=': + no_space = True + if start != prev_end: + yield (prev_end, message) + prev_end = end + + +def whitespace_before_inline_comment(logical_line, tokens): + """ + Separate inline comments by at least two spaces. + + An inline comment is a comment on the same line as a statement. Inline + comments should be separated by at least two spaces from the statement. + They should start with a # and a single space. + + Okay: x = x + 1 # Increment x + Okay: x = x + 1 # Increment x + E261: x = x + 1 # Increment x + E262: x = x + 1 #Increment x + E262: x = x + 1 # Increment x """ + prev_end = (0, 0) + for token_type, text, start, end, line in tokens: + if token_type == tokenize.COMMENT: + if not line[:start[1]].strip(): + continue + if prev_end[0] == start[0] and start[1] < prev_end[1] + 2: + yield (prev_end, + "E261 at least two spaces before inline comment") + symbol, sp, comment = text.partition(' ') + if symbol not in ('#', '#:') or comment[:1].isspace(): + yield start, "E262 inline comment should start with '# '" + elif token_type != tokenize.NL: + prev_end = end + + +def imports_on_separate_lines(logical_line): + r""" Imports should usually be on separate lines. + + Okay: import os\nimport sys + E401: import sys, os + + Okay: from subprocess import Popen, PIPE + Okay: from myclas import MyClass + Okay: from foo.bar.yourclass import YourClass + Okay: import myclass + Okay: import foo.bar.yourclass """ line = logical_line if line.startswith('import '): found = line.find(',') - if found > -1: - return found, "E401 multiple imports on one line" + if -1 < found and ';' not in line[:found]: + yield found, "E401 multiple imports on one line" def compound_statements(logical_line): - """ + r""" Compound statements (multiple statements on the same line) are generally discouraged. + + While sometimes it's okay to put an if/for/while with a small body + on the same line, never do this for multi-clause statements. Also + avoid folding such long lines! + + Okay: if foo == 'blah':\n do_blah_thing() + Okay: do_one() + Okay: do_two() + Okay: do_three() + + E701: if foo == 'blah': do_blah_thing() + E701: for x in lst: total += x + E701: while t < 10: t = delay() + E701: if foo == 'blah': do_blah_thing() + E701: else: do_non_blah_thing() + E701: try: something() + E701: finally: cleanup() + E701: if foo == 'blah': one(); two(); three() + + E702: do_one(); do_two(); do_three() + E703: do_four(); # useless semicolon """ line = logical_line + last_char = len(line) - 1 found = line.find(':') - if -1 < found < len(line) - 1: + if -1 < found < last_char: before = line[:found] - if (before.count('{') <= before.count('}') and # {'a': 1} (dict) - before.count('[') <= before.count(']') and # [1:2] (slice) - not re.search(r'\blambda\b', before)): # lambda x: x - return found, "E701 multiple statements on one line (colon)" + if (before.count('{') <= before.count('}') and # {'a': 1} (dict) + before.count('[') <= before.count(']') and # [1:2] (slice) + before.count('(') <= before.count(')') and # (Python 3 annotation) + not LAMBDA_REGEX.search(before)): # lambda x: x + yield found, "E701 multiple statements on one line (colon)" found = line.find(';') if -1 < found: - return found, "E702 multiple statements on one line (semicolon)" + if found < last_char: + yield found, "E702 multiple statements on one line (semicolon)" + else: + yield found, "E703 statement ends with a semicolon" -def python_3000_has_key(logical_line): +def explicit_line_join(logical_line, tokens): + r""" + Avoid explicit line join between brackets. + + The preferred way of wrapping long lines is by using Python's implied line + continuation inside parentheses, brackets and braces. Long lines can be + broken over multiple lines by wrapping expressions in parentheses. These + should be used in preference to using a backslash for line continuation. + + E502: aaa = [123, \\n 123] + E502: aaa = ("bbb " \\n "ccc") + + Okay: aaa = [123,\n 123] + Okay: aaa = ("bbb "\n "ccc") + Okay: aaa = "bbb " \\n "ccc" + """ + prev_start = prev_end = parens = 0 + for token_type, text, start, end, line in tokens: + if start[0] != prev_start and parens and backslash: + yield backslash, "E502 the backslash is redundant between brackets" + if end[0] != prev_end: + if line.rstrip('\r\n').endswith('\\'): + backslash = (end[0], len(line.splitlines()[-1]) - 1) + else: + backslash = None + prev_start = prev_end = end[0] + else: + prev_start = start[0] + if token_type == tokenize.OP: + if text in '([{': + parens += 1 + elif text in ')]}': + parens -= 1 + + +def comparison_to_singleton(logical_line): + """ + Comparisons to singletons like None should always be done + with "is" or "is not", never the equality operators. + + Okay: if arg is not None: + E711: if arg != None: + E712: if arg == True: + + Also, beware of writing if x when you really mean if x is not None -- + e.g. when testing whether a variable or argument that defaults to None was + set to some other value. The other value might have a type (such as a + container) that could be false in a boolean context! + """ + match = COMPARE_SINGLETON_REGEX.search(logical_line) + if match: + same = (match.group(1) == '==') + singleton = match.group(2) + msg = "'if cond is %s:'" % (('' if same else 'not ') + singleton) + if singleton in ('None',): + code = 'E711' + else: + code = 'E712' + nonzero = ((singleton == 'True' and same) or + (singleton == 'False' and not same)) + msg += " or 'if %scond:'" % ('' if nonzero else 'not ') + yield match.start(1), ("%s comparison to %s should be %s" % + (code, singleton, msg)) + + +def comparison_type(logical_line): """ - The {}.has_key() method will be removed in the future version of - Python. Use the 'in' operation instead, like: - d = {"a": 1, "b": 2} - if "b" in d: - print d["b"] + Object type comparisons should always use isinstance() instead of + comparing types directly. + + Okay: if isinstance(obj, int): + E721: if type(obj) is type(1): + + When checking if an object is a string, keep in mind that it might be a + unicode string too! In Python 2.3, str and unicode have a common base + class, basestring, so you can do: + + Okay: if isinstance(obj, basestring): + Okay: if type(a1) is type(b1): + """ + match = COMPARE_TYPE_REGEX.search(logical_line) + if match: + inst = match.group(1) + if inst and isidentifier(inst) and inst not in SINGLETONS: + return # Allow comparison for types which are not obvious + yield match.start(), "E721 do not compare types, use 'isinstance()'" + + +def python_3000_has_key(logical_line): + r""" + The {}.has_key() method is removed in the Python 3. + Use the 'in' operation instead. + + Okay: if "alph" in d:\n print d["alph"] + W601: assert d.has_key('alph') """ pos = logical_line.find('.has_key(') if pos > -1: - return pos, "W601 .has_key() is deprecated, use 'in'" + yield pos, "W601 .has_key() is deprecated, use 'in'" def python_3000_raise_comma(logical_line): @@ -365,11 +968,41 @@ def python_3000_raise_comma(logical_line): The paren-using form is preferred because when the exception arguments are long or include string formatting, you don't need to use line continuation characters thanks to the containing parentheses. The older - form will be removed in Python 3000. + form is removed in Python 3. + + Okay: raise DummyError("Message") + W602: raise DummyError, "Message" """ - match = raise_comma_match(logical_line) - if match: - return match.start(1), "W602 deprecated form of raising exception" + match = RAISE_COMMA_REGEX.match(logical_line) + if match and not RERAISE_COMMA_REGEX.match(logical_line): + yield match.end() - 1, "W602 deprecated form of raising exception" + + +def python_3000_not_equal(logical_line): + """ + != can also be written <>, but this is an obsolete usage kept for + backwards compatibility only. New code should always use !=. + The older syntax is removed in Python 3. + + Okay: if a != 'no': + W603: if a <> 'no': + """ + pos = logical_line.find('<>') + if pos > -1: + yield pos, "W603 '<>' is deprecated, use '!='" + + +def python_3000_backticks(logical_line): + """ + Backticks are removed in Python 3. + Use repr() instead. + + Okay: val = repr(1 + 2) + W604: val = `1 + 2` + """ + pos = logical_line.find('`') + if pos > -1: + yield pos, "W604 backticks are deprecated, use 'repr()'" ############################################################################## @@ -377,26 +1010,63 @@ def python_3000_raise_comma(logical_line): ############################################################################## +if '' == ''.encode(): + # Python 2: implicit encoding. + def readlines(filename): + f = open(filename) + try: + return f.readlines() + finally: + f.close() + + isidentifier = re.compile(r'[a-zA-Z_]\w*').match + stdin_get_value = sys.stdin.read +else: + # Python 3 + def readlines(filename): + f = open(filename, 'rb') + try: + coding, lines = tokenize.detect_encoding(f.readline) + f = TextIOWrapper(f, coding, line_buffering=True) + return [l.decode(coding) for l in lines] + f.readlines() + except (LookupError, SyntaxError, UnicodeError): + f.close() + # Fall back if files are improperly declared + f = open(filename, encoding='latin-1') + return f.readlines() + finally: + f.close() + + isidentifier = str.isidentifier + + def stdin_get_value(): + return TextIOWrapper(sys.stdin.buffer, errors='ignore').read() +readlines.__doc__ = " Read the source code." +noqa = re.compile(r'# no(?:qa|pep8)\b', re.I).search + + def expand_indent(line): - """ + r""" Return the amount of indentation. Tabs are expanded to the next multiple of 8. >>> expand_indent(' ') 4 - >>> expand_indent('\\t') + >>> expand_indent('\t') 8 - >>> expand_indent(' \\t') + >>> expand_indent(' \t') 8 - >>> expand_indent(' \\t') + >>> expand_indent(' \t') 8 - >>> expand_indent(' \\t') + >>> expand_indent(' \t') 16 """ + if '\t' not in line: + return len(line) - len(line.lstrip()) result = 0 for char in line: if char == '\t': - result = result / 8 * 8 + 8 + result = result // 8 * 8 + 8 elif char == ' ': result += 1 else: @@ -404,34 +1074,6 @@ def expand_indent(line): return result -############################################################################## -# Framework to run all checks -############################################################################## - - -def message(text): - """Print a message.""" - # print >> sys.stderr, options.prog + ': ' + text - # print >> sys.stderr, text - print text - - -def find_checks(argument_name): - """ - Find all globally visible functions where the first argument name - starts with argument_name. - """ - checks = [] - function_type = type(find_checks) - for name, function in globals().iteritems(): - if type(function) is function_type: - args = inspect.getargspec(function)[0] - if len(args) >= 1 and args[0].startswith(argument_name): - checks.append((name, function, args)) - checks.sort() - return checks - - def mute_string(text): """ Replace contents with 'xxx' to prevent syntax matching. @@ -443,32 +1085,135 @@ def mute_string(text): >>> mute_string("r'abc'") "r'xxx'" """ - start = 1 - end = len(text) - 1 # String modifiers (e.g. u or r) - if text.endswith('"'): - start += text.index('"') - elif text.endswith("'"): - start += text.index("'") + start = text.index(text[-1]) + 1 + end = len(text) - 1 # Triple quotes - if text.endswith('"""') or text.endswith("'''"): + if text[-3:] in ('"""', "'''"): start += 2 end -= 2 return text[:start] + 'x' * (end - start) + text[end:] -class Checker: +def parse_udiff(diff, patterns=None, parent='.'): + """Return a dictionary of matching lines.""" + # For each file of the diff, the entry key is the filename, + # and the value is a set of row numbers to consider. + rv = {} + path = nrows = None + for line in diff.splitlines(): + if nrows: + if line[:1] != '-': + nrows -= 1 + continue + if line[:3] == '@@ ': + hunk_match = HUNK_REGEX.match(line) + row, nrows = [int(g or '1') for g in hunk_match.groups()] + rv[path].update(range(row, row + nrows)) + elif line[:3] == '+++': + path = line[4:].split('\t', 1)[0] + if path[:2] == 'b/': + path = path[2:] + rv[path] = set() + return dict([(os.path.join(parent, path), rows) + for (path, rows) in rv.items() + if rows and filename_match(path, patterns)]) + + +def filename_match(filename, patterns, default=True): + """ + Check if patterns contains a pattern that matches filename. + If patterns is unspecified, this always returns True. + """ + if not patterns: + return default + return any(fnmatch(filename, pattern) for pattern in patterns) + + +############################################################################## +# Framework to run all checks +############################################################################## + + +_checks = {'physical_line': {}, 'logical_line': {}, 'tree': {}} + + +def register_check(check, codes=None): + """ + Register a new check object. + """ + def _add_check(check, kind, codes, args): + if check in _checks[kind]: + _checks[kind][check][0].extend(codes or []) + else: + _checks[kind][check] = (codes or [''], args) + if inspect.isfunction(check): + args = inspect.getargspec(check)[0] + if args and args[0] in ('physical_line', 'logical_line'): + if codes is None: + codes = ERRORCODE_REGEX.findall(check.__doc__ or '') + _add_check(check, args[0], codes, args) + elif inspect.isclass(check): + if inspect.getargspec(check.__init__)[0][:2] == ['self', 'tree']: + _add_check(check, 'tree', codes, None) + + +def init_checks_registry(): + """ + Register all globally visible functions where the first argument name + is 'physical_line' or 'logical_line'. + """ + mod = inspect.getmodule(register_check) + for (name, function) in inspect.getmembers(mod, inspect.isfunction): + register_check(function) +init_checks_registry() + + +class Checker(object): """ Load a Python source file, tokenize it, check coding style. """ - def __init__(self, filename): + def __init__(self, filename=None, lines=None, + options=None, report=None, **kwargs): + if options is None: + options = StyleGuide(kwargs).options + else: + assert not kwargs + self._io_error = None + self._physical_checks = options.physical_checks + self._logical_checks = options.logical_checks + self._ast_checks = options.ast_checks + self.max_line_length = options.max_line_length + self.verbose = options.verbose self.filename = filename - self.lines = file(filename).readlines() - self.physical_checks = find_checks('physical_line') - self.logical_checks = find_checks('logical_line') - options.counters['physical lines'] = \ - options.counters.get('physical lines', 0) + len(self.lines) + if filename is None: + self.filename = 'stdin' + self.lines = lines or [] + elif filename == '-': + self.filename = 'stdin' + self.lines = stdin_get_value().splitlines(True) + elif lines is None: + try: + self.lines = readlines(filename) + except IOError: + exc_type, exc = sys.exc_info()[:2] + self._io_error = '%s: %s' % (exc_type.__name__, exc) + self.lines = [] + else: + self.lines = lines + self.report = report or options.report + self.report_error = self.report.error + + def report_invalid_syntax(self): + exc_type, exc = sys.exc_info()[:2] + offset = exc.args[1] + if len(offset) > 2: + offset = offset[1:3] + self.report_error(offset[0], offset[1] or 0, + 'E901 %s: %s' % (exc_type.__name__, exc.args[0]), + self.report_invalid_syntax) + report_invalid_syntax.__doc__ = " Check if the syntax is valid." def readline(self): """ @@ -503,9 +1248,9 @@ class Checker: Run all physical checks on a raw input line. """ self.physical_line = line - if self.indent_char is None and len(line) and line[0] in ' \t': + if self.indent_char is None and line[:1] in WHITESPACE: self.indent_char = line[0] - for name, check, argument_names in self.physical_checks: + for name, check, argument_names in self._physical_checks: result = self.run_check(check, argument_names) if result is not None: offset, text = result @@ -521,21 +1266,21 @@ class Checker: previous = None for token in self.tokens: token_type, text = token[0:2] - if token_type in (tokenize.COMMENT, tokenize.NL, - tokenize.INDENT, tokenize.DEDENT, - tokenize.NEWLINE): + if token_type in SKIP_TOKENS: continue if token_type == tokenize.STRING: text = mute_string(text) if previous: - end_line, end = previous[3] - start_line, start = token[2] - if end_line != start_line: # different row - if self.lines[end_line - 1][end - 1] not in '{[(': + end_row, end = previous[3] + start_row, start = token[2] + if end_row != start_row: # different row + prev_text = self.lines[end_row - 1][end - 1] + if prev_text == ',' or (prev_text not in '{[(' + and text not in '}])'): logical.append(' ') length += 1 - elif end != start: # different column - fill = self.lines[end_line - 1][end:start] + elif end != start: # different column + fill = self.lines[end_row - 1][end:start] logical.append(fill) length += len(fill) self.mapping.append((length, token)) @@ -543,300 +1288,573 @@ class Checker: length += len(text) previous = token self.logical_line = ''.join(logical) - assert self.logical_line.lstrip() == self.logical_line - assert self.logical_line.rstrip() == self.logical_line + # With Python 2, if the line ends with '\r\r\n' the assertion fails + # assert self.logical_line.strip() == self.logical_line def check_logical(self): """ Build a line from tokens and run all logical checks on it. """ - options.counters['logical lines'] = \ - options.counters.get('logical lines', 0) + 1 self.build_tokens_line() + self.report.increment_logical_line() first_line = self.lines[self.mapping[0][1][2][0] - 1] indent = first_line[:self.mapping[0][1][2][1]] self.previous_indent_level = self.indent_level self.indent_level = expand_indent(indent) - if options.verbose >= 2: - print self.logical_line[:80].rstrip() - for name, check, argument_names in self.logical_checks: - if options.verbose >= 3: - print ' ', name - result = self.run_check(check, argument_names) - if result is not None: + if self.verbose >= 2: + print(self.logical_line[:80].rstrip()) + for name, check, argument_names in self._logical_checks: + if self.verbose >= 4: + print(' ' + name) + for result in self.run_check(check, argument_names): offset, text = result - if type(offset) is tuple: - original_number, original_offset = offset + if isinstance(offset, tuple): + orig_number, orig_offset = offset else: for token_offset, token in self.mapping: if offset >= token_offset: - original_number = token[2][0] - original_offset = (token[2][1] - + offset - token_offset) - self.report_error(original_number, original_offset, - text, check) + orig_number = token[2][0] + orig_offset = (token[2][1] + offset - token_offset) + self.report_error(orig_number, orig_offset, text, check) self.previous_logical = self.logical_line - def check_all(self): + def check_ast(self): + try: + tree = compile(''.join(self.lines), '', 'exec', PyCF_ONLY_AST) + except SyntaxError: + return self.report_invalid_syntax() + for name, cls, _ in self._ast_checks: + checker = cls(tree, self.filename) + for lineno, offset, text, check in checker.run(): + if not noqa(self.lines[lineno - 1]): + self.report_error(lineno, offset, text, check) + + def generate_tokens(self): + if self._io_error: + self.report_error(1, 0, 'E902 %s' % self._io_error, readlines) + tokengen = tokenize.generate_tokens(self.readline_check_physical) + try: + for token in tokengen: + yield token + except (SyntaxError, tokenize.TokenError): + self.report_invalid_syntax() + + def check_all(self, expected=None, line_offset=0): """ Run all checks on the input file. """ - self.file_errors = 0 + self.report.init_file(self.filename, self.lines, expected, line_offset) + if self._ast_checks: + self.check_ast() self.line_number = 0 self.indent_char = None self.indent_level = 0 self.previous_logical = '' - self.blank_lines = 0 self.tokens = [] + self.blank_lines = blank_lines_before_comment = 0 parens = 0 - for token in tokenize.generate_tokens(self.readline_check_physical): - # print tokenize.tok_name[token[0]], repr(token) + for token in self.generate_tokens(): self.tokens.append(token) token_type, text = token[0:2] - if token_type == tokenize.OP and text in '([{': - parens += 1 - if token_type == tokenize.OP and text in '}])': - parens -= 1 - if token_type == tokenize.NEWLINE and not parens: - self.check_logical() - self.blank_lines = 0 - self.tokens = [] - if token_type == tokenize.NL and not parens: - self.blank_lines += 1 - self.tokens = [] - if token_type == tokenize.COMMENT: - source_line = token[4] - token_start = token[2][1] - if source_line[:token_start].strip() == '': + if self.verbose >= 3: + if token[2][0] == token[3][0]: + pos = '[%s:%s]' % (token[2][1] or '', token[3][1]) + else: + pos = 'l.%s' % token[3][0] + print('l.%s\t%s\t%s\t%r' % + (token[2][0], pos, tokenize.tok_name[token[0]], text)) + if token_type == tokenize.OP: + if text in '([{': + parens += 1 + elif text in '}])': + parens -= 1 + elif not parens: + if token_type == tokenize.NEWLINE: + if self.blank_lines < blank_lines_before_comment: + self.blank_lines = blank_lines_before_comment + self.check_logical() + self.tokens = [] + self.blank_lines = blank_lines_before_comment = 0 + elif token_type == tokenize.NL: + if len(self.tokens) == 1: + # The physical line contains only this token. + self.blank_lines += 1 + self.tokens = [] + elif token_type == tokenize.COMMENT and len(self.tokens) == 1: + if blank_lines_before_comment < self.blank_lines: + blank_lines_before_comment = self.blank_lines self.blank_lines = 0 - return self.file_errors + if COMMENT_WITH_NL: + # The comment also ends a physical line + self.tokens = [] + return self.report.get_file_results() + + +class BaseReport(object): + """Collect the results of the checks.""" + print_filename = False + + def __init__(self, options): + self._benchmark_keys = options.benchmark_keys + self._ignore_code = options.ignore_code + # Results + self.elapsed = 0 + self.total_errors = 0 + self.counters = dict.fromkeys(self._benchmark_keys, 0) + self.messages = {} + + def start(self): + """Start the timer.""" + self._start_time = time.time() + + def stop(self): + """Stop the timer.""" + self.elapsed = time.time() - self._start_time + + def init_file(self, filename, lines, expected, line_offset): + """Signal a new file.""" + self.filename = filename + self.lines = lines + self.expected = expected or () + self.line_offset = line_offset + self.file_errors = 0 + self.counters['files'] += 1 + self.counters['physical lines'] += len(lines) - def report_error(self, line_number, offset, text, check): - """ - Report an error, according to options. - """ - if options.quiet == 1 and not self.file_errors: - message(self.filename) - self.file_errors += 1 + def increment_logical_line(self): + """Signal a new logical line.""" + self.counters['logical lines'] += 1 + + def error(self, line_number, offset, text, check): + """Report an error, according to options.""" code = text[:4] - options.counters[code] = options.counters.get(code, 0) + 1 - options.messages[code] = text[5:] - if options.quiet: + if self._ignore_code(code): return - if options.testsuite: - base = os.path.basename(self.filename)[:4] - if base == code: - return - if base[0] == 'E' and code[0] == 'W': - return - if ignore_code(code): + if code in self.counters: + self.counters[code] += 1 + else: + self.counters[code] = 1 + self.messages[code] = text[5:] + # Don't care about expected errors or warnings + if code in self.expected: return - if options.counters[code] == 1 or options.repeat: - message("%s:%s:%d: %s" % - (self.filename, line_number, offset + 1, text)) - if options.show_source: - line = self.lines[line_number - 1] - message(line.rstrip()) - message(' ' * offset + '^') - if options.show_pep8: - message(check.__doc__.lstrip('\n').rstrip()) - - -def input_file(filename): - """ - Run all checks on a Python source file. - """ - if excluded(filename) or not filename_match(filename): - return 0 - if options.verbose: - message('checking ' + filename) - options.counters['files'] = options.counters.get('files', 0) + 1 - errors = Checker(filename).check_all() - if options.testsuite and not errors: - message("%s: %s" % (filename, "no errors found")) - return errors - - -def input_dir(dirname): - """ - Check all Python source files in this directory and all subdirectories. - """ - dirname = dirname.rstrip('/') - if excluded(dirname): - return 0 - errors = 0 - for root, dirs, files in os.walk(dirname): - if options.verbose: - message('directory ' + root) - options.counters['directories'] = \ - options.counters.get('directories', 0) + 1 - dirs.sort() - for subdir in dirs: - if excluded(subdir): - dirs.remove(subdir) - files.sort() - for filename in files: - errors += input_file(os.path.join(root, filename)) - return errors - - -def excluded(filename): - """ - Check if options.exclude contains a pattern that matches filename. - """ - basename = os.path.basename(filename) - for pattern in options.exclude: - if fnmatch(basename, pattern): - # print basename, 'excluded because it matches', pattern - return True - - -def filename_match(filename): - """ - Check if options.filename contains a pattern that matches filename. - If options.filename is unspecified, this always returns True. - """ - if not options.filename: - return True - for pattern in options.filename: - if fnmatch(filename, pattern): - return True - - -def ignore_code(code): - """ - Check if options.ignore contains a prefix of the error code. - """ - for ignore in options.ignore: - if code.startswith(ignore): - return True - - -def get_error_statistics(): - """Get error statistics.""" - return get_statistics("E") + if self.print_filename and not self.file_errors: + print(self.filename) + self.file_errors += 1 + self.total_errors += 1 + return code + def get_file_results(self): + """Return the count of errors and warnings for this file.""" + return self.file_errors -def get_warning_statistics(): - """Get warning statistics.""" - return get_statistics("W") + def get_count(self, prefix=''): + """Return the total count of errors and warnings.""" + return sum([self.counters[key] + for key in self.messages if key.startswith(prefix)]) + def get_statistics(self, prefix=''): + """ + Get statistics for message codes that start with the prefix. -def get_statistics(prefix=''): - """ - Get statistics for message codes that start with the prefix. + prefix='' matches all errors and warnings + prefix='E' matches all errors + prefix='W' matches all warnings + prefix='E4' matches all errors that have to do with imports + """ + return ['%-7s %s %s' % (self.counters[key], key, self.messages[key]) + for key in sorted(self.messages) if key.startswith(prefix)] + + def print_statistics(self, prefix=''): + """Print overall statistics (number of errors and warnings).""" + for line in self.get_statistics(prefix): + print(line) + + def print_benchmark(self): + """Print benchmark numbers.""" + print('%-7.2f %s' % (self.elapsed, 'seconds elapsed')) + if self.elapsed: + for key in self._benchmark_keys: + print('%-7d %s per second (%d total)' % + (self.counters[key] / self.elapsed, key, + self.counters[key])) + + +class FileReport(BaseReport): + """Collect the results of the checks and print only the filenames.""" + print_filename = True + + +class StandardReport(BaseReport): + """Collect and print the results of the checks.""" + + def __init__(self, options): + super(StandardReport, self).__init__(options) + self._fmt = REPORT_FORMAT.get(options.format.lower(), + options.format) + self._repeat = options.repeat + self._show_source = options.show_source + self._show_pep8 = options.show_pep8 + + def init_file(self, filename, lines, expected, line_offset): + """Signal a new file.""" + self._deferred_print = [] + return super(StandardReport, self).init_file( + filename, lines, expected, line_offset) + + def error(self, line_number, offset, text, check): + """Report an error, according to options.""" + code = super(StandardReport, self).error(line_number, offset, + text, check) + if code and (self.counters[code] == 1 or self._repeat): + self._deferred_print.append( + (line_number, offset, code, text[5:], check.__doc__)) + return code + + def get_file_results(self): + """Print the result and return the overall count for this file.""" + self._deferred_print.sort() + for line_number, offset, code, text, doc in self._deferred_print: + print(self._fmt % { + 'path': self.filename, + 'row': self.line_offset + line_number, 'col': offset + 1, + 'code': code, 'text': text, + }) + if self._show_source: + if line_number > len(self.lines): + line = '' + else: + line = self.lines[line_number - 1] + print(line.rstrip()) + print(' ' * offset + '^') + if self._show_pep8 and doc: + print(doc.lstrip('\n').rstrip()) + return self.file_errors - prefix='' matches all errors and warnings - prefix='E' matches all errors - prefix='W' matches all warnings - prefix='E4' matches all errors that have to do with imports - """ - stats = [] - keys = options.messages.keys() - keys.sort() - for key in keys: - if key.startswith(prefix): - stats.append('%-7s %s %s' % - (options.counters[key], key, options.messages[key])) - return stats +class DiffReport(StandardReport): + """Collect and print the results for the changed lines only.""" -def print_statistics(prefix=''): - """Print overall statistics (number of errors and warnings).""" - for line in get_statistics(prefix): - print line + def __init__(self, options): + super(DiffReport, self).__init__(options) + self._selected = options.selected_lines + def error(self, line_number, offset, text, check): + if line_number not in self._selected[self.filename]: + return + return super(DiffReport, self).error(line_number, offset, text, check) + + +class StyleGuide(object): + """Initialize a PEP-8 instance with few options.""" + + def __init__(self, *args, **kwargs): + # build options from the command line + self.checker_class = kwargs.pop('checker_class', Checker) + parse_argv = kwargs.pop('parse_argv', False) + config_file = kwargs.pop('config_file', None) + parser = kwargs.pop('parser', None) + options, self.paths = process_options( + parse_argv=parse_argv, config_file=config_file, parser=parser) + if args or kwargs: + # build options from dict + options_dict = dict(*args, **kwargs) + options.__dict__.update(options_dict) + if 'paths' in options_dict: + self.paths = options_dict['paths'] + + self.runner = self.input_file + self.options = options + + if not options.reporter: + options.reporter = BaseReport if options.quiet else StandardReport + + for index, value in enumerate(options.exclude): + options.exclude[index] = value.rstrip('/') + options.select = tuple(options.select or ()) + if not (options.select or options.ignore or + options.testsuite or options.doctest) and DEFAULT_IGNORE: + # The default choice: ignore controversial checks + options.ignore = tuple(DEFAULT_IGNORE.split(',')) + else: + # Ignore all checks which are not explicitly selected + options.ignore = tuple(options.ignore or options.select and ('',)) + options.benchmark_keys = BENCHMARK_KEYS[:] + options.ignore_code = self.ignore_code + options.physical_checks = self.get_checks('physical_line') + options.logical_checks = self.get_checks('logical_line') + options.ast_checks = self.get_checks('tree') + self.init_report() + + def init_report(self, reporter=None): + """Initialize the report instance.""" + self.options.report = (reporter or self.options.reporter)(self.options) + return self.options.report + + def check_files(self, paths=None): + """Run all checks on the paths.""" + if paths is None: + paths = self.paths + report = self.options.report + runner = self.runner + report.start() + try: + for path in paths: + if os.path.isdir(path): + self.input_dir(path) + elif not self.excluded(path): + runner(path) + except KeyboardInterrupt: + print('... stopped') + report.stop() + return report + + def input_file(self, filename, lines=None, expected=None, line_offset=0): + """Run all checks on a Python source file.""" + if self.options.verbose: + print('checking %s' % filename) + fchecker = self.checker_class( + filename, lines=lines, options=self.options) + return fchecker.check_all(expected=expected, line_offset=line_offset) + + def input_dir(self, dirname): + """Check all files in this directory and all subdirectories.""" + dirname = dirname.rstrip('/') + if self.excluded(dirname): + return 0 + counters = self.options.report.counters + verbose = self.options.verbose + filepatterns = self.options.filename + runner = self.runner + for root, dirs, files in os.walk(dirname): + if verbose: + print('directory ' + root) + counters['directories'] += 1 + for subdir in sorted(dirs): + if self.excluded(os.path.join(root, subdir)): + dirs.remove(subdir) + for filename in sorted(files): + # contain a pattern that matches? + if ((filename_match(filename, filepatterns) and + not self.excluded(filename))): + runner(os.path.join(root, filename)) + + def excluded(self, filename): + """ + Check if options.exclude contains a pattern that matches filename. + """ + basename = os.path.basename(filename) + return any((filename_match(filename, self.options.exclude, + default=False), + filename_match(basename, self.options.exclude, + default=False))) -def print_benchmark(elapsed): - """ - Print benchmark numbers. - """ - print '%-7.2f %s' % (elapsed, 'seconds elapsed') - keys = ['directories', 'files', - 'logical lines', 'physical lines'] - for key in keys: - if key in options.counters: - print '%-7d %s per second (%d total)' % ( - options.counters[key] / elapsed, key, - options.counters[key]) + def ignore_code(self, code): + """ + Check if the error code should be ignored. + If 'options.select' contains a prefix of the error code, + return False. Else, if 'options.ignore' contains a prefix of + the error code, return True. + """ + return (code.startswith(self.options.ignore) and + not code.startswith(self.options.select)) -def process_options(arglist=None): - """ - Process options passed either via arglist or via command line args. - """ - global options, args - usage = "%prog [options] input ..." - parser = OptionParser(usage) + def get_checks(self, argument_name): + """ + Find all globally visible functions where the first argument name + starts with argument_name and which contain selected tests. + """ + checks = [] + for check, attrs in _checks[argument_name].items(): + (codes, args) = attrs + if any(not (code and self.ignore_code(code)) for code in codes): + checks.append((check.__name__, check, args)) + return sorted(checks) + + +def get_parser(prog='pep8', version=__version__): + parser = OptionParser(prog=prog, version=version, + usage="%prog [options] input ...") + parser.config_options = [ + 'exclude', 'filename', 'select', 'ignore', 'max-line-length', 'count', + 'format', 'quiet', 'show-pep8', 'show-source', 'statistics', 'verbose'] parser.add_option('-v', '--verbose', default=0, action='count', help="print status messages, or debug with -vv") parser.add_option('-q', '--quiet', default=0, action='count', help="report only file names, or nothing with -qq") - parser.add_option('--exclude', metavar='patterns', default=default_exclude, - help="skip matches (default %s)" % default_exclude) - parser.add_option('--filename', metavar='patterns', - help="only check matching files (e.g. *.py)") + parser.add_option('-r', '--repeat', default=True, action='store_true', + help="(obsolete) show all occurrences of the same error") + parser.add_option('--first', action='store_false', dest='repeat', + help="show first occurrence of each error") + parser.add_option('--exclude', metavar='patterns', default=DEFAULT_EXCLUDE, + help="exclude files or directories which match these " + "comma separated patterns (default: %default)") + parser.add_option('--filename', metavar='patterns', default='*.py', + help="when parsing directories, only check filenames " + "matching these comma separated patterns " + "(default: %default)") + parser.add_option('--select', metavar='errors', default='', + help="select errors and warnings (e.g. E,W6)") parser.add_option('--ignore', metavar='errors', default='', help="skip errors and warnings (e.g. E4,W)") - parser.add_option('--repeat', action='store_true', - help="show all occurrences of the same error") parser.add_option('--show-source', action='store_true', help="show source code for each error") parser.add_option('--show-pep8', action='store_true', - help="show text of PEP 8 for each error") + help="show text of PEP 8 for each error " + "(implies --first)") parser.add_option('--statistics', action='store_true', help="count errors and warnings") - parser.add_option('--benchmark', action='store_true', - help="measure processing speed") - parser.add_option('--testsuite', metavar='dir', - help="run regression tests from dir") - parser.add_option('--doctest', action='store_true', - help="run doctest on myself") + parser.add_option('--count', action='store_true', + help="print total number of errors and warnings " + "to standard error and set exit code to 1 if " + "total is not null") + parser.add_option('--max-line-length', type='int', metavar='n', + default=MAX_LINE_LENGTH, + help="set maximum allowed line length " + "(default: %default)") + parser.add_option('--format', metavar='format', default='default', + help="set the error format [default|pylint|]") + parser.add_option('--diff', action='store_true', + help="report only lines changed according to the " + "unified diff received on STDIN") + group = parser.add_option_group("Testing Options") + if os.path.exists(TESTSUITE_PATH): + group.add_option('--testsuite', metavar='dir', + help="run regression tests from dir") + group.add_option('--doctest', action='store_true', + help="run doctest on myself") + group.add_option('--benchmark', action='store_true', + help="measure processing speed") + return parser + + +def read_config(options, args, arglist, parser): + """Read both user configuration and local configuration.""" + config = RawConfigParser() + + user_conf = options.config + if user_conf and os.path.isfile(user_conf): + if options.verbose: + print('user configuration: %s' % user_conf) + config.read(user_conf) + + parent = tail = args and os.path.abspath(os.path.commonprefix(args)) + while tail: + for name in PROJECT_CONFIG: + local_conf = os.path.join(parent, name) + if os.path.isfile(local_conf): + break + else: + parent, tail = os.path.split(parent) + continue + if options.verbose: + print('local configuration: %s' % local_conf) + config.read(local_conf) + break + + pep8_section = parser.prog + if config.has_section(pep8_section): + option_list = dict([(o.dest, o.type or o.action) + for o in parser.option_list]) + + # First, read the default values + new_options, _ = parser.parse_args([]) + + # Second, parse the configuration + for opt in config.options(pep8_section): + if options.verbose > 1: + print(" %s = %s" % (opt, config.get(pep8_section, opt))) + if opt.replace('_', '-') not in parser.config_options: + print("Unknown option: '%s'\n not in [%s]" % + (opt, ' '.join(parser.config_options))) + sys.exit(1) + normalized_opt = opt.replace('-', '_') + opt_type = option_list[normalized_opt] + if opt_type in ('int', 'count'): + value = config.getint(pep8_section, opt) + elif opt_type == 'string': + value = config.get(pep8_section, opt) + else: + assert opt_type in ('store_true', 'store_false') + value = config.getboolean(pep8_section, opt) + setattr(new_options, normalized_opt, value) + + # Third, overwrite with the command-line options + options, _ = parser.parse_args(arglist, values=new_options) + options.doctest = options.testsuite = False + return options + + +def process_options(arglist=None, parse_argv=False, config_file=None, + parser=None): + """Process options passed either via arglist or via command line args.""" + if not arglist and not parse_argv: + # Don't read the command line if the module is used as a library. + arglist = [] + if not parser: + parser = get_parser() + if not parser.has_option('--config'): + if config_file is True: + config_file = DEFAULT_CONFIG + group = parser.add_option_group("Configuration", description=( + "The project options are read from the [%s] section of the " + "tox.ini file or the setup.cfg file located in any parent folder " + "of the path(s) being processed. Allowed options are: %s." % + (parser.prog, ', '.join(parser.config_options)))) + group.add_option('--config', metavar='path', default=config_file, + help="user config file location (default: %default)") options, args = parser.parse_args(arglist) - if options.testsuite: + options.reporter = None + + if options.ensure_value('testsuite', False): args.append(options.testsuite) - if len(args) == 0: - parser.error('input not specified') - options.prog = os.path.basename(sys.argv[0]) - options.exclude = options.exclude.split(',') - for index in range(len(options.exclude)): - options.exclude[index] = options.exclude[index].rstrip('/') + elif not options.ensure_value('doctest', False): + if parse_argv and not args: + if options.diff or any(os.path.exists(name) + for name in PROJECT_CONFIG): + args = ['.'] + else: + parser.error('input not specified') + options = read_config(options, args, arglist, parser) + options.reporter = parse_argv and options.quiet == 1 and FileReport + if options.filename: options.filename = options.filename.split(',') + options.exclude = options.exclude.split(',') + if options.select: + options.select = options.select.split(',') if options.ignore: options.ignore = options.ignore.split(',') - else: - options.ignore = [] - options.counters = {} - options.messages = {} + + if options.diff: + options.reporter = DiffReport + stdin = stdin_get_value() + options.selected_lines = parse_udiff(stdin, options.filename, args[0]) + args = sorted(options.selected_lines) return options, args def _main(): - """ - Parse options and run checks on Python source. - """ - options, args = process_options() - if options.doctest: - import doctest - return doctest.testmod() - start_time = time.time() - errors = 0 - for path in args: - # skip emacs backups - if path.startswith(".#"): - continue - if os.path.isdir(path): - errors += input_dir(path) - else: - errors += input_file(path) - elapsed = time.time() - start_time + """Parse options and run checks on Python source.""" + pep8style = StyleGuide(parse_argv=True, config_file=True) + options = pep8style.options + if options.doctest or options.testsuite: + from testsuite.support import run_tests + report = run_tests(pep8style) + else: + report = pep8style.check_files() if options.statistics: - print_statistics() + report.print_statistics() if options.benchmark: - print_benchmark(elapsed) - return errors > 0 + report.print_benchmark() + if options.testsuite and not options.quiet: + report.print_results() + if report.total_errors: + if options.count: + sys.stderr.write(str(report.total_errors) + '\n') + sys.exit(1) if __name__ == '__main__': - sys.exit(_main()) + _main() diff --git a/tests/scanner/annotationparser/test_parser.py b/tests/scanner/annotationparser/test_parser.py index a1c5866c..ce3ccd22 100644 --- a/tests/scanner/annotationparser/test_parser.py +++ b/tests/scanner/annotationparser/test_parser.py @@ -108,7 +108,7 @@ def parsed2tree(docblock): parsed += ' \n' for key, value in tag.options.values: parsed += ' \n' - parsed += ' %s\n' %(key, ) + parsed += ' %s\n' % (key, ) if value is not None: options = value.all() parsed += ' \n' @@ -130,6 +130,7 @@ def parsed2tree(docblock): return parsed + def expected2tree(docblock): # Note: this sucks, but we can't rely on etree.tostring() to generate useable output :( diff --git a/tests/warn/warningtester.py b/tests/warn/warningtester.py index 27e0a5e6..fc3c3d2d 100644 --- a/tests/warn/warningtester.py +++ b/tests/warn/warningtester.py @@ -61,23 +61,24 @@ def _diff(a, b): for line in a[i1:i2]: for l in line.split('\n'): if l != '': - retval += ' ' + l + '\n' + retval += ' ' + l + '\n' continue if tag in ('replace', 'delete'): for line in a[i1:i2]: for l in line.split('\n'): if l != '': - retval += '-' + l + '\n' + retval += '-' + l + '\n' if tag in ('replace', 'insert'): for line in b[j1:j2]: for l in line.split('\n'): if l != '': - retval += '+' + l + '\n' + retval += '+' + l + '\n' return retval + def _extract_expected(filename): fd = open(filename, 'rU') data = fd.read() @@ -92,6 +93,7 @@ def _extract_expected(filename): return retval + def check(args): filename = args[0] @@ -133,10 +135,10 @@ def check(args): if len(expected_warnings) != len(emitted_warnings): raise SystemExit('ERROR in %r: %d warnings were emitted, ' - 'expected %d:\n%s' %(os.path.basename(filename), - len(emitted_warnings), - len(expected_warnings), - _diff(expected_warnings, emitted_warnings))) + 'expected %d:\n%s' % (os.path.basename(filename), + len(emitted_warnings), + len(expected_warnings), + _diff(expected_warnings, emitted_warnings))) for emitted_warning, expected_warning in zip(emitted_warnings, expected_warnings): if expected_warning != emitted_warning: -- cgit v1.2.1