summaryrefslogtreecommitdiff
path: root/utils
diff options
context:
space:
mode:
authorGeorg Brandl <georg@python.org>2007-07-23 09:02:25 +0000
committerGeorg Brandl <georg@python.org>2007-07-23 09:02:25 +0000
commitd60ca8d49d1654968886612883a2affa9dccfaa8 (patch)
treeeccdde90eb4474e1fab81461048de618c4fc6b80 /utils
parent6219bccc477caba671e7c4a3a92a6626b33d5006 (diff)
downloadsphinx-git-d60ca8d49d1654968886612883a2affa9dccfaa8.tar.gz
Initial import of the doc tools.
Diffstat (limited to 'utils')
-rwxr-xr-xutils/check_sources.py241
-rw-r--r--utils/pylintrc301
-rwxr-xr-xutils/reindent.py291
3 files changed, 833 insertions, 0 deletions
diff --git a/utils/check_sources.py b/utils/check_sources.py
new file mode 100755
index 000000000..761f05ae0
--- /dev/null
+++ b/utils/check_sources.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+ Checker for file headers
+ ~~~~~~~~~~~~~~~~~~~~~~~~
+
+ Make sure each Python file has a correct file header
+ including copyright and license information.
+
+ :copyright: 2006-2007 by Georg Brandl.
+ :license: GNU GPL, see LICENSE for more details.
+"""
+
+import sys, os, re
+import getopt
+import cStringIO
+from os.path import join, splitext, abspath
+
+
+checkers = {}
+
+def checker(*suffixes, **kwds):
+ only_pkg = kwds.pop('only_pkg', False)
+ def deco(func):
+ for suffix in suffixes:
+ checkers.setdefault(suffix, []).append(func)
+ func.only_pkg = only_pkg
+ return func
+ return deco
+
+
+name_mail_re = r'[\w ]+(<.*?>)?'
+copyright_re = re.compile(r'^ :copyright: 200\d(-200\d)? by %s(, %s)*[,.]$' %
+ (name_mail_re, name_mail_re))
+license_re = re.compile(r" :license: (.*?).\n")
+copyright_2_re = re.compile(r'^ %s(, %s)*[,.]$' %
+ (name_mail_re, name_mail_re))
+coding_re = re.compile(r'coding[:=]\s*([-\w.]+)')
+not_ix_re = re.compile(r'\bnot\s+\S+?\s+i[sn]\s\S+')
+is_const_re = re.compile(r'if.*?==\s+(None|False|True)\b')
+
+misspellings = ["developement", "adress", "verificate", # ALLOW-MISSPELLING
+ "informations"] # ALLOW-MISSPELLING
+
+
+@checker('.py')
+def check_syntax(fn, lines):
+ try:
+ compile(''.join(lines), fn, "exec")
+ except SyntaxError, err:
+ yield 0, "not compilable: %s" % err
+
+
+@checker('.py')
+def check_style_and_encoding(fn, lines):
+ encoding = 'ascii'
+ for lno, line in enumerate(lines):
+ if len(line) > 90:
+ yield lno+1, "line too long"
+ m = not_ix_re.search(line)
+ if m:
+ yield lno+1, '"' + m.group() + '"'
+ if is_const_re.search(line):
+ yield lno+1, 'using == None/True/False'
+ if lno < 2:
+ co = coding_re.search(line)
+ if co:
+ encoding = co.group(1)
+ try:
+ line.decode(encoding)
+ except UnicodeDecodeError, err:
+ yield lno+1, "not decodable: %s\n Line: %r" % (err, line)
+ except LookupError, err:
+ yield 0, "unknown encoding: %s" % encoding
+ encoding = 'latin1'
+
+
+@checker('.py', only_pkg=True)
+def check_fileheader(fn, lines):
+ # line number correction
+ c = 1
+ if lines[0:1] == ['#!/usr/bin/env python\n']:
+ lines = lines[1:]
+ c = 2
+
+ llist = []
+ docopen = False
+ for lno, l in enumerate(lines):
+ llist.append(l)
+ if lno == 0:
+ if l == '# -*- coding: rot13 -*-\n':
+ # special-case pony package
+ return
+ elif l != '# -*- coding: utf-8 -*-\n':
+ yield 1, "missing coding declaration"
+ elif lno == 1:
+ if l != '"""\n' and l != 'r"""\n':
+ yield 2, 'missing docstring begin (""")'
+ else:
+ docopen = True
+ elif docopen:
+ if l == '"""\n':
+ # end of docstring
+ if lno <= 4:
+ yield lno+c, "missing module name in docstring"
+ break
+
+ if l != "\n" and l[:4] != ' ' and docopen:
+ yield lno+c, "missing correct docstring indentation"
+
+ if lno == 2:
+ # if not in package, don't check the module name
+ modname = fn[:-3].replace('/', '.').replace('.__init__', '')
+ while modname:
+ if l.lower()[4:-1] == modname:
+ break
+ modname = '.'.join(modname.split('.')[1:])
+ else:
+ yield 3, "wrong module name in docstring heading"
+ modnamelen = len(l.strip())
+ elif lno == 3:
+ if l.strip() != modnamelen * "~":
+ yield 4, "wrong module name underline, should be ~~~...~"
+
+ else:
+ yield 0, "missing end and/or start of docstring..."
+
+ # check for copyright and license fields
+ license = llist[-2:-1]
+ if not license or not license_re.match(license[0]):
+ yield 0, "no correct license info"
+
+ ci = -3
+ copyright = llist[ci:ci+1]
+ while copyright and copyright_2_re.match(copyright[0]):
+ ci -= 1
+ copyright = llist[ci:ci+1]
+ if not copyright or not copyright_re.match(copyright[0]):
+ yield 0, "no correct copyright info"
+
+
+@checker('.py', '.html', '.js')
+def check_whitespace_and_spelling(fn, lines):
+ for lno, line in enumerate(lines):
+ if "\t" in line:
+ yield lno+1, "OMG TABS!!!1 "
+ if line[:-1].rstrip(' \t') != line[:-1]:
+ yield lno+1, "trailing whitespace"
+ for word in misspellings:
+ if word in line and 'ALLOW-MISSPELLING' not in line:
+ yield lno+1, '"%s" used' % word
+
+
+bad_tags = ('<b>', '<i>', '<u>', '<s>', '<strike>'
+ '<center>', '<big>', '<small>', '<font')
+
+@checker('.html')
+def check_xhtml(fn, lines):
+ for lno, line in enumerate(lines):
+ for bad_tag in bad_tags:
+ if bad_tag in line:
+ yield lno+1, "used " + bad_tag
+
+
+def main(argv):
+ try:
+ gopts, args = getopt.getopt(argv[1:], "vi:")
+ except getopt.GetoptError:
+ print "Usage: %s [-v] [-i ignorepath]* [path]" % argv[0]
+ return 2
+ opts = {}
+ for opt, val in gopts:
+ if opt == '-i':
+ val = abspath(val)
+ opts.setdefault(opt, []).append(val)
+
+ if len(args) == 0:
+ path = '.'
+ elif len(args) == 1:
+ path = args[0]
+ else:
+ print "Usage: %s [-v] [-i ignorepath]* [path]" % argv[0]
+ return 2
+
+ verbose = '-v' in opts
+
+ num = 0
+ out = cStringIO.StringIO()
+
+ # TODO: replace os.walk run with iteration over output of
+ # `svn list -R`.
+
+ for root, dirs, files in os.walk(path):
+ if '.svn' in dirs:
+ dirs.remove('.svn')
+ if '-i' in opts and abspath(root) in opts['-i']:
+ del dirs[:]
+ continue
+ in_check_pkg = root.startswith('./sphinx')
+ for fn in files:
+
+ fn = join(root, fn)
+ if fn[:2] == './': fn = fn[2:]
+
+ if '-i' in opts and abspath(fn) in opts['-i']:
+ continue
+
+ ext = splitext(fn)[1]
+ checkerlist = checkers.get(ext, None)
+ if not checkerlist:
+ continue
+
+ if verbose:
+ print "Checking %s..." % fn
+
+ try:
+ f = open(fn, 'r')
+ lines = list(f)
+ except (IOError, OSError), err:
+ print "%s: cannot open: %s" % (fn, err)
+ num += 1
+ continue
+
+ for checker in checkerlist:
+ if not in_check_pkg and checker.only_pkg:
+ continue
+ for lno, msg in checker(fn, lines):
+ print >>out, "%s:%d: %s" % (fn, lno, msg)
+ num += 1
+ if verbose:
+ print
+ if num == 0:
+ print "No errors found."
+ else:
+ print out.getvalue().rstrip('\n')
+ print "%d error%s found." % (num, num > 1 and "s" or "")
+ return int(num > 0)
+
+
+if __name__ == '__main__':
+ sys.exit(main(sys.argv))
diff --git a/utils/pylintrc b/utils/pylintrc
new file mode 100644
index 000000000..aa04e12e5
--- /dev/null
+++ b/utils/pylintrc
@@ -0,0 +1,301 @@
+# lint Python modules using external checkers.
+#
+# This is the main checker controling the other ones and the reports
+# generation. It is itself both a raw checker and an astng checker in order
+# to:
+# * handle message activation / deactivation at the module level
+# * handle some basic but necessary stats'data (number of classes, methods...)
+#
+[MASTER]
+
+# Specify a configuration file.
+#rcfile=
+
+# Profiled execution.
+profile=no
+
+# Add <file or directory> to the black list. It should be a base name, not a
+# path. You may set this option multiple times.
+ignore=.svn
+
+# Pickle collected data for later comparisons.
+persistent=yes
+
+# Set the cache size for astng objects.
+cache-size=500
+
+# List of plugins (as comma separated values of python modules names) to load,
+# usually to register additional checkers.
+load-plugins=
+
+
+[MESSAGES CONTROL]
+
+# Enable only checker(s) with the given id(s). This option conflict with the
+# disable-checker option
+#enable-checker=
+
+# Enable all checker(s) except those with the given id(s). This option conflict
+# with the disable-checker option
+#disable-checker=
+
+# Enable all messages in the listed categories.
+#enable-msg-cat=
+
+# Disable all messages in the listed categories.
+#disable-msg-cat=
+
+# Enable the message(s) with the given id(s).
+#enable-msg=
+
+# Disable the message(s) with the given id(s).
+disable-msg=C0323,W0142,C0301,C0103,C0111,E0213,C0302,C0203,W0703,R0201
+
+
+[REPORTS]
+
+# set the output format. Available formats are text, parseable, colorized and
+# html
+output-format=colorized
+
+# Include message's id in output
+include-ids=yes
+
+# Put messages in a separate file for each module / package specified on the
+# command line instead of printing them on stdout. Reports (if any) will be
+# written in a file name "pylint_global.[txt|html]".
+files-output=no
+
+# Tells wether to display a full report or only the messages
+reports=yes
+
+# Python expression which should return a note less than 10 (10 is the highest
+# note).You have access to the variables errors warning, statement which
+# respectivly contain the number of errors / warnings messages and the total
+# number of statements analyzed. This is used by the global evaluation report
+# (R0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Add a comment according to your evaluation note. This is used by the global
+# evaluation report (R0004).
+comment=no
+
+# Enable the report(s) with the given id(s).
+#enable-report=
+
+# Disable the report(s) with the given id(s).
+#disable-report=
+
+
+# checks for
+# * unused variables / imports
+# * undefined variables
+# * redefinition of variable from builtins or from an outer scope
+# * use of variable before assigment
+#
+[VARIABLES]
+
+# Tells wether we should check for unused import in __init__ files.
+init-import=no
+
+# A regular expression matching names used for dummy variables (i.e. not used).
+dummy-variables-rgx=_|dummy
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid to define new builtins when possible.
+additional-builtins=
+
+
+# try to find bugs in the code using type inference
+#
+[TYPECHECK]
+
+# Tells wether missing members accessed in mixin class should be ignored. A
+# mixin class is detected if its name ends with "mixin" (case insensitive).
+ignore-mixin-members=yes
+
+# When zope mode is activated, consider the acquired-members option to ignore
+# access to some undefined attributes.
+zope=no
+
+# List of members which are usually get through zope's acquisition mecanism and
+# so shouldn't trigger E0201 when accessed (need zope=yes to be considered).
+acquired-members=REQUEST,acl_users,aq_parent
+
+
+# checks for :
+# * doc strings
+# * modules / classes / functions / methods / arguments / variables name
+# * number of arguments, local variables, branchs, returns and statements in
+# functions, methods
+# * required module attributes
+# * dangerous default values as arguments
+# * redefinition of function / method / class
+# * uses of the global statement
+#
+[BASIC]
+
+# Required attributes for module, separated by a comma
+required-attributes=
+
+# Regular expression which should only match functions or classes name which do
+# not require a docstring
+no-docstring-rgx=__.*__
+
+# Regular expression which should only match correct module names
+module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
+
+# Regular expression which should only match correct module level names
+const-rgx=(([A-Z_][A-Z1-9_]*)|(__.*__))$
+
+# Regular expression which should only match correct class names
+class-rgx=[A-Z_][a-zA-Z0-9]+$
+
+# Regular expression which should only match correct function names
+function-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct method names
+method-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct instance attribute names
+attr-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct argument names
+argument-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct variable names
+variable-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct list comprehension /
+# generator expression variable names
+inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
+
+# Good variable names which should always be accepted, separated by a comma
+good-names=i,j,k,ex,Run,_
+
+# Bad variable names which should always be refused, separated by a comma
+bad-names=foo,bar,baz,toto,tutu,tata
+
+# List of builtins function names that should not be used, separated by a comma
+bad-functions=apply,input
+
+
+# checks for sign of poor/misdesign:
+# * number of methods, attributes, local variables...
+# * size, complexity of functions, methods
+#
+[DESIGN]
+
+# Maximum number of arguments for function / method
+max-args=12
+
+# Maximum number of locals for function / method body
+max-locals=30
+
+# Maximum number of return / yield for function / method body
+max-returns=12
+
+# Maximum number of branch for function / method body
+max-branchs=30
+
+# Maximum number of statements in function / method body
+max-statements=60
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=20
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=0
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+
+# checks for
+# * external modules dependencies
+# * relative / wildcard imports
+# * cyclic imports
+# * uses of deprecated modules
+#
+[IMPORTS]
+
+# Deprecated modules which should not be used, separated by a comma
+deprecated-modules=regsub,string,TERMIOS,Bastion,rexec
+
+# Create a graph of every (i.e. internal and external) dependencies in the
+# given file (report R0402 must not be disabled)
+import-graph=
+
+# Create a graph of external dependencies in the given file (report R0402 must
+# not be disabled)
+ext-import-graph=
+
+# Create a graph of internal dependencies in the given file (report R0402 must
+# not be disabled)
+int-import-graph=
+
+
+# checks for :
+# * methods without self as first argument
+# * overridden methods signature
+# * access only to existant members via self
+# * attributes not defined in the __init__ method
+# * supported interfaces implementation
+# * unreachable code
+#
+[CLASSES]
+
+# List of interface methods to ignore, separated by a comma. This is used for
+# instance to not check methods defines in Zope's Interface base class.
+ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,__new__,setUp
+
+
+# checks for similarities and duplicated code. This computation may be
+# memory / CPU intensive, so you should disable it if you experiments some
+# problems.
+#
+[SIMILARITIES]
+
+# Minimum lines number of a similarity.
+min-similarity-lines=10
+
+# Ignore comments when computing similarities.
+ignore-comments=yes
+
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+
+
+# checks for:
+# * warning notes in the code like FIXME, XXX
+# * PEP 263: source code with non ascii character but no encoding declaration
+#
+[MISCELLANEOUS]
+
+# List of note tags to take in consideration, separated by a comma.
+notes=FIXME,XXX,TODO
+
+
+# checks for :
+# * unauthorized constructions
+# * strict indentation
+# * line length
+# * use of <> instead of !=
+#
+[FORMAT]
+
+# Maximum number of characters on a single line.
+max-line-length=90
+
+# Maximum number of lines in a module
+max-module-lines=1000
+
+# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
+# tab).
+indent-string=' '
diff --git a/utils/reindent.py b/utils/reindent.py
new file mode 100755
index 000000000..e6ee82872
--- /dev/null
+++ b/utils/reindent.py
@@ -0,0 +1,291 @@
+#! /usr/bin/env python
+
+# Released to the public domain, by Tim Peters, 03 October 2000.
+# -B option added by Georg Brandl, 2006.
+
+"""reindent [-d][-r][-v] [ path ... ]
+
+-d (--dryrun) Dry run. Analyze, but don't make any changes to files.
+-r (--recurse) Recurse. Search for all .py files in subdirectories too.
+-B (--no-backup) Don't write .bak backup files.
+-v (--verbose) Verbose. Print informative msgs; else only names of changed files.
+-h (--help) Help. Print this usage information and exit.
+
+Change Python (.py) files to use 4-space indents and no hard tab characters.
+Also trim excess spaces and tabs from ends of lines, and remove empty lines
+at the end of files. Also ensure the last line ends with a newline.
+
+If no paths are given on the command line, reindent operates as a filter,
+reading a single source file from standard input and writing the transformed
+source to standard output. In this case, the -d, -r and -v flags are
+ignored.
+
+You can pass one or more file and/or directory paths. When a directory
+path, all .py files within the directory will be examined, and, if the -r
+option is given, likewise recursively for subdirectories.
+
+If output is not to standard output, reindent overwrites files in place,
+renaming the originals with a .bak extension. If it finds nothing to
+change, the file is left alone. If reindent does change a file, the changed
+file is a fixed-point for future runs (i.e., running reindent on the
+resulting .py file won't change it again).
+
+The hard part of reindenting is figuring out what to do with comment
+lines. So long as the input files get a clean bill of health from
+tabnanny.py, reindent should do a good job.
+"""
+
+__version__ = "1"
+
+import tokenize
+import os
+import sys
+
+verbose = 0
+recurse = 0
+dryrun = 0
+no_backup = 0
+
+def usage(msg=None):
+ if msg is not None:
+ print >> sys.stderr, msg
+ print >> sys.stderr, __doc__
+
+def errprint(*args):
+ sep = ""
+ for arg in args:
+ sys.stderr.write(sep + str(arg))
+ sep = " "
+ sys.stderr.write("\n")
+
+def main():
+ import getopt
+ global verbose, recurse, dryrun, no_backup
+
+ try:
+ opts, args = getopt.getopt(sys.argv[1:], "drvhB",
+ ["dryrun", "recurse", "verbose", "help",
+ "no-backup"])
+ except getopt.error, msg:
+ usage(msg)
+ return
+ for o, a in opts:
+ if o in ('-d', '--dryrun'):
+ dryrun += 1
+ elif o in ('-r', '--recurse'):
+ recurse += 1
+ elif o in ('-v', '--verbose'):
+ verbose += 1
+ elif o in ('-B', '--no-backup'):
+ no_backup += 1
+ elif o in ('-h', '--help'):
+ usage()
+ return
+ if not args:
+ r = Reindenter(sys.stdin)
+ r.run()
+ r.write(sys.stdout)
+ return
+ for arg in args:
+ check(arg)
+
+def check(file):
+ if os.path.isdir(file) and not os.path.islink(file):
+ if verbose:
+ print "listing directory", file
+ names = os.listdir(file)
+ for name in names:
+ fullname = os.path.join(file, name)
+ if ((recurse and os.path.isdir(fullname) and
+ not os.path.islink(fullname))
+ or name.lower().endswith(".py")):
+ check(fullname)
+ return
+
+ if verbose:
+ print "checking", file, "...",
+ try:
+ f = open(file)
+ except IOError, msg:
+ errprint("%s: I/O Error: %s" % (file, str(msg)))
+ return
+
+ r = Reindenter(f)
+ f.close()
+ if r.run():
+ if verbose:
+ print "changed."
+ if dryrun:
+ print "But this is a dry run, so leaving it alone."
+ else:
+ print "reindented", file, (dryrun and "(dry run => not really)" or "")
+ if not dryrun:
+ if not no_backup:
+ bak = file + ".bak"
+ if os.path.exists(bak):
+ os.remove(bak)
+ os.rename(file, bak)
+ if verbose:
+ print "renamed", file, "to", bak
+ f = open(file, "w")
+ r.write(f)
+ f.close()
+ if verbose:
+ print "wrote new", file
+ else:
+ if verbose:
+ print "unchanged."
+
+
+class Reindenter:
+
+ def __init__(self, f):
+ self.find_stmt = 1 # next token begins a fresh stmt?
+ self.level = 0 # current indent level
+
+ # Raw file lines.
+ self.raw = f.readlines()
+
+ # File lines, rstripped & tab-expanded. Dummy at start is so
+ # that we can use tokenize's 1-based line numbering easily.
+ # Note that a line is all-blank iff it's "\n".
+ self.lines = [line.rstrip('\n \t').expandtabs() + "\n"
+ for line in self.raw]
+ self.lines.insert(0, None)
+ self.index = 1 # index into self.lines of next line
+
+ # List of (lineno, indentlevel) pairs, one for each stmt and
+ # comment line. indentlevel is -1 for comment lines, as a
+ # signal that tokenize doesn't know what to do about them;
+ # indeed, they're our headache!
+ self.stats = []
+
+ def run(self):
+ tokenize.tokenize(self.getline, self.tokeneater)
+ # Remove trailing empty lines.
+ lines = self.lines
+ while lines and lines[-1] == "\n":
+ lines.pop()
+ # Sentinel.
+ stats = self.stats
+ stats.append((len(lines), 0))
+ # Map count of leading spaces to # we want.
+ have2want = {}
+ # Program after transformation.
+ after = self.after = []
+ # Copy over initial empty lines -- there's nothing to do until
+ # we see a line with *something* on it.
+ i = stats[0][0]
+ after.extend(lines[1:i])
+ for i in range(len(stats)-1):
+ thisstmt, thislevel = stats[i]
+ nextstmt = stats[i+1][0]
+ have = getlspace(lines[thisstmt])
+ want = thislevel * 4
+ if want < 0:
+ # A comment line.
+ if have:
+ # An indented comment line. If we saw the same
+ # indentation before, reuse what it most recently
+ # mapped to.
+ want = have2want.get(have, -1)
+ if want < 0:
+ # Then it probably belongs to the next real stmt.
+ for j in xrange(i+1, len(stats)-1):
+ jline, jlevel = stats[j]
+ if jlevel >= 0:
+ if have == getlspace(lines[jline]):
+ want = jlevel * 4
+ break
+ if want < 0: # Maybe it's a hanging
+ # comment like this one,
+ # in which case we should shift it like its base
+ # line got shifted.
+ for j in xrange(i-1, -1, -1):
+ jline, jlevel = stats[j]
+ if jlevel >= 0:
+ want = have + getlspace(after[jline-1]) - \
+ getlspace(lines[jline])
+ break
+ if want < 0:
+ # Still no luck -- leave it alone.
+ want = have
+ else:
+ want = 0
+ assert want >= 0
+ have2want[have] = want
+ diff = want - have
+ if diff == 0 or have == 0:
+ after.extend(lines[thisstmt:nextstmt])
+ else:
+ for line in lines[thisstmt:nextstmt]:
+ if diff > 0:
+ if line == "\n":
+ after.append(line)
+ else:
+ after.append(" " * diff + line)
+ else:
+ remove = min(getlspace(line), -diff)
+ after.append(line[remove:])
+ return self.raw != self.after
+
+ def write(self, f):
+ f.writelines(self.after)
+
+ # Line-getter for tokenize.
+ def getline(self):
+ if self.index >= len(self.lines):
+ line = ""
+ else:
+ line = self.lines[self.index]
+ self.index += 1
+ return line
+
+ # Line-eater for tokenize.
+ def tokeneater(self, type, token, (sline, scol), end, line,
+ INDENT=tokenize.INDENT,
+ DEDENT=tokenize.DEDENT,
+ NEWLINE=tokenize.NEWLINE,
+ COMMENT=tokenize.COMMENT,
+ NL=tokenize.NL):
+
+ if type == NEWLINE:
+ # A program statement, or ENDMARKER, will eventually follow,
+ # after some (possibly empty) run of tokens of the form
+ # (NL | COMMENT)* (INDENT | DEDENT+)?
+ self.find_stmt = 1
+
+ elif type == INDENT:
+ self.find_stmt = 1
+ self.level += 1
+
+ elif type == DEDENT:
+ self.find_stmt = 1
+ self.level -= 1
+
+ elif type == COMMENT:
+ if self.find_stmt:
+ self.stats.append((sline, -1))
+ # but we're still looking for a new stmt, so leave
+ # find_stmt alone
+
+ elif type == NL:
+ pass
+
+ elif self.find_stmt:
+ # This is the first "real token" following a NEWLINE, so it
+ # must be the first token of the next program statement, or an
+ # ENDMARKER.
+ self.find_stmt = 0
+ if line: # not endmarker
+ self.stats.append((sline, self.level))
+
+# Count number of leading blanks.
+def getlspace(line):
+ i, n = 0, len(line)
+ while i < n and line[i] == " ":
+ i += 1
+ return i
+
+if __name__ == '__main__':
+ main()