Initial import of the doc tools.

author: Georg Brandl <georg@python.org> 2007-07-23 09:02:25 +0000
committer: Georg Brandl <georg@python.org> 2007-07-23 09:02:25 +0000
commit: d60ca8d49d1654968886612883a2affa9dccfaa8 (patch)
tree: eccdde90eb4474e1fab81461048de618c4fc6b80 /utils
parent: 6219bccc477caba671e7c4a3a92a6626b33d5006 (diff)
download: sphinx-git-d60ca8d49d1654968886612883a2affa9dccfaa8.tar.gz
3 files changed, 833 insertions, 0 deletions
diff --git a/utils/check_sources.py b/utils/check_sources.py
new file mode 100755
index 000000000..761f05ae0
--- /dev/null
+++ b/utils/check_sources.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+"""
+    Checker for file headers
+    ~~~~~~~~~~~~~~~~~~~~~~~~
+
+    Make sure each Python file has a correct file header
+    including copyright and license information.
+
+    :copyright: 2006-2007 by Georg Brandl.
+    :license: GNU GPL, see LICENSE for more details.
+"""
+
+import sys, os, re
+import getopt
+import cStringIO
+from os.path import join, splitext, abspath
+
+
+checkers = {}
+
+def checker(*suffixes, **kwds):
+    only_pkg = kwds.pop('only_pkg', False)
+    def deco(func):
+        for suffix in suffixes:
+            checkers.setdefault(suffix, []).append(func)
+        func.only_pkg = only_pkg
+        return func
+    return deco
+
+
+name_mail_re = r'[\w ]+(<.*?>)?'
+copyright_re = re.compile(r'^    :copyright: 200\d(-200\d)? by %s(, %s)*[,.]$' %
+                          (name_mail_re, name_mail_re))
+license_re = re.compile(r"    :license: (.*?).\n")
+copyright_2_re = re.compile(r'^                %s(, %s)*[,.]$' %
+                            (name_mail_re, name_mail_re))
+coding_re    = re.compile(r'coding[:=]\s*([-\w.]+)')
+not_ix_re    = re.compile(r'\bnot\s+\S+?\s+i[sn]\s\S+')
+is_const_re  = re.compile(r'if.*?==\s+(None|False|True)\b')
+
+misspellings = ["developement", "adress", "verificate",  # ALLOW-MISSPELLING
+                "informations"]                          # ALLOW-MISSPELLING
+
+
+@checker('.py')
+def check_syntax(fn, lines):
+    try:
+        compile(''.join(lines), fn, "exec")
+    except SyntaxError, err:
+        yield 0, "not compilable: %s" % err
+
+
+@checker('.py')
+def check_style_and_encoding(fn, lines):
+    encoding = 'ascii'
+    for lno, line in enumerate(lines):
+        if len(line) > 90:
+            yield lno+1, "line too long"
+        m = not_ix_re.search(line)
+        if m:
+            yield lno+1, '"' + m.group() + '"'
+        if is_const_re.search(line):
+            yield lno+1, 'using == None/True/False'
+        if lno < 2:
+            co = coding_re.search(line)
+            if co:
+                encoding = co.group(1)
+        try:
+            line.decode(encoding)
+        except UnicodeDecodeError, err:
+            yield lno+1, "not decodable: %s\n   Line: %r" % (err, line)
+        except LookupError, err:
+            yield 0, "unknown encoding: %s" % encoding
+            encoding = 'latin1'
+
+
+@checker('.py', only_pkg=True)
+def check_fileheader(fn, lines):
+    # line number correction
+    c = 1
+    if lines[0:1] == ['#!/usr/bin/env python\n']:
+        lines = lines[1:]
+        c = 2
+
+    llist = []
+    docopen = False
+    for lno, l in enumerate(lines):
+        llist.append(l)
+        if lno == 0:
+            if l == '# -*- coding: rot13 -*-\n':
+                # special-case pony package
+                return
+            elif l != '# -*- coding: utf-8 -*-\n':
+                yield 1, "missing coding declaration"
+        elif lno == 1:
+            if l != '"""\n' and l != 'r"""\n':
+                yield 2, 'missing docstring begin (""")'
+            else:
+                docopen = True
+        elif docopen:
+            if l == '"""\n':
+                # end of docstring
+                if lno <= 4:
+                    yield lno+c, "missing module name in docstring"
+                break
+
+            if l != "\n" and l[:4] != '    ' and docopen:
+                yield lno+c, "missing correct docstring indentation"
+
+            if lno == 2:
+                # if not in package, don't check the module name
+                modname = fn[:-3].replace('/', '.').replace('.__init__', '')
+                while modname:
+                    if l.lower()[4:-1] == modname:
+                        break
+                    modname = '.'.join(modname.split('.')[1:])
+                else:
+                    yield 3, "wrong module name in docstring heading"
+                modnamelen = len(l.strip())
+            elif lno == 3:
+                if l.strip() != modnamelen * "~":
+                    yield 4, "wrong module name underline, should be ~~~...~"
+
+    else:
+        yield 0, "missing end and/or start of docstring..."
+
+    # check for copyright and license fields
+    license = llist[-2:-1]
+    if not license or not license_re.match(license[0]):
+        yield 0, "no correct license info"
+
+    ci = -3
+    copyright = llist[ci:ci+1]
+    while copyright and copyright_2_re.match(copyright[0]):
+        ci -= 1
+        copyright = llist[ci:ci+1]
+    if not copyright or not copyright_re.match(copyright[0]):
+        yield 0, "no correct copyright info"
+
+
+@checker('.py', '.html', '.js')
+def check_whitespace_and_spelling(fn, lines):
+    for lno, line in enumerate(lines):
+        if "\t" in line:
+            yield lno+1, "OMG TABS!!!1 "
+        if line[:-1].rstrip(' \t') != line[:-1]:
+            yield lno+1, "trailing whitespace"
+        for word in misspellings:
+            if word in line and 'ALLOW-MISSPELLING' not in line:
+                yield lno+1, '"%s" used' % word
+
+
+bad_tags = ('<b>', '<i>', '<u>', '<s>', '<strike>'
+            '<center>', '<big>', '<small>', '<font')
+
+@checker('.html')
+def check_xhtml(fn, lines):
+    for lno, line in enumerate(lines):
+        for bad_tag in bad_tags:
+            if bad_tag in line:
+                yield lno+1, "used " + bad_tag
+
+
+def main(argv):
+    try:
+        gopts, args = getopt.getopt(argv[1:], "vi:")
+    except getopt.GetoptError:
+        print "Usage: %s [-v] [-i ignorepath]* [path]" % argv[0]
+        return 2
+    opts = {}
+    for opt, val in gopts:
+        if opt == '-i':
+            val = abspath(val)
+        opts.setdefault(opt, []).append(val)
+
+    if len(args) == 0:
+        path = '.'
+    elif len(args) == 1:
+        path = args[0]
+    else:
+        print "Usage: %s [-v] [-i ignorepath]* [path]" % argv[0]
+        return 2
+
+    verbose = '-v' in opts
+
+    num = 0
+    out = cStringIO.StringIO()
+
+    # TODO: replace os.walk run with iteration over output of
+    #       `svn list -R`.
+
+    for root, dirs, files in os.walk(path):
+        if '.svn' in dirs:
+            dirs.remove('.svn')
+        if '-i' in opts and abspath(root) in opts['-i']:
+            del dirs[:]
+            continue
+        in_check_pkg = root.startswith('./sphinx')
+        for fn in files:
+
+            fn = join(root, fn)
+            if fn[:2] == './': fn = fn[2:]
+
+            if '-i' in opts and abspath(fn) in opts['-i']:
+                continue
+
+            ext = splitext(fn)[1]
+            checkerlist = checkers.get(ext, None)
+            if not checkerlist:
+                continue
+
+            if verbose:
+                print "Checking %s..." % fn
+
+            try:
+                f = open(fn, 'r')
+                lines = list(f)
+            except (IOError, OSError), err:
+                print "%s: cannot open: %s" % (fn, err)
+                num += 1
+                continue
+
+            for checker in checkerlist:
+                if not in_check_pkg and checker.only_pkg:
+                    continue
+                for lno, msg in checker(fn, lines):
+                    print >>out, "%s:%d: %s" % (fn, lno, msg)
+                    num += 1
+    if verbose:
+        print
+    if num == 0:
+        print "No errors found."
+    else:
+        print out.getvalue().rstrip('\n')
+        print "%d error%s found." % (num, num > 1 and "s" or "")
+    return int(num > 0)
+
+
+if __name__ == '__main__':
+    sys.exit(main(sys.argv))
diff --git a/utils/pylintrc b/utils/pylintrc
new file mode 100644
index 000000000..aa04e12e5
--- /dev/null
+++ b/utils/pylintrc
@@ -0,0 +1,301 @@
+# lint Python modules using external checkers.
+# 
+# This is the main checker controlling the other ones and the reports
+# generation. It is itself both a raw checker and an astng checker in order
+# to:
+# * handle message activation / deactivation at the module level
+# * handle some basic but necessary stats'data (number of classes, methods...)
+# 
+[MASTER]
+
+# Specify a configuration file.
+#rcfile=
+
+# Profiled execution.
+profile=no
+
+# Add <file or directory> to the black list. It should be a base name, not a
+# path. You may set this option multiple times.
+ignore=.svn
+
+# Pickle collected data for later comparisons.
+persistent=yes
+
+# Set the cache size for astng objects.
+cache-size=500
+
+# List of plugins (as comma separated values of python modules names) to load,
+# usually to register additional checkers.
+load-plugins=
+
+
+[MESSAGES CONTROL]
+
+# Enable only checker(s) with the given id(s). This option conflicts with the
+# disable-checker option
+#enable-checker=
+
+# Enable all checker(s) except those with the given id(s). This option conflicts
+# with the enable-checker option
+#disable-checker=
+
+# Enable all messages in the listed categories.
+#enable-msg-cat=
+
+# Disable all messages in the listed categories.
+#disable-msg-cat=
+
+# Enable the message(s) with the given id(s).
+#enable-msg=
+
+# Disable the message(s) with the given id(s).
+disable-msg=C0323,W0142,C0301,C0103,C0111,E0213,C0302,C0203,W0703,R0201
+
+
+[REPORTS]
+
+# set the output format. Available formats are text, parseable, colorized and
+# html
+output-format=colorized
+
+# Include message's id in output
+include-ids=yes
+
+# Put messages in a separate file for each module / package specified on the
+# command line instead of printing them on stdout. Reports (if any) will be
+# written in a file name "pylint_global.[txt|html]".
+files-output=no
+
+# Tells whether to display a full report or only the messages
+reports=yes
+
+# Python expression which should return a note less than 10 (10 is the highest
+# note). You have access to the variables error, warning and statement, which
+# respectively contain the number of error / warning messages and the total
+# number of statements analyzed. This is used by the global evaluation report
+# (R0004).
+evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
+
+# Add a comment according to your evaluation note. This is used by the global
+# evaluation report (R0004).
+comment=no
+
+# Enable the report(s) with the given id(s).
+#enable-report=
+
+# Disable the report(s) with the given id(s).
+#disable-report=
+
+
+# checks for
+# * unused variables / imports
+# * undefined variables
+# * redefinition of variable from builtins or from an outer scope
+# * use of variable before assignment
+# 
+[VARIABLES]
+
+# Tells whether we should check for unused import in __init__ files.
+init-import=no
+
+# A regular expression matching names used for dummy variables (i.e. not used).
+dummy-variables-rgx=_|dummy
+
+# List of additional names supposed to be defined in builtins. Remember that
+# you should avoid to define new builtins when possible.
+additional-builtins=
+
+
+# try to find bugs in the code using type inference
+# 
+[TYPECHECK]
+
+# Tells whether missing members accessed in mixin class should be ignored. A
+# mixin class is detected if its name ends with "mixin" (case insensitive).
+ignore-mixin-members=yes
+
+# When zope mode is activated, consider the acquired-members option to ignore
+# access to some undefined attributes.
+zope=no
+
+# List of members usually obtained through zope's acquisition mechanism and
+# so shouldn't trigger E0201 when accessed (need zope=yes to be considered).
+acquired-members=REQUEST,acl_users,aq_parent
+
+
+# checks for :
+# * doc strings
+# * modules / classes / functions / methods / arguments / variables name
+# * number of arguments, local variables, branchs, returns and statements in
+# functions, methods
+# * required module attributes
+# * dangerous default values as arguments
+# * redefinition of function / method / class
+# * uses of the global statement
+# 
+[BASIC]
+
+# Required attributes for module, separated by a comma
+required-attributes=
+
+# Regular expression which should only match functions or classes name which do
+# not require a docstring
+no-docstring-rgx=__.*__
+
+# Regular expression which should only match correct module names
+module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$
+
+# Regular expression which should only match correct module level names
+const-rgx=(([A-Z_][A-Z1-9_]*)|(__.*__))$
+
+# Regular expression which should only match correct class names
+class-rgx=[A-Z_][a-zA-Z0-9]+$
+
+# Regular expression which should only match correct function names
+function-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct method names
+method-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct instance attribute names
+attr-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct argument names
+argument-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct variable names
+variable-rgx=[a-z_][a-z0-9_]{2,30}$
+
+# Regular expression which should only match correct list comprehension /
+# generator expression variable names
+inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$
+
+# Good variable names which should always be accepted, separated by a comma
+good-names=i,j,k,ex,Run,_
+
+# Bad variable names which should always be refused, separated by a comma
+bad-names=foo,bar,baz,toto,tutu,tata
+
+# List of builtins function names that should not be used, separated by a comma
+bad-functions=apply,input
+
+
+# checks for sign of poor/misdesign:
+# * number of methods, attributes, local variables...
+# * size, complexity of functions, methods
+# 
+[DESIGN]
+
+# Maximum number of arguments for function / method
+max-args=12
+
+# Maximum number of locals for function / method body
+max-locals=30
+
+# Maximum number of return / yield for function / method body
+max-returns=12
+
+# Maximum number of branch for function / method body
+max-branchs=30
+
+# Maximum number of statements in function / method body
+max-statements=60
+
+# Maximum number of parents for a class (see R0901).
+max-parents=7
+
+# Maximum number of attributes for a class (see R0902).
+max-attributes=20
+
+# Minimum number of public methods for a class (see R0903).
+min-public-methods=0
+
+# Maximum number of public methods for a class (see R0904).
+max-public-methods=20
+
+
+# checks for
+# * external modules dependencies
+# * relative / wildcard imports
+# * cyclic imports
+# * uses of deprecated modules
+# 
+[IMPORTS]
+
+# Deprecated modules which should not be used, separated by a comma
+deprecated-modules=regsub,string,TERMIOS,Bastion,rexec
+
+# Create a graph of every (i.e. internal and external) dependencies in the
+# given file (report R0402 must not be disabled)
+import-graph=
+
+# Create a graph of external dependencies in the given file (report R0402 must
+# not be disabled)
+ext-import-graph=
+
+# Create a graph of internal dependencies in the given file (report R0402 must
+# not be disabled)
+int-import-graph=
+
+
+# checks for :
+# * methods without self as first argument
+# * overridden methods signature
+# * access only to existing members via self
+# * attributes not defined in the __init__ method
+# * supported interfaces implementation
+# * unreachable code
+# 
+[CLASSES]
+
+# List of interface methods to ignore, separated by a comma. This is used for
+# instance to not check methods defined in Zope's Interface base class.
+ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by
+
+# List of method names used to declare (i.e. assign) instance attributes.
+defining-attr-methods=__init__,__new__,setUp
+
+
+# checks for similarities and duplicated code. This computation may be
+# memory / CPU intensive, so you should disable it if you experience
+# problems.
+# 
+[SIMILARITIES]
+
+# Minimum lines number of a similarity.
+min-similarity-lines=10
+
+# Ignore comments when computing similarities.
+ignore-comments=yes
+
+# Ignore docstrings when computing similarities.
+ignore-docstrings=yes
+
+
+# checks for:
+# * warning notes in the code like FIXME, XXX
+# * PEP 263: source code with non ascii character but no encoding declaration
+# 
+[MISCELLANEOUS]
+
+# List of note tags to take into consideration, separated by a comma.
+notes=FIXME,XXX,TODO
+
+
+# checks for :
+# * unauthorized constructions
+# * strict indentation
+# * line length
+# * use of <> instead of !=
+# 
+[FORMAT]
+
+# Maximum number of characters on a single line.
+max-line-length=90
+
+# Maximum number of lines in a module
+max-module-lines=1000
+
+# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
+# tab).
+indent-string='    '
diff --git a/utils/reindent.py b/utils/reindent.py
new file mode 100755
index 000000000..e6ee82872
--- /dev/null
+++ b/utils/reindent.py
@@ -0,0 +1,291 @@
+#! /usr/bin/env python
+
+# Released to the public domain, by Tim Peters, 03 October 2000.
+# -B option added by Georg Brandl, 2006.
+
+"""reindent [-d][-r][-v] [ path ... ]
+
+-d (--dryrun)  Dry run.  Analyze, but don't make any changes to files.
+-r (--recurse) Recurse.  Search for all .py files in subdirectories too.
+-B (--no-backup)         Don't write .bak backup files.
+-v (--verbose) Verbose.  Print informative msgs; else only names of changed files.
+-h (--help)    Help.     Print this usage information and exit.
+
+Change Python (.py) files to use 4-space indents and no hard tab characters.
+Also trim excess spaces and tabs from ends of lines, and remove empty lines
+at the end of files.  Also ensure the last line ends with a newline.
+
+If no paths are given on the command line, reindent operates as a filter,
+reading a single source file from standard input and writing the transformed
+source to standard output.  In this case, the -d, -r, -v and -B flags are
+ignored.
+
+You can pass one or more file and/or directory paths.  When a directory
+path, all .py files within the directory will be examined, and, if the -r
+option is given, likewise recursively for subdirectories.
+
+If output is not to standard output, reindent overwrites files in place,
+renaming the originals with a .bak extension.  If it finds nothing to
+change, the file is left alone.  If reindent does change a file, the changed
+file is a fixed-point for future runs (i.e., running reindent on the
+resulting .py file won't change it again).
+
+The hard part of reindenting is figuring out what to do with comment
+lines.  So long as the input files get a clean bill of health from
+tabnanny.py, reindent should do a good job.
+"""
+
+__version__ = "1"
+
+import tokenize
+import os
+import sys
+
+verbose = 0
+recurse = 0
+dryrun  = 0
+no_backup = 0
+
+def usage(msg=None):
+    if msg is not None:
+        print >> sys.stderr, msg
+    print >> sys.stderr, __doc__
+
+def errprint(*args):
+    sep = ""
+    for arg in args:
+        sys.stderr.write(sep + str(arg))
+        sep = " "
+    sys.stderr.write("\n")
+
+def main():
+    import getopt
+    global verbose, recurse, dryrun, no_backup
+
+    try:
+        opts, args = getopt.getopt(sys.argv[1:], "drvhB",
+                                   ["dryrun", "recurse", "verbose", "help",
+                                    "no-backup"])
+    except getopt.error, msg:
+        usage(msg)
+        return
+    for o, a in opts:
+        if o in ('-d', '--dryrun'):
+            dryrun += 1
+        elif o in ('-r', '--recurse'):
+            recurse += 1
+        elif o in ('-v', '--verbose'):
+            verbose += 1
+        elif o in ('-B', '--no-backup'):
+            no_backup += 1
+        elif o in ('-h', '--help'):
+            usage()
+            return
+    if not args:
+        r = Reindenter(sys.stdin)
+        r.run()
+        r.write(sys.stdout)
+        return
+    for arg in args:
+        check(arg)
+
+def check(file):
+    if os.path.isdir(file) and not os.path.islink(file):
+        if verbose:
+            print "listing directory", file
+        names = os.listdir(file)
+        for name in names:
+            fullname = os.path.join(file, name)
+            if ((recurse and os.path.isdir(fullname) and
+                 not os.path.islink(fullname))
+                or name.lower().endswith(".py")):
+                check(fullname)
+        return
+
+    if verbose:
+        print "checking", file, "...",
+    try:
+        f = open(file)
+    except IOError, msg:
+        errprint("%s: I/O Error: %s" % (file, str(msg)))
+        return
+
+    r = Reindenter(f)
+    f.close()
+    if r.run():
+        if verbose:
+            print "changed."
+            if dryrun:
+                print "But this is a dry run, so leaving it alone."
+        else:
+            print "reindented", file, (dryrun and "(dry run => not really)" or "")
+        if not dryrun:
+            if not no_backup:
+                bak = file + ".bak"
+                if os.path.exists(bak):
+                    os.remove(bak)
+                os.rename(file, bak)
+                if verbose:
+                    print "renamed", file, "to", bak
+            f = open(file, "w")
+            r.write(f)
+            f.close()
+            if verbose:
+                print "wrote new", file
+    else:
+        if verbose:
+            print "unchanged."
+
+
+class Reindenter:
+
+    def __init__(self, f):
+        self.find_stmt = 1  # next token begins a fresh stmt?
+        self.level = 0      # current indent level
+
+        # Raw file lines.
+        self.raw = f.readlines()
+
+        # File lines, rstripped & tab-expanded.  Dummy at start is so
+        # that we can use tokenize's 1-based line numbering easily.
+        # Note that a line is all-blank iff it's "\n".
+        self.lines = [line.rstrip('\n \t').expandtabs() + "\n"
+                      for line in self.raw]
+        self.lines.insert(0, None)
+        self.index = 1  # index into self.lines of next line
+
+        # List of (lineno, indentlevel) pairs, one for each stmt and
+        # comment line.  indentlevel is -1 for comment lines, as a
+        # signal that tokenize doesn't know what to do about them;
+        # indeed, they're our headache!
+        self.stats = []
+
+    def run(self):
+        tokenize.tokenize(self.getline, self.tokeneater)
+        # Remove trailing empty lines.
+        lines = self.lines
+        while lines and lines[-1] == "\n":
+            lines.pop()
+        # Sentinel.
+        stats = self.stats
+        stats.append((len(lines), 0))
+        # Map count of leading spaces to # we want.
+        have2want = {}
+        # Program after transformation.
+        after = self.after = []
+        # Copy over initial empty lines -- there's nothing to do until
+        # we see a line with *something* on it.
+        i = stats[0][0]
+        after.extend(lines[1:i])
+        for i in range(len(stats)-1):
+            thisstmt, thislevel = stats[i]
+            nextstmt = stats[i+1][0]
+            have = getlspace(lines[thisstmt])
+            want = thislevel * 4
+            if want < 0:
+                # A comment line.
+                if have:
+                    # An indented comment line.  If we saw the same
+                    # indentation before, reuse what it most recently
+                    # mapped to.
+                    want = have2want.get(have, -1)
+                    if want < 0:
+                        # Then it probably belongs to the next real stmt.
+                        for j in xrange(i+1, len(stats)-1):
+                            jline, jlevel = stats[j]
+                            if jlevel >= 0:
+                                if have == getlspace(lines[jline]):
+                                    want = jlevel * 4
+                                break
+                    if want < 0:           # Maybe it's a hanging
+                                           # comment like this one,
+                        # in which case we should shift it like its base
+                        # line got shifted.
+                        for j in xrange(i-1, -1, -1):
+                            jline, jlevel = stats[j]
+                            if jlevel >= 0:
+                                want = have + getlspace(after[jline-1]) - \
+                                       getlspace(lines[jline])
+                                break
+                    if want < 0:
+                        # Still no luck -- leave it alone.
+                        want = have
+                else:
+                    want = 0
+            assert want >= 0
+            have2want[have] = want
+            diff = want - have
+            if diff == 0 or have == 0:
+                after.extend(lines[thisstmt:nextstmt])
+            else:
+                for line in lines[thisstmt:nextstmt]:
+                    if diff > 0:
+                        if line == "\n":
+                            after.append(line)
+                        else:
+                            after.append(" " * diff + line)
+                    else:
+                        remove = min(getlspace(line), -diff)
+                        after.append(line[remove:])
+        return self.raw != self.after
+
+    def write(self, f):
+        f.writelines(self.after)
+
+    # Line-getter for tokenize.
+    def getline(self):
+        if self.index >= len(self.lines):
+            line = ""
+        else:
+            line = self.lines[self.index]
+            self.index += 1
+        return line
+
+    # Line-eater for tokenize.
+    def tokeneater(self, type, token, (sline, scol), end, line,
+                   INDENT=tokenize.INDENT,
+                   DEDENT=tokenize.DEDENT,
+                   NEWLINE=tokenize.NEWLINE,
+                   COMMENT=tokenize.COMMENT,
+                   NL=tokenize.NL):
+
+        if type == NEWLINE:
+            # A program statement, or ENDMARKER, will eventually follow,
+            # after some (possibly empty) run of tokens of the form
+            #     (NL | COMMENT)* (INDENT | DEDENT+)?
+            self.find_stmt = 1
+
+        elif type == INDENT:
+            self.find_stmt = 1
+            self.level += 1
+
+        elif type == DEDENT:
+            self.find_stmt = 1
+            self.level -= 1
+
+        elif type == COMMENT:
+            if self.find_stmt:
+                self.stats.append((sline, -1))
+                # but we're still looking for a new stmt, so leave
+                # find_stmt alone
+
+        elif type == NL:
+            pass
+
+        elif self.find_stmt:
+            # This is the first "real token" following a NEWLINE, so it
+            # must be the first token of the next program statement, or an
+            # ENDMARKER.
+            self.find_stmt = 0
+            if line:   # not endmarker
+                self.stats.append((sline, self.level))
+
+# Count number of leading blanks.
+def getlspace(line):
+    i, n = 0, len(line)
+    while i < n and line[i] == " ":
+        i += 1
+    return i
+
+if __name__ == '__main__':
+    main()
author	Georg Brandl <georg@python.org>	2007-07-23 09:02:25 +0000
committer	Georg Brandl <georg@python.org>	2007-07-23 09:02:25 +0000
commit	d60ca8d49d1654968886612883a2affa9dccfaa8 (patch)
tree	eccdde90eb4474e1fab81461048de618c4fc6b80 /utils
parent	6219bccc477caba671e7c4a3a92a6626b33d5006 (diff)
download	sphinx-git-d60ca8d49d1654968886612883a2affa9dccfaa8.tar.gz