diff options
| author | Georg Brandl <georg@python.org> | 2007-07-23 09:02:25 +0000 |
|---|---|---|
| committer | Georg Brandl <georg@python.org> | 2007-07-23 09:02:25 +0000 |
| commit | d60ca8d49d1654968886612883a2affa9dccfaa8 (patch) | |
| tree | eccdde90eb4474e1fab81461048de618c4fc6b80 /utils | |
| parent | 6219bccc477caba671e7c4a3a92a6626b33d5006 (diff) | |
| download | sphinx-git-d60ca8d49d1654968886612883a2affa9dccfaa8.tar.gz | |
Initial import of the doc tools.
Diffstat (limited to 'utils')
| -rwxr-xr-x | utils/check_sources.py | 241 | ||||
| -rw-r--r-- | utils/pylintrc | 301 | ||||
| -rwxr-xr-x | utils/reindent.py | 291 |
3 files changed, 833 insertions, 0 deletions
diff --git a/utils/check_sources.py b/utils/check_sources.py new file mode 100755 index 000000000..761f05ae0 --- /dev/null +++ b/utils/check_sources.py @@ -0,0 +1,241 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +""" + Checker for file headers + ~~~~~~~~~~~~~~~~~~~~~~~~ + + Make sure each Python file has a correct file header + including copyright and license information. + + :copyright: 2006-2007 by Georg Brandl. + :license: GNU GPL, see LICENSE for more details. +""" + +import sys, os, re +import getopt +import cStringIO +from os.path import join, splitext, abspath + + +checkers = {} + +def checker(*suffixes, **kwds): + only_pkg = kwds.pop('only_pkg', False) + def deco(func): + for suffix in suffixes: + checkers.setdefault(suffix, []).append(func) + func.only_pkg = only_pkg + return func + return deco + + +name_mail_re = r'[\w ]+(<.*?>)?' +copyright_re = re.compile(r'^ :copyright: 200\d(-200\d)? by %s(, %s)*[,.]$' % + (name_mail_re, name_mail_re)) +license_re = re.compile(r" :license: (.*?).\n") +copyright_2_re = re.compile(r'^ %s(, %s)*[,.]$' % + (name_mail_re, name_mail_re)) +coding_re = re.compile(r'coding[:=]\s*([-\w.]+)') +not_ix_re = re.compile(r'\bnot\s+\S+?\s+i[sn]\s\S+') +is_const_re = re.compile(r'if.*?==\s+(None|False|True)\b') + +misspellings = ["developement", "adress", "verificate", # ALLOW-MISSPELLING + "informations"] # ALLOW-MISSPELLING + + +@checker('.py') +def check_syntax(fn, lines): + try: + compile(''.join(lines), fn, "exec") + except SyntaxError, err: + yield 0, "not compilable: %s" % err + + +@checker('.py') +def check_style_and_encoding(fn, lines): + encoding = 'ascii' + for lno, line in enumerate(lines): + if len(line) > 90: + yield lno+1, "line too long" + m = not_ix_re.search(line) + if m: + yield lno+1, '"' + m.group() + '"' + if is_const_re.search(line): + yield lno+1, 'using == None/True/False' + if lno < 2: + co = coding_re.search(line) + if co: + encoding = co.group(1) + try: + line.decode(encoding) + except UnicodeDecodeError, err: + yield lno+1, "not decodable: %s\n Line: %r" % (err, line) + except LookupError, err: + yield 0, "unknown encoding: %s" % encoding + encoding = 'latin1' + + +@checker('.py', only_pkg=True) +def check_fileheader(fn, lines): + # line number correction + c = 1 + if lines[0:1] == ['#!/usr/bin/env python\n']: + lines = lines[1:] + c = 2 + + llist = [] + docopen = False + for lno, l in enumerate(lines): + llist.append(l) + if lno == 0: + if l == '# -*- coding: rot13 -*-\n': + # special-case pony package + return + elif l != '# -*- coding: utf-8 -*-\n': + yield 1, "missing coding declaration" + elif lno == 1: + if l != '"""\n' and l != 'r"""\n': + yield 2, 'missing docstring begin (""")' + else: + docopen = True + elif docopen: + if l == '"""\n': + # end of docstring + if lno <= 4: + yield lno+c, "missing module name in docstring" + break + + if l != "\n" and l[:4] != ' ' and docopen: + yield lno+c, "missing correct docstring indentation" + + if lno == 2: + # if not in package, don't check the module name + modname = fn[:-3].replace('/', '.').replace('.__init__', '') + while modname: + if l.lower()[4:-1] == modname: + break + modname = '.'.join(modname.split('.')[1:]) + else: + yield 3, "wrong module name in docstring heading" + modnamelen = len(l.strip()) + elif lno == 3: + if l.strip() != modnamelen * "~": + yield 4, "wrong module name underline, should be ~~~...~" + + else: + yield 0, "missing end and/or start of docstring..." + + # check for copyright and license fields + license = llist[-2:-1] + if not license or not license_re.match(license[0]): + yield 0, "no correct license info" + + ci = -3 + copyright = llist[ci:ci+1] + while copyright and copyright_2_re.match(copyright[0]): + ci -= 1 + copyright = llist[ci:ci+1] + if not copyright or not copyright_re.match(copyright[0]): + yield 0, "no correct copyright info" + + +@checker('.py', '.html', '.js') +def check_whitespace_and_spelling(fn, lines): + for lno, line in enumerate(lines): + if "\t" in line: + yield lno+1, "OMG TABS!!!1 " + if line[:-1].rstrip(' \t') != line[:-1]: + yield lno+1, "trailing whitespace" + for word in misspellings: + if word in line and 'ALLOW-MISSPELLING' not in line: + yield lno+1, '"%s" used' % word + + +bad_tags = ('<b>', '<i>', '<u>', '<s>', '<strike>' + '<center>', '<big>', '<small>', '<font') + +@checker('.html') +def check_xhtml(fn, lines): + for lno, line in enumerate(lines): + for bad_tag in bad_tags: + if bad_tag in line: + yield lno+1, "used " + bad_tag + + +def main(argv): + try: + gopts, args = getopt.getopt(argv[1:], "vi:") + except getopt.GetoptError: + print "Usage: %s [-v] [-i ignorepath]* [path]" % argv[0] + return 2 + opts = {} + for opt, val in gopts: + if opt == '-i': + val = abspath(val) + opts.setdefault(opt, []).append(val) + + if len(args) == 0: + path = '.' + elif len(args) == 1: + path = args[0] + else: + print "Usage: %s [-v] [-i ignorepath]* [path]" % argv[0] + return 2 + + verbose = '-v' in opts + + num = 0 + out = cStringIO.StringIO() + + # TODO: replace os.walk run with iteration over output of + # `svn list -R`. + + for root, dirs, files in os.walk(path): + if '.svn' in dirs: + dirs.remove('.svn') + if '-i' in opts and abspath(root) in opts['-i']: + del dirs[:] + continue + in_check_pkg = root.startswith('./sphinx') + for fn in files: + + fn = join(root, fn) + if fn[:2] == './': fn = fn[2:] + + if '-i' in opts and abspath(fn) in opts['-i']: + continue + + ext = splitext(fn)[1] + checkerlist = checkers.get(ext, None) + if not checkerlist: + continue + + if verbose: + print "Checking %s..." % fn + + try: + f = open(fn, 'r') + lines = list(f) + except (IOError, OSError), err: + print "%s: cannot open: %s" % (fn, err) + num += 1 + continue + + for checker in checkerlist: + if not in_check_pkg and checker.only_pkg: + continue + for lno, msg in checker(fn, lines): + print >>out, "%s:%d: %s" % (fn, lno, msg) + num += 1 + if verbose: + print + if num == 0: + print "No errors found." + else: + print out.getvalue().rstrip('\n') + print "%d error%s found." % (num, num > 1 and "s" or "") + return int(num > 0) + + +if __name__ == '__main__': + sys.exit(main(sys.argv)) diff --git a/utils/pylintrc b/utils/pylintrc new file mode 100644 index 000000000..aa04e12e5 --- /dev/null +++ b/utils/pylintrc @@ -0,0 +1,301 @@ +# lint Python modules using external checkers. +# +# This is the main checker controling the other ones and the reports +# generation. It is itself both a raw checker and an astng checker in order +# to: +# * handle message activation / deactivation at the module level +# * handle some basic but necessary stats'data (number of classes, methods...) +# +[MASTER] + +# Specify a configuration file. +#rcfile= + +# Profiled execution. +profile=no + +# Add <file or directory> to the black list. It should be a base name, not a +# path. You may set this option multiple times. +ignore=.svn + +# Pickle collected data for later comparisons. +persistent=yes + +# Set the cache size for astng objects. +cache-size=500 + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + + +[MESSAGES CONTROL] + +# Enable only checker(s) with the given id(s). This option conflict with the +# disable-checker option +#enable-checker= + +# Enable all checker(s) except those with the given id(s). This option conflict +# with the disable-checker option +#disable-checker= + +# Enable all messages in the listed categories. +#enable-msg-cat= + +# Disable all messages in the listed categories. +#disable-msg-cat= + +# Enable the message(s) with the given id(s). +#enable-msg= + +# Disable the message(s) with the given id(s). +disable-msg=C0323,W0142,C0301,C0103,C0111,E0213,C0302,C0203,W0703,R0201 + + +[REPORTS] + +# set the output format. Available formats are text, parseable, colorized and +# html +output-format=colorized + +# Include message's id in output +include-ids=yes + +# Put messages in a separate file for each module / package specified on the +# command line instead of printing them on stdout. Reports (if any) will be +# written in a file name "pylint_global.[txt|html]". +files-output=no + +# Tells wether to display a full report or only the messages +reports=yes + +# Python expression which should return a note less than 10 (10 is the highest +# note).You have access to the variables errors warning, statement which +# respectivly contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (R0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Add a comment according to your evaluation note. This is used by the global +# evaluation report (R0004). +comment=no + +# Enable the report(s) with the given id(s). +#enable-report= + +# Disable the report(s) with the given id(s). +#disable-report= + + +# checks for +# * unused variables / imports +# * undefined variables +# * redefinition of variable from builtins or from an outer scope +# * use of variable before assigment +# +[VARIABLES] + +# Tells wether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching names used for dummy variables (i.e. not used). +dummy-variables-rgx=_|dummy + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. +additional-builtins= + + +# try to find bugs in the code using type inference +# +[TYPECHECK] + +# Tells wether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# When zope mode is activated, consider the acquired-members option to ignore +# access to some undefined attributes. +zope=no + +# List of members which are usually get through zope's acquisition mecanism and +# so shouldn't trigger E0201 when accessed (need zope=yes to be considered). +acquired-members=REQUEST,acl_users,aq_parent + + +# checks for : +# * doc strings +# * modules / classes / functions / methods / arguments / variables name +# * number of arguments, local variables, branchs, returns and statements in +# functions, methods +# * required module attributes +# * dangerous default values as arguments +# * redefinition of function / method / class +# * uses of the global statement +# +[BASIC] + +# Required attributes for module, separated by a comma +required-attributes= + +# Regular expression which should only match functions or classes name which do +# not require a docstring +no-docstring-rgx=__.*__ + +# Regular expression which should only match correct module names +module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Regular expression which should only match correct module level names +const-rgx=(([A-Z_][A-Z1-9_]*)|(__.*__))$ + +# Regular expression which should only match correct class names +class-rgx=[A-Z_][a-zA-Z0-9]+$ + +# Regular expression which should only match correct function names +function-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct method names +method-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct instance attribute names +attr-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct argument names +argument-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct variable names +variable-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct list comprehension / +# generator expression variable names +inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ + +# Good variable names which should always be accepted, separated by a comma +good-names=i,j,k,ex,Run,_ + +# Bad variable names which should always be refused, separated by a comma +bad-names=foo,bar,baz,toto,tutu,tata + +# List of builtins function names that should not be used, separated by a comma +bad-functions=apply,input + + +# checks for sign of poor/misdesign: +# * number of methods, attributes, local variables... +# * size, complexity of functions, methods +# +[DESIGN] + +# Maximum number of arguments for function / method +max-args=12 + +# Maximum number of locals for function / method body +max-locals=30 + +# Maximum number of return / yield for function / method body +max-returns=12 + +# Maximum number of branch for function / method body +max-branchs=30 + +# Maximum number of statements in function / method body +max-statements=60 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of attributes for a class (see R0902). +max-attributes=20 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=0 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + + +# checks for +# * external modules dependencies +# * relative / wildcard imports +# * cyclic imports +# * uses of deprecated modules +# +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=regsub,string,TERMIOS,Bastion,rexec + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report R0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report R0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report R0402 must +# not be disabled) +int-import-graph= + + +# checks for : +# * methods without self as first argument +# * overridden methods signature +# * access only to existant members via self +# * attributes not defined in the __init__ method +# * supported interfaces implementation +# * unreachable code +# +[CLASSES] + +# List of interface methods to ignore, separated by a comma. This is used for +# instance to not check methods defines in Zope's Interface base class. +ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__,__new__,setUp + + +# checks for similarities and duplicated code. This computation may be +# memory / CPU intensive, so you should disable it if you experiments some +# problems. +# +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=10 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + + +# checks for: +# * warning notes in the code like FIXME, XXX +# * PEP 263: source code with non ascii character but no encoding declaration +# +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME,XXX,TODO + + +# checks for : +# * unauthorized constructions +# * strict indentation +# * line length +# * use of <> instead of != +# +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=90 + +# Maximum number of lines in a module +max-module-lines=1000 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' diff --git a/utils/reindent.py b/utils/reindent.py new file mode 100755 index 000000000..e6ee82872 --- /dev/null +++ b/utils/reindent.py @@ -0,0 +1,291 @@ +#! /usr/bin/env python + +# Released to the public domain, by Tim Peters, 03 October 2000. +# -B option added by Georg Brandl, 2006. + +"""reindent [-d][-r][-v] [ path ... ] + +-d (--dryrun) Dry run. Analyze, but don't make any changes to files. +-r (--recurse) Recurse. Search for all .py files in subdirectories too. +-B (--no-backup) Don't write .bak backup files. +-v (--verbose) Verbose. Print informative msgs; else only names of changed files. +-h (--help) Help. Print this usage information and exit. + +Change Python (.py) files to use 4-space indents and no hard tab characters. +Also trim excess spaces and tabs from ends of lines, and remove empty lines +at the end of files. Also ensure the last line ends with a newline. + +If no paths are given on the command line, reindent operates as a filter, +reading a single source file from standard input and writing the transformed +source to standard output. In this case, the -d, -r and -v flags are +ignored. + +You can pass one or more file and/or directory paths. When a directory +path, all .py files within the directory will be examined, and, if the -r +option is given, likewise recursively for subdirectories. + +If output is not to standard output, reindent overwrites files in place, +renaming the originals with a .bak extension. If it finds nothing to +change, the file is left alone. If reindent does change a file, the changed +file is a fixed-point for future runs (i.e., running reindent on the +resulting .py file won't change it again). + +The hard part of reindenting is figuring out what to do with comment +lines. So long as the input files get a clean bill of health from +tabnanny.py, reindent should do a good job. +""" + +__version__ = "1" + +import tokenize +import os +import sys + +verbose = 0 +recurse = 0 +dryrun = 0 +no_backup = 0 + +def usage(msg=None): + if msg is not None: + print >> sys.stderr, msg + print >> sys.stderr, __doc__ + +def errprint(*args): + sep = "" + for arg in args: + sys.stderr.write(sep + str(arg)) + sep = " " + sys.stderr.write("\n") + +def main(): + import getopt + global verbose, recurse, dryrun, no_backup + + try: + opts, args = getopt.getopt(sys.argv[1:], "drvhB", + ["dryrun", "recurse", "verbose", "help", + "no-backup"]) + except getopt.error, msg: + usage(msg) + return + for o, a in opts: + if o in ('-d', '--dryrun'): + dryrun += 1 + elif o in ('-r', '--recurse'): + recurse += 1 + elif o in ('-v', '--verbose'): + verbose += 1 + elif o in ('-B', '--no-backup'): + no_backup += 1 + elif o in ('-h', '--help'): + usage() + return + if not args: + r = Reindenter(sys.stdin) + r.run() + r.write(sys.stdout) + return + for arg in args: + check(arg) + +def check(file): + if os.path.isdir(file) and not os.path.islink(file): + if verbose: + print "listing directory", file + names = os.listdir(file) + for name in names: + fullname = os.path.join(file, name) + if ((recurse and os.path.isdir(fullname) and + not os.path.islink(fullname)) + or name.lower().endswith(".py")): + check(fullname) + return + + if verbose: + print "checking", file, "...", + try: + f = open(file) + except IOError, msg: + errprint("%s: I/O Error: %s" % (file, str(msg))) + return + + r = Reindenter(f) + f.close() + if r.run(): + if verbose: + print "changed." + if dryrun: + print "But this is a dry run, so leaving it alone." + else: + print "reindented", file, (dryrun and "(dry run => not really)" or "") + if not dryrun: + if not no_backup: + bak = file + ".bak" + if os.path.exists(bak): + os.remove(bak) + os.rename(file, bak) + if verbose: + print "renamed", file, "to", bak + f = open(file, "w") + r.write(f) + f.close() + if verbose: + print "wrote new", file + else: + if verbose: + print "unchanged." + + +class Reindenter: + + def __init__(self, f): + self.find_stmt = 1 # next token begins a fresh stmt? + self.level = 0 # current indent level + + # Raw file lines. + self.raw = f.readlines() + + # File lines, rstripped & tab-expanded. Dummy at start is so + # that we can use tokenize's 1-based line numbering easily. + # Note that a line is all-blank iff it's "\n". + self.lines = [line.rstrip('\n \t').expandtabs() + "\n" + for line in self.raw] + self.lines.insert(0, None) + self.index = 1 # index into self.lines of next line + + # List of (lineno, indentlevel) pairs, one for each stmt and + # comment line. indentlevel is -1 for comment lines, as a + # signal that tokenize doesn't know what to do about them; + # indeed, they're our headache! + self.stats = [] + + def run(self): + tokenize.tokenize(self.getline, self.tokeneater) + # Remove trailing empty lines. + lines = self.lines + while lines and lines[-1] == "\n": + lines.pop() + # Sentinel. + stats = self.stats + stats.append((len(lines), 0)) + # Map count of leading spaces to # we want. + have2want = {} + # Program after transformation. + after = self.after = [] + # Copy over initial empty lines -- there's nothing to do until + # we see a line with *something* on it. + i = stats[0][0] + after.extend(lines[1:i]) + for i in range(len(stats)-1): + thisstmt, thislevel = stats[i] + nextstmt = stats[i+1][0] + have = getlspace(lines[thisstmt]) + want = thislevel * 4 + if want < 0: + # A comment line. + if have: + # An indented comment line. If we saw the same + # indentation before, reuse what it most recently + # mapped to. + want = have2want.get(have, -1) + if want < 0: + # Then it probably belongs to the next real stmt. + for j in xrange(i+1, len(stats)-1): + jline, jlevel = stats[j] + if jlevel >= 0: + if have == getlspace(lines[jline]): + want = jlevel * 4 + break + if want < 0: # Maybe it's a hanging + # comment like this one, + # in which case we should shift it like its base + # line got shifted. + for j in xrange(i-1, -1, -1): + jline, jlevel = stats[j] + if jlevel >= 0: + want = have + getlspace(after[jline-1]) - \ + getlspace(lines[jline]) + break + if want < 0: + # Still no luck -- leave it alone. + want = have + else: + want = 0 + assert want >= 0 + have2want[have] = want + diff = want - have + if diff == 0 or have == 0: + after.extend(lines[thisstmt:nextstmt]) + else: + for line in lines[thisstmt:nextstmt]: + if diff > 0: + if line == "\n": + after.append(line) + else: + after.append(" " * diff + line) + else: + remove = min(getlspace(line), -diff) + after.append(line[remove:]) + return self.raw != self.after + + def write(self, f): + f.writelines(self.after) + + # Line-getter for tokenize. + def getline(self): + if self.index >= len(self.lines): + line = "" + else: + line = self.lines[self.index] + self.index += 1 + return line + + # Line-eater for tokenize. + def tokeneater(self, type, token, (sline, scol), end, line, + INDENT=tokenize.INDENT, + DEDENT=tokenize.DEDENT, + NEWLINE=tokenize.NEWLINE, + COMMENT=tokenize.COMMENT, + NL=tokenize.NL): + + if type == NEWLINE: + # A program statement, or ENDMARKER, will eventually follow, + # after some (possibly empty) run of tokens of the form + # (NL | COMMENT)* (INDENT | DEDENT+)? + self.find_stmt = 1 + + elif type == INDENT: + self.find_stmt = 1 + self.level += 1 + + elif type == DEDENT: + self.find_stmt = 1 + self.level -= 1 + + elif type == COMMENT: + if self.find_stmt: + self.stats.append((sline, -1)) + # but we're still looking for a new stmt, so leave + # find_stmt alone + + elif type == NL: + pass + + elif self.find_stmt: + # This is the first "real token" following a NEWLINE, so it + # must be the first token of the next program statement, or an + # ENDMARKER. + self.find_stmt = 0 + if line: # not endmarker + self.stats.append((sline, self.level)) + +# Count number of leading blanks. +def getlspace(line): + i, n = 0, len(line) + while i < n and line[i] == " ": + i += 1 + return i + +if __name__ == '__main__': + main() |
