diff options
author | Georg Brandl <georg@python.org> | 2014-03-04 14:17:10 +0100 |
---|---|---|
committer | Georg Brandl <georg@python.org> | 2014-03-04 14:17:10 +0100 |
commit | 18348a61d7e90b03a624fdc78fafdcb46b92307d (patch) | |
tree | e4fe1541ad9e2ada2de394eb2e020e2a0916ce94 /scripts | |
parent | cd9c0b70635f2a6c65ea97d042537478a0a95b7a (diff) | |
parent | 27895fe85076d2f1b44e7d30387b3f459fc60281 (diff) | |
download | pygments-18348a61d7e90b03a624fdc78fafdcb46b92307d.tar.gz |
merge with raichoo/pygments-main (pull request #210)
Diffstat (limited to 'scripts')
-rwxr-xr-x | scripts/check_sources.py | 49 | ||||
-rw-r--r-- | scripts/detect_missing_analyse_text.py | 9 | ||||
-rwxr-xr-x | scripts/find_codetags.py | 46 | ||||
-rwxr-xr-x | scripts/find_error.py | 29 | ||||
-rw-r--r-- | scripts/get_vimkw.py | 6 | ||||
-rwxr-xr-x | scripts/reindent.py | 291 | ||||
-rwxr-xr-x[-rw-r--r--] | scripts/vim2pygments.py | 16 |
7 files changed, 88 insertions, 358 deletions
diff --git a/scripts/check_sources.py b/scripts/check_sources.py index d9e5c2ae..71aff299 100755 --- a/scripts/check_sources.py +++ b/scripts/check_sources.py @@ -7,13 +7,17 @@ Make sure each Python file has a correct file header including copyright and license information. - :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ -import sys, os, re +from __future__ import print_function + +import io +import os +import re +import sys import getopt -import cStringIO from os.path import join, splitext, abspath @@ -30,7 +34,7 @@ def checker(*suffixes, **kwds): name_mail_re = r'[\w ]+(<.*?>)?' -copyright_re = re.compile(r'^ :copyright: Copyright 2006-2013 by ' +copyright_re = re.compile(r'^ :copyright: Copyright 2006-2014 by ' r'the Pygments team, see AUTHORS\.$', re.UNICODE) copyright_2_re = re.compile(r'^ %s(, %s)*[,.]$' % (name_mail_re, name_mail_re), re.UNICODE) @@ -46,7 +50,7 @@ misspellings = ["developement", "adress", "verificate", # ALLOW-MISSPELLING def check_syntax(fn, lines): try: compile(''.join(lines), fn, "exec") - except SyntaxError, err: + except SyntaxError as err: yield 0, "not compilable: %s" % err @@ -67,9 +71,12 @@ def check_style_and_encoding(fn, lines): encoding = co.group(1) try: line.decode(encoding) - except UnicodeDecodeError, err: + except AttributeError: + # Python 3 - encoding was already checked + pass + except UnicodeDecodeError as err: yield lno+1, "not decodable: %s\n Line: %r" % (err, line) - except LookupError, err: + except LookupError as err: yield 0, "unknown encoding: %s" % encoding encoding = 'latin1' @@ -130,7 +137,7 @@ def check_fileheader(fn, lines): yield 0, "no correct license info" ci = -3 - copyright = [s.decode('utf-8') for s in llist[ci:ci+1]] + copyright = llist[ci:ci+1] while copyright and copyright_2_re.match(copyright[0]): ci -= 1 copyright = llist[ci:ci+1] @@ -165,7 +172,7 @@ def main(argv): try: gopts, args = getopt.getopt(argv[1:], "vi:") except getopt.GetoptError: - print "Usage: %s [-v] [-i ignorepath]* [path]" % argv[0] + print("Usage: %s [-v] [-i ignorepath]* [path]" % argv[0]) return 2 opts = {} for opt, val in gopts: @@ -178,20 +185,20 @@ def main(argv): elif len(args) == 1: path = args[0] else: - print "Usage: %s [-v] [-i ignorepath]* [path]" % argv[0] + print("Usage: %s [-v] [-i ignorepath]* [path]" % argv[0]) return 2 verbose = '-v' in opts num = 0 - out = cStringIO.StringIO() + out = io.StringIO() # TODO: replace os.walk run with iteration over output of # `svn list -R`. for root, dirs, files in os.walk(path): - if '.svn' in dirs: - dirs.remove('.svn') + if '.hg' in dirs: + dirs.remove('.hg') if '-i' in opts and abspath(root) in opts['-i']: del dirs[:] continue @@ -212,13 +219,13 @@ def main(argv): continue if verbose: - print "Checking %s..." % fn + print("Checking %s..." % fn) try: f = open(fn, 'r') lines = list(f) - except (IOError, OSError), err: - print "%s: cannot open: %s" % (fn, err) + except (IOError, OSError) as err: + print("%s: cannot open: %s" % (fn, err)) num += 1 continue @@ -226,15 +233,15 @@ def main(argv): if not in_pocoo_pkg and checker.only_pkg: continue for lno, msg in checker(fn, lines): - print >>out, "%s:%d: %s" % (fn, lno, msg) + print(u"%s:%d: %s" % (fn, lno, msg), file=out) num += 1 if verbose: - print + print() if num == 0: - print "No errors found." + print("No errors found.") else: - print out.getvalue().rstrip('\n') - print "%d error%s found." % (num, num > 1 and "s" or "") + print(out.getvalue().rstrip('\n')) + print("%d error%s found." % (num, num > 1 and "s" or "")) return int(num > 0) diff --git a/scripts/detect_missing_analyse_text.py b/scripts/detect_missing_analyse_text.py index 1312648f..ab58558e 100644 --- a/scripts/detect_missing_analyse_text.py +++ b/scripts/detect_missing_analyse_text.py @@ -1,3 +1,4 @@ +from __future__ import print_function import sys from pygments.lexers import get_all_lexers, find_lexer_class @@ -9,22 +10,22 @@ def main(): for name, aliases, filenames, mimetypes in get_all_lexers(): cls = find_lexer_class(name) if not cls.aliases: - print cls, "has no aliases" + print(cls, "has no aliases") for f in filenames: if f not in uses: uses[f] = [] uses[f].append(cls) ret = 0 - for k, v in uses.iteritems(): + for k, v in uses.items(): if len(v) > 1: #print "Multiple for", k, v for i in v: if i.analyse_text is None: - print i, "has a None analyse_text" + print(i, "has a None analyse_text") ret |= 1 elif Lexer.analyse_text.__doc__ == i.analyse_text.__doc__: - print i, "needs analyse_text, multiple lexers for", k + print(i, "needs analyse_text, multiple lexers for", k) ret |= 2 return ret diff --git a/scripts/find_codetags.py b/scripts/find_codetags.py index 2fb18333..f8204e6e 100755 --- a/scripts/find_codetags.py +++ b/scripts/find_codetags.py @@ -7,11 +7,15 @@ Find code tags in specified files and/or directories and create a report in HTML format. - :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ -import sys, os, re +from __future__ import print_function + +import os +import re +import sys import getopt from os.path import join, abspath, isdir, isfile @@ -73,8 +77,8 @@ def main(): try: gopts, args = getopt.getopt(sys.argv[1:], "vo:i:") except getopt.GetoptError: - print ("Usage: %s [-v] [-i ignoredir]* [-o reportfile.html] " - "path ..." % sys.argv[0]) + print(("Usage: %s [-v] [-i ignoredir]* [-o reportfile.html] " + "path ..." % sys.argv[0])) return 2 opts = {} for opt, val in gopts: @@ -97,18 +101,18 @@ def main(): num = 0 for path in args: - print "Searching for code tags in %s, please wait." % path + print("Searching for code tags in %s, please wait." % path) if isfile(path): gnum += 1 if process_file(store, path): if verbose: - print path + ": found %d tags" % \ - (path in store and len(store[path]) or 0) + print(path + ": found %d tags" % \ + (path in store and len(store[path]) or 0)) num += 1 else: if verbose: - print path + ": binary or not readable" + print(path + ": binary or not readable") continue elif not isdir(path): continue @@ -117,11 +121,15 @@ def main(): if '-i' in opts and abspath(root) in opts['-i']: del dirs[:] continue - if '.svn' in dirs: - dirs.remove('.svn') + if '.hg' in dirs: + dirs.remove('.hg') + if 'examplefiles' in dirs: + dirs.remove('examplefiles') + if 'dist' in dirs: + dirs.remove('dist') for fn in files: gnum += 1 - if gnum % 50 == 0 and not verbose: + if gnum % 25 == 0 and not verbose: sys.stdout.write('.') sys.stdout.flush() @@ -137,16 +145,16 @@ def main(): if fn[:2] == './': fn = fn[2:] if process_file(store, fn): if verbose: - print fn + ": found %d tags" % \ - (fn in store and len(store[fn]) or 0) + print(fn + ": found %d tags" % \ + (fn in store and len(store[fn]) or 0)) num += 1 else: if verbose: - print fn + ": binary or not readable" - print + print(fn + ": binary or not readable") + print() - print "Processed %d of %d files. Found %d tags in %d files." % ( - num, gnum, sum(len(fitem) for fitem in store.itervalues()), len(store)) + print("Processed %d of %d files. Found %d tags in %d files." % ( + num, gnum, sum(len(fitem) for fitem in store.values()), len(store))) if not store: return 0 @@ -190,7 +198,7 @@ td { padding: 2px 5px 2px 5px; '<td class="tag %%(tag)s">%%(tag)s</td>' '<td class="who">%%(who)s</td><td class="what">%%(what)s</td></tr>') - f = file(output, 'w') + f = open(output, 'w') table = '\n'.join(TABLE % fname + '\n'.join(TR % (no % 2,) % entry for no, entry in enumerate(store[fname])) @@ -198,7 +206,7 @@ td { padding: 2px 5px 2px 5px; f.write(HTML % (', '.join(map(abspath, args)), table)) f.close() - print "Report written to %s." % output + print("Report written to %s." % output) return 0 if __name__ == '__main__': diff --git a/scripts/find_error.py b/scripts/find_error.py index 00923569..7aaa9bee 100755 --- a/scripts/find_error.py +++ b/scripts/find_error.py @@ -8,11 +8,14 @@ the text where Error tokens are being generated, along with some context. - :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. + :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ -import sys, os +from __future__ import print_function + +import os +import sys # always prefer Pygments from source if exists srcpath = os.path.join(os.path.dirname(__file__), '..') @@ -104,36 +107,36 @@ def main(fn, lexer=None, options={}): # already debugged before debug_lexer = True lno = 1 - text = file(fn, 'U').read() + text = open(fn, 'U').read() text = text.strip('\n') + '\n' tokens = [] states = [] def show_token(tok, state): reprs = map(repr, tok) - print ' ' + reprs[1] + ' ' + ' ' * (29-len(reprs[1])) + reprs[0], + print(' ' + reprs[1] + ' ' + ' ' * (29-len(reprs[1])) + reprs[0], end=' ') if debug_lexer: - print ' ' + ' ' * (29-len(reprs[0])) + repr(state), - print + print(' ' + ' ' * (29-len(reprs[0])) + repr(state), end=' ') + print() for type, val in lx.get_tokens(text): lno += val.count('\n') if type == Error: - print 'Error parsing', fn, 'on line', lno - print 'Previous tokens' + (debug_lexer and ' and states' or '') + ':' + print('Error parsing', fn, 'on line', lno) + print('Previous tokens' + (debug_lexer and ' and states' or '') + ':') if showall: for tok, state in map(None, tokens, states): show_token(tok, state) else: for i in range(max(len(tokens) - num, 0), len(tokens)): show_token(tokens[i], states[i]) - print 'Error token:' + print('Error token:') l = len(repr(val)) - print ' ' + repr(val), + print(' ' + repr(val), end=' ') if debug_lexer and hasattr(lx, 'statestack'): - print ' ' * (60-l) + repr(lx.statestack), - print - print + print(' ' * (60-l) + repr(lx.statestack), end=' ') + print() + print() return 1 tokens.append((type, val)) if debug_lexer: diff --git a/scripts/get_vimkw.py b/scripts/get_vimkw.py index 153c88c3..4ea302f4 100644 --- a/scripts/get_vimkw.py +++ b/scripts/get_vimkw.py @@ -1,5 +1,5 @@ +from __future__ import print_function import re -from pprint import pprint r_line = re.compile(r"^(syn keyword vimCommand contained|syn keyword vimOption " r"contained|syn keyword vimAutoEvent contained)\s+(.*)") @@ -31,12 +31,12 @@ def getkw(input, output): for a, b in output_info.items(): b.sort() - print >>out, '%s=[%s]' % (a, ','.join(b)) + print('%s=[%s]' % (a, ','.join(b)), file=out) def is_keyword(w, keywords): for i in range(len(w), 0, -1): if w[:i] in keywords: - return signals[w[:i]][:len(w)] == w + return keywords[w[:i]][:len(w)] == w return False if __name__ == "__main__": diff --git a/scripts/reindent.py b/scripts/reindent.py deleted file mode 100755 index e6ee8287..00000000 --- a/scripts/reindent.py +++ /dev/null @@ -1,291 +0,0 @@ -#! /usr/bin/env python - -# Released to the public domain, by Tim Peters, 03 October 2000. -# -B option added by Georg Brandl, 2006. - -"""reindent [-d][-r][-v] [ path ... ] - --d (--dryrun) Dry run. Analyze, but don't make any changes to files. --r (--recurse) Recurse. Search for all .py files in subdirectories too. --B (--no-backup) Don't write .bak backup files. --v (--verbose) Verbose. Print informative msgs; else only names of changed files. --h (--help) Help. Print this usage information and exit. - -Change Python (.py) files to use 4-space indents and no hard tab characters. -Also trim excess spaces and tabs from ends of lines, and remove empty lines -at the end of files. Also ensure the last line ends with a newline. - -If no paths are given on the command line, reindent operates as a filter, -reading a single source file from standard input and writing the transformed -source to standard output. In this case, the -d, -r and -v flags are -ignored. - -You can pass one or more file and/or directory paths. When a directory -path, all .py files within the directory will be examined, and, if the -r -option is given, likewise recursively for subdirectories. - -If output is not to standard output, reindent overwrites files in place, -renaming the originals with a .bak extension. If it finds nothing to -change, the file is left alone. If reindent does change a file, the changed -file is a fixed-point for future runs (i.e., running reindent on the -resulting .py file won't change it again). - -The hard part of reindenting is figuring out what to do with comment -lines. So long as the input files get a clean bill of health from -tabnanny.py, reindent should do a good job. -""" - -__version__ = "1" - -import tokenize -import os -import sys - -verbose = 0 -recurse = 0 -dryrun = 0 -no_backup = 0 - -def usage(msg=None): - if msg is not None: - print >> sys.stderr, msg - print >> sys.stderr, __doc__ - -def errprint(*args): - sep = "" - for arg in args: - sys.stderr.write(sep + str(arg)) - sep = " " - sys.stderr.write("\n") - -def main(): - import getopt - global verbose, recurse, dryrun, no_backup - - try: - opts, args = getopt.getopt(sys.argv[1:], "drvhB", - ["dryrun", "recurse", "verbose", "help", - "no-backup"]) - except getopt.error, msg: - usage(msg) - return - for o, a in opts: - if o in ('-d', '--dryrun'): - dryrun += 1 - elif o in ('-r', '--recurse'): - recurse += 1 - elif o in ('-v', '--verbose'): - verbose += 1 - elif o in ('-B', '--no-backup'): - no_backup += 1 - elif o in ('-h', '--help'): - usage() - return - if not args: - r = Reindenter(sys.stdin) - r.run() - r.write(sys.stdout) - return - for arg in args: - check(arg) - -def check(file): - if os.path.isdir(file) and not os.path.islink(file): - if verbose: - print "listing directory", file - names = os.listdir(file) - for name in names: - fullname = os.path.join(file, name) - if ((recurse and os.path.isdir(fullname) and - not os.path.islink(fullname)) - or name.lower().endswith(".py")): - check(fullname) - return - - if verbose: - print "checking", file, "...", - try: - f = open(file) - except IOError, msg: - errprint("%s: I/O Error: %s" % (file, str(msg))) - return - - r = Reindenter(f) - f.close() - if r.run(): - if verbose: - print "changed." - if dryrun: - print "But this is a dry run, so leaving it alone." - else: - print "reindented", file, (dryrun and "(dry run => not really)" or "") - if not dryrun: - if not no_backup: - bak = file + ".bak" - if os.path.exists(bak): - os.remove(bak) - os.rename(file, bak) - if verbose: - print "renamed", file, "to", bak - f = open(file, "w") - r.write(f) - f.close() - if verbose: - print "wrote new", file - else: - if verbose: - print "unchanged." - - -class Reindenter: - - def __init__(self, f): - self.find_stmt = 1 # next token begins a fresh stmt? - self.level = 0 # current indent level - - # Raw file lines. - self.raw = f.readlines() - - # File lines, rstripped & tab-expanded. Dummy at start is so - # that we can use tokenize's 1-based line numbering easily. - # Note that a line is all-blank iff it's "\n". - self.lines = [line.rstrip('\n \t').expandtabs() + "\n" - for line in self.raw] - self.lines.insert(0, None) - self.index = 1 # index into self.lines of next line - - # List of (lineno, indentlevel) pairs, one for each stmt and - # comment line. indentlevel is -1 for comment lines, as a - # signal that tokenize doesn't know what to do about them; - # indeed, they're our headache! - self.stats = [] - - def run(self): - tokenize.tokenize(self.getline, self.tokeneater) - # Remove trailing empty lines. - lines = self.lines - while lines and lines[-1] == "\n": - lines.pop() - # Sentinel. - stats = self.stats - stats.append((len(lines), 0)) - # Map count of leading spaces to # we want. - have2want = {} - # Program after transformation. - after = self.after = [] - # Copy over initial empty lines -- there's nothing to do until - # we see a line with *something* on it. - i = stats[0][0] - after.extend(lines[1:i]) - for i in range(len(stats)-1): - thisstmt, thislevel = stats[i] - nextstmt = stats[i+1][0] - have = getlspace(lines[thisstmt]) - want = thislevel * 4 - if want < 0: - # A comment line. - if have: - # An indented comment line. If we saw the same - # indentation before, reuse what it most recently - # mapped to. - want = have2want.get(have, -1) - if want < 0: - # Then it probably belongs to the next real stmt. - for j in xrange(i+1, len(stats)-1): - jline, jlevel = stats[j] - if jlevel >= 0: - if have == getlspace(lines[jline]): - want = jlevel * 4 - break - if want < 0: # Maybe it's a hanging - # comment like this one, - # in which case we should shift it like its base - # line got shifted. - for j in xrange(i-1, -1, -1): - jline, jlevel = stats[j] - if jlevel >= 0: - want = have + getlspace(after[jline-1]) - \ - getlspace(lines[jline]) - break - if want < 0: - # Still no luck -- leave it alone. - want = have - else: - want = 0 - assert want >= 0 - have2want[have] = want - diff = want - have - if diff == 0 or have == 0: - after.extend(lines[thisstmt:nextstmt]) - else: - for line in lines[thisstmt:nextstmt]: - if diff > 0: - if line == "\n": - after.append(line) - else: - after.append(" " * diff + line) - else: - remove = min(getlspace(line), -diff) - after.append(line[remove:]) - return self.raw != self.after - - def write(self, f): - f.writelines(self.after) - - # Line-getter for tokenize. - def getline(self): - if self.index >= len(self.lines): - line = "" - else: - line = self.lines[self.index] - self.index += 1 - return line - - # Line-eater for tokenize. - def tokeneater(self, type, token, (sline, scol), end, line, - INDENT=tokenize.INDENT, - DEDENT=tokenize.DEDENT, - NEWLINE=tokenize.NEWLINE, - COMMENT=tokenize.COMMENT, - NL=tokenize.NL): - - if type == NEWLINE: - # A program statement, or ENDMARKER, will eventually follow, - # after some (possibly empty) run of tokens of the form - # (NL | COMMENT)* (INDENT | DEDENT+)? - self.find_stmt = 1 - - elif type == INDENT: - self.find_stmt = 1 - self.level += 1 - - elif type == DEDENT: - self.find_stmt = 1 - self.level -= 1 - - elif type == COMMENT: - if self.find_stmt: - self.stats.append((sline, -1)) - # but we're still looking for a new stmt, so leave - # find_stmt alone - - elif type == NL: - pass - - elif self.find_stmt: - # This is the first "real token" following a NEWLINE, so it - # must be the first token of the next program statement, or an - # ENDMARKER. - self.find_stmt = 0 - if line: # not endmarker - self.stats.append((sline, self.level)) - -# Count number of leading blanks. -def getlspace(line): - i, n = 0, len(line) - while i < n and line[i] == " ": - i += 1 - return i - -if __name__ == '__main__': - main() diff --git a/scripts/vim2pygments.py b/scripts/vim2pygments.py index 80f0ada2..42af0bbe 100644..100755 --- a/scripts/vim2pygments.py +++ b/scripts/vim2pygments.py @@ -11,10 +11,12 @@ :license: BSD, see LICENSE for details. """ +from __future__ import print_function + import sys import re from os import path -from cStringIO import StringIO +from io import StringIO split_re = re.compile(r'(?<!\\)\s+') @@ -765,7 +767,7 @@ TOKENS = { } TOKEN_TYPES = set() -for token in TOKENS.itervalues(): +for token in TOKENS.values(): if not isinstance(token, tuple): token = (token,) for token in token: @@ -836,7 +838,7 @@ def find_colors(code): colors['Normal']['bgcolor'] = bg_color color_map = {} - for token, styles in colors.iteritems(): + for token, styles in colors.items(): if token in TOKENS: tmp = [] if styles.get('noinherit'): @@ -879,7 +881,7 @@ class StyleWriter(object): def write(self, out): self.write_header(out) default_token, tokens = find_colors(self.code) - tokens = tokens.items() + tokens = list(tokens.items()) tokens.sort(lambda a, b: cmp(len(a[0]), len(a[1]))) bg_color = [x[3:] for x in default_token.split() if x.startswith('bg:')] if bg_color: @@ -916,14 +918,14 @@ def convert(filename, stream=None): def main(): if len(sys.argv) != 2 or sys.argv[1] in ('-h', '--help'): - print 'Usage: %s <filename.vim>' % sys.argv[0] + print('Usage: %s <filename.vim>' % sys.argv[0]) return 2 if sys.argv[1] in ('-v', '--version'): - print '%s %s' % (SCRIPT_NAME, SCRIPT_VERSION) + print('%s %s' % (SCRIPT_NAME, SCRIPT_VERSION)) return filename = sys.argv[1] if not (path.exists(filename) and path.isfile(filename)): - print 'Error: %s not found' % filename + print('Error: %s not found' % filename) return 1 convert(filename, sys.stdout) sys.stdout.write('\n') |