summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
author: Georg Brandl <georg@python.org> 2014-03-04 14:17:10 +0100
committer: Georg Brandl <georg@python.org> 2014-03-04 14:17:10 +0100
commit: 18348a61d7e90b03a624fdc78fafdcb46b92307d (patch)
tree: e4fe1541ad9e2ada2de394eb2e020e2a0916ce94 /scripts
parent: cd9c0b70635f2a6c65ea97d042537478a0a95b7a (diff)
parent: 27895fe85076d2f1b44e7d30387b3f459fc60281 (diff)
download: pygments-18348a61d7e90b03a624fdc78fafdcb46b92307d.tar.gz
merge with raichoo/pygments-main (pull request #210)
Diffstat (limited to 'scripts')
-rwxr-xr-x scripts/check_sources.py 49
-rw-r--r-- scripts/detect_missing_analyse_text.py 9
-rwxr-xr-x scripts/find_codetags.py 46
-rwxr-xr-x scripts/find_error.py 29
-rw-r--r-- scripts/get_vimkw.py 6
-rwxr-xr-x scripts/reindent.py 291
-rwxr-xr-x[-rw-r--r--] scripts/vim2pygments.py 16
7 files changed, 88 insertions, 358 deletions
diff --git a/scripts/check_sources.py b/scripts/check_sources.py
index d9e5c2ae..71aff299 100755
--- a/scripts/check_sources.py
+++ b/scripts/check_sources.py
@@ -7,13 +7,17 @@
Make sure each Python file has a correct file header
including copyright and license information.
- :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
+ :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
-import sys, os, re
+from __future__ import print_function
+
+import io
+import os
+import re
+import sys
import getopt
-import cStringIO
from os.path import join, splitext, abspath
@@ -30,7 +34,7 @@ def checker(*suffixes, **kwds):
name_mail_re = r'[\w ]+(<.*?>)?'
-copyright_re = re.compile(r'^ :copyright: Copyright 2006-2013 by '
+copyright_re = re.compile(r'^ :copyright: Copyright 2006-2014 by '
r'the Pygments team, see AUTHORS\.$', re.UNICODE)
copyright_2_re = re.compile(r'^ %s(, %s)*[,.]$' %
(name_mail_re, name_mail_re), re.UNICODE)
@@ -46,7 +50,7 @@ misspellings = ["developement", "adress", "verificate", # ALLOW-MISSPELLING
def check_syntax(fn, lines):
try:
compile(''.join(lines), fn, "exec")
- except SyntaxError, err:
+ except SyntaxError as err:
yield 0, "not compilable: %s" % err
@@ -67,9 +71,12 @@ def check_style_and_encoding(fn, lines):
encoding = co.group(1)
try:
line.decode(encoding)
- except UnicodeDecodeError, err:
+ except AttributeError:
+ # Python 3 - encoding was already checked
+ pass
+ except UnicodeDecodeError as err:
yield lno+1, "not decodable: %s\n Line: %r" % (err, line)
- except LookupError, err:
+ except LookupError as err:
yield 0, "unknown encoding: %s" % encoding
encoding = 'latin1'
@@ -130,7 +137,7 @@ def check_fileheader(fn, lines):
yield 0, "no correct license info"
ci = -3
- copyright = [s.decode('utf-8') for s in llist[ci:ci+1]]
+ copyright = llist[ci:ci+1]
while copyright and copyright_2_re.match(copyright[0]):
ci -= 1
copyright = llist[ci:ci+1]
@@ -165,7 +172,7 @@ def main(argv):
try:
gopts, args = getopt.getopt(argv[1:], "vi:")
except getopt.GetoptError:
- print "Usage: %s [-v] [-i ignorepath]* [path]" % argv[0]
+ print("Usage: %s [-v] [-i ignorepath]* [path]" % argv[0])
return 2
opts = {}
for opt, val in gopts:
@@ -178,20 +185,20 @@ def main(argv):
elif len(args) == 1:
path = args[0]
else:
- print "Usage: %s [-v] [-i ignorepath]* [path]" % argv[0]
+ print("Usage: %s [-v] [-i ignorepath]* [path]" % argv[0])
return 2
verbose = '-v' in opts
num = 0
- out = cStringIO.StringIO()
+ out = io.StringIO()
# TODO: replace os.walk run with iteration over output of
# `svn list -R`.
for root, dirs, files in os.walk(path):
- if '.svn' in dirs:
- dirs.remove('.svn')
+ if '.hg' in dirs:
+ dirs.remove('.hg')
if '-i' in opts and abspath(root) in opts['-i']:
del dirs[:]
continue
@@ -212,13 +219,13 @@ def main(argv):
continue
if verbose:
- print "Checking %s..." % fn
+ print("Checking %s..." % fn)
try:
f = open(fn, 'r')
lines = list(f)
- except (IOError, OSError), err:
- print "%s: cannot open: %s" % (fn, err)
+ except (IOError, OSError) as err:
+ print("%s: cannot open: %s" % (fn, err))
num += 1
continue
@@ -226,15 +233,15 @@ def main(argv):
if not in_pocoo_pkg and checker.only_pkg:
continue
for lno, msg in checker(fn, lines):
- print >>out, "%s:%d: %s" % (fn, lno, msg)
+ print(u"%s:%d: %s" % (fn, lno, msg), file=out)
num += 1
if verbose:
- print
+ print()
if num == 0:
- print "No errors found."
+ print("No errors found.")
else:
- print out.getvalue().rstrip('\n')
- print "%d error%s found." % (num, num > 1 and "s" or "")
+ print(out.getvalue().rstrip('\n'))
+ print("%d error%s found." % (num, num > 1 and "s" or ""))
return int(num > 0)
diff --git a/scripts/detect_missing_analyse_text.py b/scripts/detect_missing_analyse_text.py
index 1312648f..ab58558e 100644
--- a/scripts/detect_missing_analyse_text.py
+++ b/scripts/detect_missing_analyse_text.py
@@ -1,3 +1,4 @@
+from __future__ import print_function
import sys
from pygments.lexers import get_all_lexers, find_lexer_class
@@ -9,22 +10,22 @@ def main():
for name, aliases, filenames, mimetypes in get_all_lexers():
cls = find_lexer_class(name)
if not cls.aliases:
- print cls, "has no aliases"
+ print(cls, "has no aliases")
for f in filenames:
if f not in uses:
uses[f] = []
uses[f].append(cls)
ret = 0
- for k, v in uses.iteritems():
+ for k, v in uses.items():
if len(v) > 1:
#print "Multiple for", k, v
for i in v:
if i.analyse_text is None:
- print i, "has a None analyse_text"
+ print(i, "has a None analyse_text")
ret |= 1
elif Lexer.analyse_text.__doc__ == i.analyse_text.__doc__:
- print i, "needs analyse_text, multiple lexers for", k
+ print(i, "needs analyse_text, multiple lexers for", k)
ret |= 2
return ret
diff --git a/scripts/find_codetags.py b/scripts/find_codetags.py
index 2fb18333..f8204e6e 100755
--- a/scripts/find_codetags.py
+++ b/scripts/find_codetags.py
@@ -7,11 +7,15 @@
Find code tags in specified files and/or directories
and create a report in HTML format.
- :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
+ :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
-import sys, os, re
+from __future__ import print_function
+
+import os
+import re
+import sys
import getopt
from os.path import join, abspath, isdir, isfile
@@ -73,8 +77,8 @@ def main():
try:
gopts, args = getopt.getopt(sys.argv[1:], "vo:i:")
except getopt.GetoptError:
- print ("Usage: %s [-v] [-i ignoredir]* [-o reportfile.html] "
- "path ..." % sys.argv[0])
+ print(("Usage: %s [-v] [-i ignoredir]* [-o reportfile.html] "
+ "path ..." % sys.argv[0]))
return 2
opts = {}
for opt, val in gopts:
@@ -97,18 +101,18 @@ def main():
num = 0
for path in args:
- print "Searching for code tags in %s, please wait." % path
+ print("Searching for code tags in %s, please wait." % path)
if isfile(path):
gnum += 1
if process_file(store, path):
if verbose:
- print path + ": found %d tags" % \
- (path in store and len(store[path]) or 0)
+ print(path + ": found %d tags" % \
+ (path in store and len(store[path]) or 0))
num += 1
else:
if verbose:
- print path + ": binary or not readable"
+ print(path + ": binary or not readable")
continue
elif not isdir(path):
continue
@@ -117,11 +121,15 @@ def main():
if '-i' in opts and abspath(root) in opts['-i']:
del dirs[:]
continue
- if '.svn' in dirs:
- dirs.remove('.svn')
+ if '.hg' in dirs:
+ dirs.remove('.hg')
+ if 'examplefiles' in dirs:
+ dirs.remove('examplefiles')
+ if 'dist' in dirs:
+ dirs.remove('dist')
for fn in files:
gnum += 1
- if gnum % 50 == 0 and not verbose:
+ if gnum % 25 == 0 and not verbose:
sys.stdout.write('.')
sys.stdout.flush()
@@ -137,16 +145,16 @@ def main():
if fn[:2] == './': fn = fn[2:]
if process_file(store, fn):
if verbose:
- print fn + ": found %d tags" % \
- (fn in store and len(store[fn]) or 0)
+ print(fn + ": found %d tags" % \
+ (fn in store and len(store[fn]) or 0))
num += 1
else:
if verbose:
- print fn + ": binary or not readable"
- print
+ print(fn + ": binary or not readable")
+ print()
- print "Processed %d of %d files. Found %d tags in %d files." % (
- num, gnum, sum(len(fitem) for fitem in store.itervalues()), len(store))
+ print("Processed %d of %d files. Found %d tags in %d files." % (
+ num, gnum, sum(len(fitem) for fitem in store.values()), len(store)))
if not store:
return 0
@@ -190,7 +198,7 @@ td { padding: 2px 5px 2px 5px;
'<td class="tag %%(tag)s">%%(tag)s</td>'
'<td class="who">%%(who)s</td><td class="what">%%(what)s</td></tr>')
- f = file(output, 'w')
+ f = open(output, 'w')
table = '\n'.join(TABLE % fname +
'\n'.join(TR % (no % 2,) % entry
for no, entry in enumerate(store[fname]))
@@ -198,7 +206,7 @@ td { padding: 2px 5px 2px 5px;
f.write(HTML % (', '.join(map(abspath, args)), table))
f.close()
- print "Report written to %s." % output
+ print("Report written to %s." % output)
return 0
if __name__ == '__main__':
diff --git a/scripts/find_error.py b/scripts/find_error.py
index 00923569..7aaa9bee 100755
--- a/scripts/find_error.py
+++ b/scripts/find_error.py
@@ -8,11 +8,14 @@
the text where Error tokens are being generated, along
with some context.
- :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS.
+ :copyright: Copyright 2006-2014 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
-import sys, os
+from __future__ import print_function
+
+import os
+import sys
# always prefer Pygments from source if exists
srcpath = os.path.join(os.path.dirname(__file__), '..')
@@ -104,36 +107,36 @@ def main(fn, lexer=None, options={}):
# already debugged before
debug_lexer = True
lno = 1
- text = file(fn, 'U').read()
+ text = open(fn, 'U').read()
text = text.strip('\n') + '\n'
tokens = []
states = []
def show_token(tok, state):
reprs = map(repr, tok)
- print ' ' + reprs[1] + ' ' + ' ' * (29-len(reprs[1])) + reprs[0],
+ print(' ' + reprs[1] + ' ' + ' ' * (29-len(reprs[1])) + reprs[0], end=' ')
if debug_lexer:
- print ' ' + ' ' * (29-len(reprs[0])) + repr(state),
- print
+ print(' ' + ' ' * (29-len(reprs[0])) + repr(state), end=' ')
+ print()
for type, val in lx.get_tokens(text):
lno += val.count('\n')
if type == Error:
- print 'Error parsing', fn, 'on line', lno
- print 'Previous tokens' + (debug_lexer and ' and states' or '') + ':'
+ print('Error parsing', fn, 'on line', lno)
+ print('Previous tokens' + (debug_lexer and ' and states' or '') + ':')
if showall:
for tok, state in map(None, tokens, states):
show_token(tok, state)
else:
for i in range(max(len(tokens) - num, 0), len(tokens)):
show_token(tokens[i], states[i])
- print 'Error token:'
+ print('Error token:')
l = len(repr(val))
- print ' ' + repr(val),
+ print(' ' + repr(val), end=' ')
if debug_lexer and hasattr(lx, 'statestack'):
- print ' ' * (60-l) + repr(lx.statestack),
- print
- print
+ print(' ' * (60-l) + repr(lx.statestack), end=' ')
+ print()
+ print()
return 1
tokens.append((type, val))
if debug_lexer:
diff --git a/scripts/get_vimkw.py b/scripts/get_vimkw.py
index 153c88c3..4ea302f4 100644
--- a/scripts/get_vimkw.py
+++ b/scripts/get_vimkw.py
@@ -1,5 +1,5 @@
+from __future__ import print_function
import re
-from pprint import pprint
r_line = re.compile(r"^(syn keyword vimCommand contained|syn keyword vimOption "
r"contained|syn keyword vimAutoEvent contained)\s+(.*)")
@@ -31,12 +31,12 @@ def getkw(input, output):
for a, b in output_info.items():
b.sort()
- print >>out, '%s=[%s]' % (a, ','.join(b))
+ print('%s=[%s]' % (a, ','.join(b)), file=out)
def is_keyword(w, keywords):
for i in range(len(w), 0, -1):
if w[:i] in keywords:
- return signals[w[:i]][:len(w)] == w
+ return keywords[w[:i]][:len(w)] == w
return False
if __name__ == "__main__":
diff --git a/scripts/reindent.py b/scripts/reindent.py
deleted file mode 100755
index e6ee8287..00000000
--- a/scripts/reindent.py
+++ /dev/null
@@ -1,291 +0,0 @@
-#! /usr/bin/env python
-
-# Released to the public domain, by Tim Peters, 03 October 2000.
-# -B option added by Georg Brandl, 2006.
-
-"""reindent [-d][-r][-v] [ path ... ]
-
--d (--dryrun) Dry run. Analyze, but don't make any changes to files.
--r (--recurse) Recurse. Search for all .py files in subdirectories too.
--B (--no-backup) Don't write .bak backup files.
--v (--verbose) Verbose. Print informative msgs; else only names of changed files.
--h (--help) Help. Print this usage information and exit.
-
-Change Python (.py) files to use 4-space indents and no hard tab characters.
-Also trim excess spaces and tabs from ends of lines, and remove empty lines
-at the end of files. Also ensure the last line ends with a newline.
-
-If no paths are given on the command line, reindent operates as a filter,
-reading a single source file from standard input and writing the transformed
-source to standard output. In this case, the -d, -r and -v flags are
-ignored.
-
-You can pass one or more file and/or directory paths. When a directory
-path, all .py files within the directory will be examined, and, if the -r
-option is given, likewise recursively for subdirectories.
-
-If output is not to standard output, reindent overwrites files in place,
-renaming the originals with a .bak extension. If it finds nothing to
-change, the file is left alone. If reindent does change a file, the changed
-file is a fixed-point for future runs (i.e., running reindent on the
-resulting .py file won't change it again).
-
-The hard part of reindenting is figuring out what to do with comment
-lines. So long as the input files get a clean bill of health from
-tabnanny.py, reindent should do a good job.
-"""
-
-__version__ = "1"
-
-import tokenize
-import os
-import sys
-
-verbose = 0
-recurse = 0
-dryrun = 0
-no_backup = 0
-
-def usage(msg=None):
- if msg is not None:
- print >> sys.stderr, msg
- print >> sys.stderr, __doc__
-
-def errprint(*args):
- sep = ""
- for arg in args:
- sys.stderr.write(sep + str(arg))
- sep = " "
- sys.stderr.write("\n")
-
-def main():
- import getopt
- global verbose, recurse, dryrun, no_backup
-
- try:
- opts, args = getopt.getopt(sys.argv[1:], "drvhB",
- ["dryrun", "recurse", "verbose", "help",
- "no-backup"])
- except getopt.error, msg:
- usage(msg)
- return
- for o, a in opts:
- if o in ('-d', '--dryrun'):
- dryrun += 1
- elif o in ('-r', '--recurse'):
- recurse += 1
- elif o in ('-v', '--verbose'):
- verbose += 1
- elif o in ('-B', '--no-backup'):
- no_backup += 1
- elif o in ('-h', '--help'):
- usage()
- return
- if not args:
- r = Reindenter(sys.stdin)
- r.run()
- r.write(sys.stdout)
- return
- for arg in args:
- check(arg)
-
-def check(file):
- if os.path.isdir(file) and not os.path.islink(file):
- if verbose:
- print "listing directory", file
- names = os.listdir(file)
- for name in names:
- fullname = os.path.join(file, name)
- if ((recurse and os.path.isdir(fullname) and
- not os.path.islink(fullname))
- or name.lower().endswith(".py")):
- check(fullname)
- return
-
- if verbose:
- print "checking", file, "...",
- try:
- f = open(file)
- except IOError, msg:
- errprint("%s: I/O Error: %s" % (file, str(msg)))
- return
-
- r = Reindenter(f)
- f.close()
- if r.run():
- if verbose:
- print "changed."
- if dryrun:
- print "But this is a dry run, so leaving it alone."
- else:
- print "reindented", file, (dryrun and "(dry run => not really)" or "")
- if not dryrun:
- if not no_backup:
- bak = file + ".bak"
- if os.path.exists(bak):
- os.remove(bak)
- os.rename(file, bak)
- if verbose:
- print "renamed", file, "to", bak
- f = open(file, "w")
- r.write(f)
- f.close()
- if verbose:
- print "wrote new", file
- else:
- if verbose:
- print "unchanged."
-
-
-class Reindenter:
-
- def __init__(self, f):
- self.find_stmt = 1 # next token begins a fresh stmt?
- self.level = 0 # current indent level
-
- # Raw file lines.
- self.raw = f.readlines()
-
- # File lines, rstripped & tab-expanded. Dummy at start is so
- # that we can use tokenize's 1-based line numbering easily.
- # Note that a line is all-blank iff it's "\n".
- self.lines = [line.rstrip('\n \t').expandtabs() + "\n"
- for line in self.raw]
- self.lines.insert(0, None)
- self.index = 1 # index into self.lines of next line
-
- # List of (lineno, indentlevel) pairs, one for each stmt and
- # comment line. indentlevel is -1 for comment lines, as a
- # signal that tokenize doesn't know what to do about them;
- # indeed, they're our headache!
- self.stats = []
-
- def run(self):
- tokenize.tokenize(self.getline, self.tokeneater)
- # Remove trailing empty lines.
- lines = self.lines
- while lines and lines[-1] == "\n":
- lines.pop()
- # Sentinel.
- stats = self.stats
- stats.append((len(lines), 0))
- # Map count of leading spaces to # we want.
- have2want = {}
- # Program after transformation.
- after = self.after = []
- # Copy over initial empty lines -- there's nothing to do until
- # we see a line with *something* on it.
- i = stats[0][0]
- after.extend(lines[1:i])
- for i in range(len(stats)-1):
- thisstmt, thislevel = stats[i]
- nextstmt = stats[i+1][0]
- have = getlspace(lines[thisstmt])
- want = thislevel * 4
- if want < 0:
- # A comment line.
- if have:
- # An indented comment line. If we saw the same
- # indentation before, reuse what it most recently
- # mapped to.
- want = have2want.get(have, -1)
- if want < 0:
- # Then it probably belongs to the next real stmt.
- for j in xrange(i+1, len(stats)-1):
- jline, jlevel = stats[j]
- if jlevel >= 0:
- if have == getlspace(lines[jline]):
- want = jlevel * 4
- break
- if want < 0: # Maybe it's a hanging
- # comment like this one,
- # in which case we should shift it like its base
- # line got shifted.
- for j in xrange(i-1, -1, -1):
- jline, jlevel = stats[j]
- if jlevel >= 0:
- want = have + getlspace(after[jline-1]) - \
- getlspace(lines[jline])
- break
- if want < 0:
- # Still no luck -- leave it alone.
- want = have
- else:
- want = 0
- assert want >= 0
- have2want[have] = want
- diff = want - have
- if diff == 0 or have == 0:
- after.extend(lines[thisstmt:nextstmt])
- else:
- for line in lines[thisstmt:nextstmt]:
- if diff > 0:
- if line == "\n":
- after.append(line)
- else:
- after.append(" " * diff + line)
- else:
- remove = min(getlspace(line), -diff)
- after.append(line[remove:])
- return self.raw != self.after
-
- def write(self, f):
- f.writelines(self.after)
-
- # Line-getter for tokenize.
- def getline(self):
- if self.index >= len(self.lines):
- line = ""
- else:
- line = self.lines[self.index]
- self.index += 1
- return line
-
- # Line-eater for tokenize.
- def tokeneater(self, type, token, (sline, scol), end, line,
- INDENT=tokenize.INDENT,
- DEDENT=tokenize.DEDENT,
- NEWLINE=tokenize.NEWLINE,
- COMMENT=tokenize.COMMENT,
- NL=tokenize.NL):
-
- if type == NEWLINE:
- # A program statement, or ENDMARKER, will eventually follow,
- # after some (possibly empty) run of tokens of the form
- # (NL | COMMENT)* (INDENT | DEDENT+)?
- self.find_stmt = 1
-
- elif type == INDENT:
- self.find_stmt = 1
- self.level += 1
-
- elif type == DEDENT:
- self.find_stmt = 1
- self.level -= 1
-
- elif type == COMMENT:
- if self.find_stmt:
- self.stats.append((sline, -1))
- # but we're still looking for a new stmt, so leave
- # find_stmt alone
-
- elif type == NL:
- pass
-
- elif self.find_stmt:
- # This is the first "real token" following a NEWLINE, so it
- # must be the first token of the next program statement, or an
- # ENDMARKER.
- self.find_stmt = 0
- if line: # not endmarker
- self.stats.append((sline, self.level))
-
-# Count number of leading blanks.
-def getlspace(line):
- i, n = 0, len(line)
- while i < n and line[i] == " ":
- i += 1
- return i
-
-if __name__ == '__main__':
- main()
diff --git a/scripts/vim2pygments.py b/scripts/vim2pygments.py
index 80f0ada2..42af0bbe 100644..100755
--- a/scripts/vim2pygments.py
+++ b/scripts/vim2pygments.py
@@ -11,10 +11,12 @@
:license: BSD, see LICENSE for details.
"""
+from __future__ import print_function
+
import sys
import re
from os import path
-from cStringIO import StringIO
+from io import StringIO
split_re = re.compile(r'(?<!\\)\s+')
@@ -765,7 +767,7 @@ TOKENS = {
}
TOKEN_TYPES = set()
-for token in TOKENS.itervalues():
+for token in TOKENS.values():
if not isinstance(token, tuple):
token = (token,)
for token in token:
@@ -836,7 +838,7 @@ def find_colors(code):
colors['Normal']['bgcolor'] = bg_color
color_map = {}
- for token, styles in colors.iteritems():
+ for token, styles in colors.items():
if token in TOKENS:
tmp = []
if styles.get('noinherit'):
@@ -879,7 +881,7 @@ class StyleWriter(object):
def write(self, out):
self.write_header(out)
default_token, tokens = find_colors(self.code)
- tokens = tokens.items()
+ tokens = list(tokens.items())
tokens.sort(lambda a, b: cmp(len(a[0]), len(a[1])))
bg_color = [x[3:] for x in default_token.split() if x.startswith('bg:')]
if bg_color:
@@ -916,14 +918,14 @@ def convert(filename, stream=None):
def main():
if len(sys.argv) != 2 or sys.argv[1] in ('-h', '--help'):
- print 'Usage: %s <filename.vim>' % sys.argv[0]
+ print('Usage: %s <filename.vim>' % sys.argv[0])
return 2
if sys.argv[1] in ('-v', '--version'):
- print '%s %s' % (SCRIPT_NAME, SCRIPT_VERSION)
+ print('%s %s' % (SCRIPT_NAME, SCRIPT_VERSION))
return
filename = sys.argv[1]
if not (path.exists(filename) and path.isfile(filename)):
- print 'Error: %s not found' % filename
+ print('Error: %s not found' % filename)
return 1
convert(filename, sys.stdout)
sys.stdout.write('\n')