diff options
Diffstat (limited to 'pygments/cmdline.py')
-rw-r--r-- | pygments/cmdline.py | 404 |
1 files changed, 246 insertions, 158 deletions
diff --git a/pygments/cmdline.py b/pygments/cmdline.py index c25204bf..00745edc 100644 --- a/pygments/cmdline.py +++ b/pygments/cmdline.py @@ -5,27 +5,33 @@ Command line interface. - :copyright: Copyright 2006-2013 by the Pygments team, see AUTHORS. + :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS. :license: BSD, see LICENSE for details. """ + +from __future__ import print_function + import sys import getopt from textwrap import dedent from pygments import __version__, highlight -from pygments.util import ClassNotFound, OptionError, docstring_headline -from pygments.lexers import get_all_lexers, get_lexer_by_name, get_lexer_for_filename, \ - find_lexer_class, guess_lexer, TextLexer +from pygments.util import ClassNotFound, OptionError, docstring_headline, \ + guess_decode, guess_decode_from_terminal, terminal_encoding +from pygments.lexers import get_all_lexers, get_lexer_by_name, guess_lexer, \ + get_lexer_for_filename, find_lexer_class_for_filename +from pygments.lexers.special import TextLexer +from pygments.formatters.latex import LatexEmbeddedLexer, LatexFormatter from pygments.formatters import get_all_formatters, get_formatter_by_name, \ - get_formatter_for_filename, find_formatter_class, \ - TerminalFormatter # pylint:disable-msg=E0611 + get_formatter_for_filename, find_formatter_class +from pygments.formatters.terminal import TerminalFormatter from pygments.filters import get_all_filters, find_filter_class from pygments.styles import get_all_styles, get_style_by_name USAGE = """\ Usage: %s [-l <lexer> | -g] [-F <filter>[:<options>]] [-f <formatter>] - [-O <options>] [-P <option=value>] [-o <outfile>] [<infile>] + [-O <options>] [-P <option=value>] [-s] [-v] [-o <outfile>] [<infile>] %s -S <style> -f <formatter> [-a <arg>] [-O <options>] [-P <option=value>] %s -L [<which> ...] @@ -37,6 +43,10 @@ Highlight the input file and write the result to <outfile>. If no input file is given, use stdin, if -o is not given, use stdout. +If -s is passed, lexing will be done in "streaming" mode, reading and +highlighting one line at a time. This will only work properly with +lexers that have no constructs spanning multiple lines! + <lexer> is a lexer name (query all lexer names with -L). If -l is not given, the lexer is guessed from the extension of the input file name (this obviously doesn't work if the input is stdin). If -g is passed, @@ -76,6 +86,14 @@ If no specific lexer can be determined "text" is returned. The -H option prints detailed help for the object <name> of type <type>, where <type> is one of "lexer", "formatter" or "filter". +The -s option processes lines one at a time until EOF, rather than +waiting to process the entire file. This only works for stdin, and +is intended for streaming input such as you get from 'tail -f'. +Example usage: "tail -f sql.log | pygmentize -s -l sql" + +The -v option prints a detailed traceback on unhandled exceptions, +which is useful for debugging and bug reports. + The -h option prints this help. The -V option prints the package version. """ @@ -86,13 +104,13 @@ def _parse_options(o_strs): if not o_strs: return opts for o_str in o_strs: - if not o_str: + if not o_str.strip(): continue o_args = o_str.split(',') for o_arg in o_args: o_arg = o_arg.strip() try: - o_key, o_val = o_arg.split('=') + o_key, o_val = o_arg.split('=', 1) o_key = o_key.strip() o_val = o_val.strip() except ValueError: @@ -118,26 +136,28 @@ def _parse_filters(f_strs): def _print_help(what, name): try: if what == 'lexer': - cls = find_lexer_class(name) - print "Help on the %s lexer:" % cls.name - print dedent(cls.__doc__) + cls = get_lexer_by_name(name) + print("Help on the %s lexer:" % cls.name) + print(dedent(cls.__doc__)) elif what == 'formatter': cls = find_formatter_class(name) - print "Help on the %s formatter:" % cls.name - print dedent(cls.__doc__) + print("Help on the %s formatter:" % cls.name) + print(dedent(cls.__doc__)) elif what == 'filter': cls = find_filter_class(name) - print "Help on the %s filter:" % name - print dedent(cls.__doc__) - except AttributeError: - print >>sys.stderr, "%s not found!" % what + print("Help on the %s filter:" % name) + print(dedent(cls.__doc__)) + return 0 + except (AttributeError, ValueError): + print("%s not found!" % what, file=sys.stderr) + return 1 def _print_list(what): if what == 'lexer': - print - print "Lexers:" - print "~~~~~~~" + print() + print("Lexers:") + print("~~~~~~~") info = [] for fullname, names, exts, _ in get_all_lexers(): @@ -146,12 +166,12 @@ def _print_list(what): info.append(tup) info.sort() for i in info: - print ('* %s\n %s %s') % i + print(('* %s\n %s %s') % i) elif what == 'formatter': - print - print "Formatters:" - print "~~~~~~~~~~~" + print() + print("Formatters:") + print("~~~~~~~~~~~") info = [] for cls in get_all_formatters(): @@ -161,50 +181,30 @@ def _print_list(what): info.append(tup) info.sort() for i in info: - print ('* %s\n %s %s') % i + print(('* %s\n %s %s') % i) elif what == 'filter': - print - print "Filters:" - print "~~~~~~~~" + print() + print("Filters:") + print("~~~~~~~~") for name in get_all_filters(): cls = find_filter_class(name) - print "* " + name + ':' - print " %s" % docstring_headline(cls) + print("* " + name + ':') + print(" %s" % docstring_headline(cls)) elif what == 'style': - print - print "Styles:" - print "~~~~~~~" + print() + print("Styles:") + print("~~~~~~~") for name in get_all_styles(): cls = get_style_by_name(name) - print "* " + name + ':' - print " %s" % docstring_headline(cls) - - -def main(args=sys.argv): - """ - Main command line entry point. - """ - # pylint: disable-msg=R0911,R0912,R0915 + print("* " + name + ':') + print(" %s" % docstring_headline(cls)) - usage = USAGE % ((args[0],) * 6) - if sys.platform in ['win32', 'cygwin']: - try: - # Provide coloring under Windows, if possible - import colorama - colorama.init() - except ImportError: - pass - - try: - popts, args = getopt.getopt(args[1:], "l:f:F:o:O:P:LS:a:N:hVHg") - except getopt.GetoptError, err: - print >>sys.stderr, usage - return 2 +def main_inner(popts, args, usage): opts = {} O_opts = [] P_opts = [] @@ -218,23 +218,19 @@ def main(args=sys.argv): F_opts.append(arg) opts[opt] = arg - if not opts and not args: - print usage - return 0 - if opts.pop('-h', None) is not None: - print usage + print(usage) return 0 if opts.pop('-V', None) is not None: - print 'Pygments version %s, (c) 2006-2013 by Georg Brandl.' % __version__ + print('Pygments version %s, (c) 2006-2015 by Georg Brandl.' % __version__) return 0 # handle ``pygmentize -L`` L_opt = opts.pop('-L', None) if L_opt is not None: if opts: - print >>sys.stderr, usage + print(usage, file=sys.stderr) return 2 # print version @@ -249,16 +245,15 @@ def main(args=sys.argv): H_opt = opts.pop('-H', None) if H_opt is not None: if opts or len(args) != 2: - print >>sys.stderr, usage + print(usage, file=sys.stderr) return 2 - what, name = args + what, name = args # pylint: disable=unbalanced-tuple-unpacking if what not in ('lexer', 'formatter', 'filter'): - print >>sys.stderr, usage + print(usage, file=sys.stderr) return 2 - _print_help(what, name) - return 0 + return _print_help(what, name) # parse -O options parsed_opts = _parse_options(O_opts) @@ -274,18 +269,18 @@ def main(args=sys.argv): parsed_opts[name] = value opts.pop('-P', None) + # encodings + inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding')) + outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding')) + # handle ``pygmentize -N`` infn = opts.pop('-N', None) if infn is not None: - try: - lexer = get_lexer_for_filename(infn, **parsed_opts) - except ClassNotFound, err: - lexer = TextLexer() - except OptionError, err: - print >>sys.stderr, 'Error:', err - return 1 + lexer = find_lexer_class_for_filename(infn) + if lexer is None: + lexer = TextLexer - print lexer.aliases[0] + print(lexer.aliases[0]) return 0 # handle ``pygmentize -S`` @@ -294,148 +289,241 @@ def main(args=sys.argv): if S_opt is not None: f_opt = opts.pop('-f', None) if not f_opt: - print >>sys.stderr, usage + print(usage, file=sys.stderr) return 2 if opts or args: - print >>sys.stderr, usage + print(usage, file=sys.stderr) return 2 try: parsed_opts['style'] = S_opt fmter = get_formatter_by_name(f_opt, **parsed_opts) - except ClassNotFound, err: - print >>sys.stderr, err + except ClassNotFound as err: + print(err, file=sys.stderr) return 1 - arg = a_opt or '' - try: - print fmter.get_style_defs(arg) - except Exception, err: - print >>sys.stderr, 'Error:', err - return 1 + print(fmter.get_style_defs(a_opt or '')) return 0 # if no -S is given, -a is not allowed if a_opt is not None: - print >>sys.stderr, usage + print(usage, file=sys.stderr) return 2 # parse -F options F_opts = _parse_filters(F_opts) opts.pop('-F', None) - # select formatter - outfn = opts.pop('-o', None) - fmter = opts.pop('-f', None) - if fmter: - try: - fmter = get_formatter_by_name(fmter, **parsed_opts) - except (OptionError, ClassNotFound), err: - print >>sys.stderr, 'Error:', err - return 1 + # select lexer + lexer = None - if outfn: - if not fmter: - try: - fmter = get_formatter_for_filename(outfn, **parsed_opts) - except (OptionError, ClassNotFound), err: - print >>sys.stderr, 'Error:', err - return 1 + # given by name? + lexername = opts.pop('-l', None) + if lexername: try: - outfile = open(outfn, 'wb') - except Exception, err: - print >>sys.stderr, 'Error: cannot open outfile:', err + lexer = get_lexer_by_name(lexername, **parsed_opts) + except (OptionError, ClassNotFound) as err: + print('Error:', err, file=sys.stderr) return 1 - else: - if not fmter: - fmter = TerminalFormatter(**parsed_opts) - outfile = sys.stdout - # select lexer - lexer = opts.pop('-l', None) - if lexer: - try: - lexer = get_lexer_by_name(lexer, **parsed_opts) - except (OptionError, ClassNotFound), err: - print >>sys.stderr, 'Error:', err - return 1 + # read input code + code = None if args: if len(args) > 1: - print >>sys.stderr, usage + print(usage, file=sys.stderr) + return 2 + + if '-s' in opts: + print('Error: -s option not usable when input file specified', + file=sys.stderr) return 2 infn = args[0] try: - code = open(infn, 'rb').read() - except Exception, err: - print >>sys.stderr, 'Error: cannot read infile:', err + with open(infn, 'rb') as infp: + code = infp.read() + except Exception as err: + print('Error: cannot read infile:', err, file=sys.stderr) return 1 + if not inencoding: + code, inencoding = guess_decode(code) + # do we have to guess the lexer? if not lexer: try: lexer = get_lexer_for_filename(infn, code, **parsed_opts) - except ClassNotFound, err: + except ClassNotFound as err: if '-g' in opts: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) else: - print >>sys.stderr, 'Error:', err + print('Error:', err, file=sys.stderr) return 1 - except OptionError, err: - print >>sys.stderr, 'Error:', err + except OptionError as err: + print('Error:', err, file=sys.stderr) return 1 - else: - if '-g' in opts: + elif '-s' not in opts: # treat stdin as full file (-s support is later) + # read code from terminal, always in binary mode since we want to + # decode ourselves and be tolerant with it + if sys.version_info > (3,): + # Python 3: we have to use .buffer to get a binary stream + code = sys.stdin.buffer.read() + else: code = sys.stdin.read() + if not inencoding: + code, inencoding = guess_decode_from_terminal(code, sys.stdin) + # else the lexer will do the decoding + if not lexer: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) - elif not lexer: - print >>sys.stderr, 'Error: no lexer name given and reading ' + \ - 'from stdin (try using -g or -l <lexer>)' + + else: # -s option needs a lexer with -l + if not lexer: + print('Error: when using -s a lexer has to be selected with -l', + file=sys.stderr) return 2 + + # process filters + for fname, fopts in F_opts: + try: + lexer.add_filter(fname, **fopts) + except ClassNotFound as err: + print('Error:', err, file=sys.stderr) + return 1 + + # select formatter + outfn = opts.pop('-o', None) + fmter = opts.pop('-f', None) + if fmter: + try: + fmter = get_formatter_by_name(fmter, **parsed_opts) + except (OptionError, ClassNotFound) as err: + print('Error:', err, file=sys.stderr) + return 1 + + if outfn: + if not fmter: + try: + fmter = get_formatter_for_filename(outfn, **parsed_opts) + except (OptionError, ClassNotFound) as err: + print('Error:', err, file=sys.stderr) + return 1 + try: + outfile = open(outfn, 'wb') + except Exception as err: + print('Error: cannot open outfile:', err, file=sys.stderr) + return 1 + else: + if not fmter: + fmter = TerminalFormatter(**parsed_opts) + if sys.version_info > (3,): + # Python 3: we have to use .buffer to get a binary stream + outfile = sys.stdout.buffer else: - code = sys.stdin.read() + outfile = sys.stdout - # No encoding given? Use latin1 if output file given, - # stdin/stdout encoding otherwise. - # (This is a compromise, I'm not too happy with it...) - if 'encoding' not in parsed_opts and 'outencoding' not in parsed_opts: + # determine output encoding if not explicitly selected + if not outencoding: if outfn: - # encoding pass-through - fmter.encoding = 'latin1' + # output file? use lexer encoding for now (can still be None) + fmter.encoding = inencoding else: - if sys.version_info < (3,): - # use terminal encoding; Python 3's terminals already do that - lexer.encoding = getattr(sys.stdin, 'encoding', - None) or 'ascii' - fmter.encoding = getattr(sys.stdout, 'encoding', - None) or 'ascii' - elif not outfn and sys.version_info > (3,): - # output to terminal with encoding -> use .buffer - outfile = sys.stdout.buffer + # else use terminal encoding + fmter.encoding = terminal_encoding(sys.stdout) + + # provide coloring under Windows, if possible + if not outfn and sys.platform in ('win32', 'cygwin') and \ + fmter.name in ('Terminal', 'Terminal256'): # pragma: no cover + # unfortunately colorama doesn't support binary streams on Py3 + if sys.version_info > (3,): + from pygments.util import UnclosingTextIOWrapper + outfile = UnclosingTextIOWrapper(outfile, encoding=fmter.encoding) + fmter.encoding = None + try: + import colorama.initialise + except ImportError: + pass + else: + outfile = colorama.initialise.wrap_stream( + outfile, convert=None, strip=None, autoreset=False, wrap=True) + + # When using the LaTeX formatter and the option `escapeinside` is + # specified, we need a special lexer which collects escaped text + # before running the chosen language lexer. + escapeinside = parsed_opts.get('escapeinside', '') + if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter): + left = escapeinside[0] + right = escapeinside[1] + lexer = LatexEmbeddedLexer(left, right, lexer) # ... and do it! - try: - # process filters - for fname, fopts in F_opts: - lexer.add_filter(fname, **fopts) + if '-s' not in opts: + # process whole input as per normal... highlight(code, lexer, fmter, outfile) - except Exception, err: + return 0 + else: + # line by line processing of stdin (eg: for 'tail -f')... + try: + while 1: + if sys.version_info > (3,): + # Python 3: we have to use .buffer to get a binary stream + line = sys.stdin.buffer.readline() + else: + line = sys.stdin.readline() + if not line: + break + if not inencoding: + line = guess_decode_from_terminal(line, sys.stdin)[0] + highlight(line, lexer, fmter, outfile) + if hasattr(outfile, 'flush'): + outfile.flush() + return 0 + except KeyboardInterrupt: # pragma: no cover + return 0 + + +def main(args=sys.argv): + """ + Main command line entry point. + """ + usage = USAGE % ((args[0],) * 6) + + try: + popts, args = getopt.getopt(args[1:], "l:f:F:o:O:P:LS:a:N:vhVHgs") + except getopt.GetoptError: + print(usage, file=sys.stderr) + return 2 + + try: + return main_inner(popts, args, usage) + except Exception: + if '-v' in dict(popts): + print(file=sys.stderr) + print('*' * 65, file=sys.stderr) + print('An unhandled exception occurred while highlighting.', + file=sys.stderr) + print('Please report the whole traceback to the issue tracker at', + file=sys.stderr) + print('<https://bitbucket.org/birkenfeld/pygments-main/issues>.', + file=sys.stderr) + print('*' * 65, file=sys.stderr) + print(file=sys.stderr) + raise import traceback info = traceback.format_exception(*sys.exc_info()) msg = info[-1].strip() if len(info) >= 3: # extract relevant file and position info msg += '\n (f%s)' % info[-2].split('\n')[0].strip()[1:] - print >>sys.stderr - print >>sys.stderr, '*** Error while highlighting:' - print >>sys.stderr, msg + print(file=sys.stderr) + print('*** Error while highlighting:', file=sys.stderr) + print(msg, file=sys.stderr) + print('*** If this is a bug you want to report, please rerun with -v.', + file=sys.stderr) return 1 - - return 0 |