diff options
Diffstat (limited to 'pygments/cmdline.py')
-rw-r--r-- | pygments/cmdline.py | 118 |
1 files changed, 81 insertions, 37 deletions
diff --git a/pygments/cmdline.py b/pygments/cmdline.py index 7c23ebee..907c51f0 100644 --- a/pygments/cmdline.py +++ b/pygments/cmdline.py @@ -16,20 +16,21 @@ import getopt from textwrap import dedent from pygments import __version__, highlight -from pygments.util import ClassNotFound, OptionError, docstring_headline -from pygments.lexers import get_all_lexers, get_lexer_by_name, get_lexer_for_filename, \ - find_lexer_class, guess_lexer, TextLexer +from pygments.util import ClassNotFound, OptionError, docstring_headline, \ + guess_decode, guess_decode_from_terminal, terminal_encoding +from pygments.lexers import get_all_lexers, get_lexer_by_name, guess_lexer, \ + get_lexer_for_filename, find_lexer_class, TextLexer from pygments.formatters.latex import LatexEmbeddedLexer, LatexFormatter from pygments.formatters import get_all_formatters, get_formatter_by_name, \ - get_formatter_for_filename, find_formatter_class, \ - TerminalFormatter # pylint:disable-msg=E0611 + get_formatter_for_filename, find_formatter_class, \ + TerminalFormatter # pylint:disable-msg=E0611 from pygments.filters import get_all_filters, find_filter_class from pygments.styles import get_all_styles, get_style_by_name USAGE = """\ Usage: %s [-l <lexer> | -g] [-F <filter>[:<options>]] [-f <formatter>] - [-O <options>] [-P <option=value>] [-o <outfile>] [<infile>] + [-O <options>] [-P <option=value>] [-s] [-o <outfile>] [<infile>] %s -S <style> -f <formatter> [-a <arg>] [-O <options>] [-P <option=value>] %s -L [<which> ...] @@ -41,6 +42,10 @@ Highlight the input file and write the result to <outfile>. If no input file is given, use stdin, if -o is not given, use stdout. +If -s is passed, lexing will be done in "streaming" mode, reading and +highlighting one line at a time. This will only work properly with +lexers that have no constructs spanning multiple lines! + <lexer> is a lexer name (query all lexer names with -L). If -l is not given, the lexer is guessed from the extension of the input file name (this obviously doesn't work if the input is stdin). If -g is passed, @@ -80,6 +85,11 @@ If no specific lexer can be determined "text" is returned. The -H option prints detailed help for the object <name> of type <type>, where <type> is one of "lexer", "formatter" or "filter". +The -s option processes lines one at a time until EOF, rather than +waiting to process the entire file. This only works for stdin, and +is intended for streaming input such as you get from 'tail -f'. +Example usage: "tail -f sql.log | pygmentize -s -l sql" + The -h option prints this help. The -V option prints the package version. """ @@ -205,7 +215,7 @@ def main(args=sys.argv): pass try: - popts, args = getopt.getopt(args[1:], "l:f:F:o:O:P:LS:a:N:hVHg") + popts, args = getopt.getopt(args[1:], "l:f:F:o:O:P:LS:a:N:hVHgs") except getopt.GetoptError: print(usage, file=sys.stderr) return 2 @@ -222,10 +232,6 @@ def main(args=sys.argv): F_opts.append(arg) opts[opt] = arg - if not opts and not args: - print(usage) - return 0 - if opts.pop('-h', None) is not None: print(usage) return 0 @@ -278,6 +284,10 @@ def main(args=sys.argv): parsed_opts[name] = value opts.pop('-P', None) + # encodings + inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding')) + outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding')) + # handle ``pygmentize -N`` infn = opts.pop('-N', None) if infn is not None: @@ -353,7 +363,11 @@ def main(args=sys.argv): else: if not fmter: fmter = TerminalFormatter(**parsed_opts) - outfile = sys.stdout + if sys.version_info > (3,): + # Python 3: we have to use .buffer to get a binary stream + outfile = sys.stdout.buffer + else: + outfile = sys.stdout # select lexer lexer = opts.pop('-l', None) @@ -364,18 +378,28 @@ def main(args=sys.argv): print('Error:', err, file=sys.stderr) return 1 + # read input code if args: if len(args) > 1: print(usage, file=sys.stderr) return 2 + if '-s' in opts: + print('Error: -s option not usable when input file specified', + file=sys.stderr) + return 1 + infn = args[0] try: - code = open(infn, 'rb').read() + with open(infn, 'rb') as infp: + code = infp.read() except Exception as err: print('Error: cannot read infile:', err, file=sys.stderr) return 1 + if not inencoding: + code, inencoding = guess_decode(code) + # do we have to guess the lexer? if not lexer: try: lexer = get_lexer_for_filename(infn, code, **parsed_opts) @@ -392,19 +416,22 @@ def main(args=sys.argv): print('Error:', err, file=sys.stderr) return 1 - else: - if '-g' in opts: + elif '-s' not in opts: # treat stdin as full file (-s support is later) + # read code from terminal, always in binary mode since we want to + # decode ourselves and be tolerant with it + if sys.version_info > (3,): + # Python 3: we have to use .buffer to get a binary stream + code = sys.stdin.buffer.read() + else: code = sys.stdin.read() + if not inencoding: + code, inencoding = guess_decode_from_terminal(code, sys.stdin) + # else the lexer will do the decoding + if not lexer: try: lexer = guess_lexer(code, **parsed_opts) except ClassNotFound: lexer = TextLexer(**parsed_opts) - elif not lexer: - print('Error: no lexer name given and reading ' + \ - 'from stdin (try using -g or -l <lexer>)', file=sys.stderr) - return 2 - else: - code = sys.stdin.read() # When using the LaTeX formatter and the option `escapeinside` is # specified, we need a special lexer which collects escaped text @@ -415,30 +442,47 @@ def main(args=sys.argv): right = escapeinside[1] lexer = LatexEmbeddedLexer(left, right, lexer) - # No encoding given? Use latin1 if output file given, - # stdin/stdout encoding otherwise. - # (This is a compromise, I'm not too happy with it...) - if 'encoding' not in parsed_opts and 'outencoding' not in parsed_opts: + # determine output encoding if not explicitly selected + if not outencoding: if outfn: - # encoding pass-through - fmter.encoding = 'latin1' + # output file? -> encoding pass-through + fmter.encoding = inencoding else: - if sys.version_info < (3,): - # use terminal encoding; Python 3's terminals already do that - lexer.encoding = getattr(sys.stdin, 'encoding', - None) or 'ascii' - fmter.encoding = getattr(sys.stdout, 'encoding', - None) or 'ascii' - elif not outfn and sys.version_info > (3,): - # output to terminal with encoding -> use .buffer - outfile = sys.stdout.buffer + # else use terminal encoding + fmter.encoding = terminal_encoding(sys.stdout) # ... and do it! try: # process filters for fname, fopts in F_opts: lexer.add_filter(fname, **fopts) - highlight(code, lexer, fmter, outfile) + + if '-s' not in opts: + # process whole input as per normal... + highlight(code, lexer, fmter, outfile) + else: + if not lexer: + print('Error: when using -s a lexer has to be selected with -l', + file=sys.stderr) + return 1 + # line by line processing of stdin (eg: for 'tail -f')... + try: + while 1: + if sys.version_info > (3,): + # Python 3: we have to use .buffer to get a binary stream + line = sys.stdin.buffer.readline() + else: + line = sys.stdin.readline() + if not line: + break + if not inencoding: + line = guess_decode_from_terminal(line, sys.stdin)[0] + highlight(line, lexer, fmter, outfile) + if hasattr(outfile, 'flush'): + outfile.flush() + except KeyboardInterrupt: + return 0 + except Exception: import traceback info = traceback.format_exception(*sys.exc_info()) |