summary | refs | log | tree | commit | diff
path: root/pygments/cmdline.py
diff options
context:
space:
mode:
Diffstat (limited to 'pygments/cmdline.py')
-rw-r--r-- pygments/cmdline.py 118
1 files changed, 81 insertions, 37 deletions
diff --git a/pygments/cmdline.py b/pygments/cmdline.py
index 7c23ebee..907c51f0 100644
--- a/pygments/cmdline.py
+++ b/pygments/cmdline.py
@@ -16,20 +16,21 @@ import getopt
from textwrap import dedent
from pygments import __version__, highlight
-from pygments.util import ClassNotFound, OptionError, docstring_headline
-from pygments.lexers import get_all_lexers, get_lexer_by_name, get_lexer_for_filename, \
- find_lexer_class, guess_lexer, TextLexer
+from pygments.util import ClassNotFound, OptionError, docstring_headline, \
+ guess_decode, guess_decode_from_terminal, terminal_encoding
+from pygments.lexers import get_all_lexers, get_lexer_by_name, guess_lexer, \
+ get_lexer_for_filename, find_lexer_class, TextLexer
from pygments.formatters.latex import LatexEmbeddedLexer, LatexFormatter
from pygments.formatters import get_all_formatters, get_formatter_by_name, \
- get_formatter_for_filename, find_formatter_class, \
- TerminalFormatter # pylint:disable-msg=E0611
+ get_formatter_for_filename, find_formatter_class, \
+ TerminalFormatter # pylint:disable-msg=E0611
from pygments.filters import get_all_filters, find_filter_class
from pygments.styles import get_all_styles, get_style_by_name
USAGE = """\
Usage: %s [-l <lexer> | -g] [-F <filter>[:<options>]] [-f <formatter>]
- [-O <options>] [-P <option=value>] [-o <outfile>] [<infile>]
+ [-O <options>] [-P <option=value>] [-s] [-o <outfile>] [<infile>]
%s -S <style> -f <formatter> [-a <arg>] [-O <options>] [-P <option=value>]
%s -L [<which> ...]
@@ -41,6 +42,10 @@ Highlight the input file and write the result to <outfile>.
If no input file is given, use stdin, if -o is not given, use stdout.
+If -s is passed, lexing will be done in "streaming" mode, reading and
+highlighting one line at a time. This will only work properly with
+lexers that have no constructs spanning multiple lines!
+
<lexer> is a lexer name (query all lexer names with -L). If -l is not
given, the lexer is guessed from the extension of the input file name
(this obviously doesn't work if the input is stdin). If -g is passed,
@@ -80,6 +85,11 @@ If no specific lexer can be determined "text" is returned.
The -H option prints detailed help for the object <name> of type <type>,
where <type> is one of "lexer", "formatter" or "filter".
+The -s option processes lines one at a time until EOF, rather than
+waiting to process the entire file. This only works for stdin, and
+is intended for streaming input such as you get from 'tail -f'.
+Example usage: "tail -f sql.log | pygmentize -s -l sql"
+
The -h option prints this help.
The -V option prints the package version.
"""
@@ -205,7 +215,7 @@ def main(args=sys.argv):
pass
try:
- popts, args = getopt.getopt(args[1:], "l:f:F:o:O:P:LS:a:N:hVHg")
+ popts, args = getopt.getopt(args[1:], "l:f:F:o:O:P:LS:a:N:hVHgs")
except getopt.GetoptError:
print(usage, file=sys.stderr)
return 2
@@ -222,10 +232,6 @@ def main(args=sys.argv):
F_opts.append(arg)
opts[opt] = arg
- if not opts and not args:
- print(usage)
- return 0
-
if opts.pop('-h', None) is not None:
print(usage)
return 0
@@ -278,6 +284,10 @@ def main(args=sys.argv):
parsed_opts[name] = value
opts.pop('-P', None)
+ # encodings
+ inencoding = parsed_opts.get('inencoding', parsed_opts.get('encoding'))
+ outencoding = parsed_opts.get('outencoding', parsed_opts.get('encoding'))
+
# handle ``pygmentize -N``
infn = opts.pop('-N', None)
if infn is not None:
@@ -353,7 +363,11 @@ def main(args=sys.argv):
else:
if not fmter:
fmter = TerminalFormatter(**parsed_opts)
- outfile = sys.stdout
+ if sys.version_info > (3,):
+ # Python 3: we have to use .buffer to get a binary stream
+ outfile = sys.stdout.buffer
+ else:
+ outfile = sys.stdout
# select lexer
lexer = opts.pop('-l', None)
@@ -364,18 +378,28 @@ def main(args=sys.argv):
print('Error:', err, file=sys.stderr)
return 1
+ # read input code
if args:
if len(args) > 1:
print(usage, file=sys.stderr)
return 2
+ if '-s' in opts:
+ print('Error: -s option not usable when input file specified',
+ file=sys.stderr)
+ return 1
+
infn = args[0]
try:
- code = open(infn, 'rb').read()
+ with open(infn, 'rb') as infp:
+ code = infp.read()
except Exception as err:
print('Error: cannot read infile:', err, file=sys.stderr)
return 1
+ if not inencoding:
+ code, inencoding = guess_decode(code)
+ # do we have to guess the lexer?
if not lexer:
try:
lexer = get_lexer_for_filename(infn, code, **parsed_opts)
@@ -392,19 +416,22 @@ def main(args=sys.argv):
print('Error:', err, file=sys.stderr)
return 1
- else:
- if '-g' in opts:
+ elif '-s' not in opts: # treat stdin as full file (-s support is later)
+ # read code from terminal, always in binary mode since we want to
+ # decode ourselves and be tolerant with it
+ if sys.version_info > (3,):
+ # Python 3: we have to use .buffer to get a binary stream
+ code = sys.stdin.buffer.read()
+ else:
code = sys.stdin.read()
+ if not inencoding:
+ code, inencoding = guess_decode_from_terminal(code, sys.stdin)
+ # else the lexer will do the decoding
+ if not lexer:
try:
lexer = guess_lexer(code, **parsed_opts)
except ClassNotFound:
lexer = TextLexer(**parsed_opts)
- elif not lexer:
- print('Error: no lexer name given and reading ' + \
- 'from stdin (try using -g or -l <lexer>)', file=sys.stderr)
- return 2
- else:
- code = sys.stdin.read()
# When using the LaTeX formatter and the option `escapeinside` is
# specified, we need a special lexer which collects escaped text
@@ -415,30 +442,47 @@ def main(args=sys.argv):
right = escapeinside[1]
lexer = LatexEmbeddedLexer(left, right, lexer)
- # No encoding given? Use latin1 if output file given,
- # stdin/stdout encoding otherwise.
- # (This is a compromise, I'm not too happy with it...)
- if 'encoding' not in parsed_opts and 'outencoding' not in parsed_opts:
+ # determine output encoding if not explicitly selected
+ if not outencoding:
if outfn:
- # encoding pass-through
- fmter.encoding = 'latin1'
+ # output file? -> encoding pass-through
+ fmter.encoding = inencoding
else:
- if sys.version_info < (3,):
- # use terminal encoding; Python 3's terminals already do that
- lexer.encoding = getattr(sys.stdin, 'encoding',
- None) or 'ascii'
- fmter.encoding = getattr(sys.stdout, 'encoding',
- None) or 'ascii'
- elif not outfn and sys.version_info > (3,):
- # output to terminal with encoding -> use .buffer
- outfile = sys.stdout.buffer
+ # else use terminal encoding
+ fmter.encoding = terminal_encoding(sys.stdout)
# ... and do it!
try:
# process filters
for fname, fopts in F_opts:
lexer.add_filter(fname, **fopts)
- highlight(code, lexer, fmter, outfile)
+
+ if '-s' not in opts:
+ # process whole input as per normal...
+ highlight(code, lexer, fmter, outfile)
+ else:
+ if not lexer:
+ print('Error: when using -s a lexer has to be selected with -l',
+ file=sys.stderr)
+ return 1
+ # line by line processing of stdin (eg: for 'tail -f')...
+ try:
+ while 1:
+ if sys.version_info > (3,):
+ # Python 3: we have to use .buffer to get a binary stream
+ line = sys.stdin.buffer.readline()
+ else:
+ line = sys.stdin.readline()
+ if not line:
+ break
+ if not inencoding:
+ line = guess_decode_from_terminal(line, sys.stdin)[0]
+ highlight(line, lexer, fmter, outfile)
+ if hasattr(outfile, 'flush'):
+ outfile.flush()
+ except KeyboardInterrupt:
+ return 0
+
except Exception:
import traceback
info = traceback.format_exception(*sys.exc_info())