author     Jonathan Protzenko <jonathan.protzenko@gmail.com>  2013-08-21 18:37:00 +0200
committer  Jonathan Protzenko <jonathan.protzenko@gmail.com>  2013-08-21 18:37:00 +0200
commit     b570f7c18d9dea76f6122bf8491aa2370e9264e6
tree       73b12b043bc80cab01815be1ae9128134056e176
parent     1ea0fa53d253eae501f0a48611dd01493240b34d
Update the patch from #493 to work with tip.
 pygments/cmdline.py          | 10
 pygments/formatters/latex.py | 88
 pygments/token.py            |  2
 3 files changed, 97 insertions(+), 3 deletions(-)
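
The change below adds an `escapeinside` option to the LaTeX formatter: when it is set to a two-character string, text between those two characters in the highlighted source is passed through to the LaTeX output as raw LaTeX instead of being escaped (it has no effect inside string literals). As a purely illustrative sketch, not taken from the patch, a source line such as

    x = |\(\alpha\)| + 1

highlighted with `escapeinside="||"` would typeset \(\alpha\) as LaTeX math inside the verbatim listing. The full diff follows, and a usage sketch comes after it.
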
diff --git a/pygments/cmdline.py b/pygments/cmdline.py
index c25204bf..0957b9c1 100644
--- a/pygments/cmdline.py
+++ b/pygments/cmdline.py
@@ -16,6 +16,7 @@ from pygments import __version__, highlight
 from pygments.util import ClassNotFound, OptionError, docstring_headline
 from pygments.lexers import get_all_lexers, get_lexer_by_name, get_lexer_for_filename, \
      find_lexer_class, guess_lexer, TextLexer
+from pygments.formatters.latex import LatexEmbededLexer, LatexFormatter
 from pygments.formatters import get_all_formatters, get_formatter_by_name, \
      get_formatter_for_filename, find_formatter_class, \
      TerminalFormatter  # pylint:disable-msg=E0611
@@ -402,6 +403,15 @@ def main(args=sys.argv):
         else:
             code = sys.stdin.read()
 
+    # When using the LaTeX formatter and the option `escapeinside` is
+    # specified, we need a special lexer which collects escaped text
+    # before running the chosen language lexer.
+    escapeinside = parsed_opts.get('escapeinside', '')
+    if len(escapeinside) == 2 and isinstance(fmter, LatexFormatter):
+        left = escapeinside[0]
+        right = escapeinside[1]
+        lexer = LatexEmbededLexer(left, right, lexer)
+
     # No encoding given? Use latin1 if output file given,
     # stdin/stdout encoding otherwise.
     # (This is a compromise, I'm not too happy with it...)
diff --git a/pygments/formatters/latex.py b/pygments/formatters/latex.py
index 47fd1239..d4e4dd36 100644
--- a/pygments/formatters/latex.py
+++ b/pygments/formatters/latex.py
@@ -12,9 +12,10 @@
 from pygments.formatter import Formatter
 from pygments.token import Token, STANDARD_TYPES
 from pygments.util import get_bool_opt, get_int_opt, StringIO
+from pygments.lexer import Lexer
 
 
-__all__ = ['LatexFormatter']
+__all__ = ['LatexEmbededLexer', 'LatexFormatter']
 
 
 def escape_tex(text, commandprefix):
@@ -218,6 +219,13 @@ class LatexFormatter(Formatter):
         If set to ``True``, enables LaTeX math mode escape in comments.
         That is, ``'$...$'`` inside a comment will trigger math mode
         (default: ``False``).  *New in Pygments 1.2.*
+
+    `escapeinside`
+        If set to a string of length 2, enables escaping to LaTeX. Text
+        delimited by these 2 characters is read as LaTeX code and
+        typeset accordingly. It has no effect in string literals. It has
+        no effect in comments if `texcomments` or `mathescape` is
+        set. (default: ``''``).  *New in Pygments 1.3.2*
     """
     name = 'LaTeX'
     aliases = ['latex', 'tex']
@@ -235,6 +243,13 @@ class LatexFormatter(Formatter):
         self.commandprefix = options.get('commandprefix', 'PY')
         self.texcomments = get_bool_opt(options, 'texcomments', False)
         self.mathescape = get_bool_opt(options, 'mathescape', False)
+        self.escapeinside = options.get('escapeinside', '')
+
+        if len(self.escapeinside) == 2:
+            self.left = self.escapeinside[0]
+            self.right = self.escapeinside[1]
+        else:
+            self.escapeinside = ''
 
         self._create_stylesheet()
 
@@ -312,7 +327,7 @@ class LatexFormatter(Formatter):
             outfile.write(u',numbers=left' +
                           (start and u',firstnumber=%d' % start or u'') +
                           (step and u',stepnumber=%d' % step or u''))
-        if self.mathescape or self.texcomments:
+        if self.mathescape or self.texcomments or self.escapeinside:
            outfile.write(ur',codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8}')
         if self.verboptions:
             outfile.write(u',' + self.verboptions)
@@ -342,9 +357,22 @@ class LatexFormatter(Formatter):
                             parts[i] = escape_tex(part, self.commandprefix)
                         in_math = not in_math
                     value = '$'.join(parts)
+                elif self.escapeinside:
+                    text = value
+                    value = ''
+                    while len(text) > 0:
+                        a,sep1,text = text.partition(self.left)
+                        if len(sep1) > 0:
+                            b,sep2,text = text.partition(self.right)
+                            if len(sep2) > 0:
+                                value = value + escape_tex(a, self.commandprefix) + b
+                            else:
+                                value = value + escape_tex(a + sep1 + b, self.commandprefix)
+                        else:
+                            value = value + escape_tex(a, self.commandprefix)
                 else:
                     value = escape_tex(value, self.commandprefix)
-            else:
+            elif not (ttype in Token.Escape):
                 value = escape_tex(value, self.commandprefix)
             styles = []
             while ttype is not Token:
@@ -376,3 +404,57 @@ class LatexFormatter(Formatter):
                  encoding  = self.encoding or 'latin1',
                  styledefs = self.get_style_defs(),
                  code      = outfile.getvalue()))
+
+
+class LatexEmbededLexer(Lexer):
+    r"""
+
+    This lexer takes one lexer as argument, the lexer for the language
+    being formatted, and the left and right delimiters for escaped text.
+
+    First everything is scanned using the language lexer to obtain
+    strings and comments. All other consecutive tokens are merged and
+    the resulting text is scanned for escaped segments, which are given
+    the Token.Escape type. Finally text that is not escaped is scanned
+    again with the language lexer.
+    """
+    def __init__(self, left, right, lang, **options):
+        self.left = left
+        self.right = right
+        self.lang = lang
+        Lexer.__init__(self, **options)
+
+    def get_tokens_unprocessed(self, text):
+        buf = ''
+        for i, t, v in self.lang.get_tokens_unprocessed(text):
+            if t in Token.Comment or t in Token.String:
+                if len(buf) > 0:
+                    for x in self.get_tokens_aux(idx, buf):
+                        yield x
+                    buf = ''
+                yield i, t, v
+            else:
+                if len(buf) == 0:
+                    idx = i
+                buf = buf + v
+        if len(buf) > 0:
+            for x in self.get_tokens_aux(idx, buf):
+                yield x
+
+    def get_tokens_aux(self, index, text):
+        while len(text) > 0:
+            a, sep1, text = text.partition(self.left)
+            if len(a) > 0:
+                for i, t, v in self.lang.get_tokens_unprocessed(a):
+                    yield index + i, t, v
+                index = index + len(a)
+            if len(sep1) > 0:
+                b, sep2, text = text.partition(self.right)
+                if len(sep2) > 0:
+                    yield index + len(sep1), Token.Escape, b
+                    index = index + len(sep1) + len(b) + len(sep2)
+                else:
+                    yield index, Token.Error, sep1
+                    index = index + len(sep1)
+                    text = b
+
diff --git a/pygments/token.py b/pygments/token.py
index 19a83f2e..c31b7949 100644
--- a/pygments/token.py
+++ b/pygments/token.py
@@ -49,6 +49,7 @@ Token = _TokenType()
 # Special token types
 Text = Token.Text
 Whitespace = Text.Whitespace
+Escape = Token.Escape
 Error = Token.Error
 # Text that doesn't belong to this lexer (e.g. HTML in PHP)
 Other = Token.Other
@@ -116,6 +117,7 @@ STANDARD_TYPES = {
     Text:                          '',
     Whitespace:                    'w',
+    Escape:                        'esc',
     Error:                         'err',
     Other:                         'x',