diff options
author | Tim Baumann <tim@timbaumann.info> | 2013-05-19 22:32:57 +0200 |
---|---|---|
committer | Tim Baumann <tim@timbaumann.info> | 2013-05-19 22:32:57 +0200 |
commit | 057a8da4c453d7507b2f879413c5de64b930391f (patch) | |
tree | 4911b52eba579a116a0c76534a91b3eb3411e539 | |
parent | 91aeb371752f8c10dda0bbc156452bcb6839bd21 (diff) | |
download | pygments-057a8da4c453d7507b2f879413c5de64b930391f.tar.gz |
Factored out LiterateLexer as a base class for both LiterateAgdaLexer and
LiterateHaskellLexer.
-rw-r--r-- | pygments/lexers/functional.py | 187 | ||||
-rw-r--r-- | tests/test_basic_api.py | 6 |
2 files changed, 98 insertions, 95 deletions
diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py index edd139c1..5a5097fa 100644 --- a/pygments/lexers/functional.py +++ b/pygments/lexers/functional.py @@ -22,6 +22,9 @@ __all__ = ['RacketLexer', 'SchemeLexer', 'CommonLispLexer', 'HaskellLexer', 'ElixirConsoleLexer', 'KokaLexer'] +line_re = re.compile('.*?\n') + + class RacketLexer(RegexLexer): """ Lexer for `Racket <http://racket-lang.org/>`_ source code (formerly known as @@ -1012,90 +1015,6 @@ class HaskellLexer(RegexLexer): } -line_re = re.compile('.*?\n') -bird_re = re.compile(r'(>[ \t]*)(.*\n)') - -# bird-style -def _bird_get_tokens_unprocessed(text, baselexer): - code = '' - insertions = [] - for match in line_re.finditer(text): - line = match.group() - m = bird_re.match(line) - if m: - insertions.append((len(code), - [(0, Comment.Special, m.group(1))])) - code += m.group(2) - else: - insertions.append((len(code), [(0, Text, line)])) - for item in do_insertions(insertions, baselexer.get_tokens_unprocessed(code)): - yield item - - -# latex-style -def _latex_get_tokens_unprocessed(text, baselexer, lxlexer): - code = '' - insertions = [] - - codelines = 0 - latex = '' - for match in line_re.finditer(text): - line = match.group() - if codelines: - if line.lstrip().startswith('\\end{code}'): - codelines = 0 - latex += line - else: - code += line - elif line.lstrip().startswith('\\begin{code}'): - codelines = 1 - latex += line - insertions.append((len(code), - list(lxlexer.get_tokens_unprocessed(latex)))) - latex = '' - else: - latex += line - insertions.append((len(code), - list(lxlexer.get_tokens_unprocessed(latex)))) - for item in do_insertions(insertions, baselexer.get_tokens_unprocessed(code)): - yield item - - -class LiterateHaskellLexer(Lexer): - """ - For Literate Haskell (Bird-style or LaTeX) source. - - Additional options accepted: - - `litstyle` - If given, must be ``"bird"`` or ``"latex"``. If not given, the style - is autodetected: if the first non-whitespace character in the source - is a backslash or percent character, LaTeX is assumed, else Bird. - - *New in Pygments 0.9.* - """ - name = 'Literate Haskell' - aliases = ['lhs', 'literate-haskell'] - filenames = ['*.lhs'] - mimetypes = ['text/x-literate-haskell'] - - def get_tokens_unprocessed(self, text): - hslexer = HaskellLexer(**self.options) - - style = self.options.get('litstyle') - if style is None: - style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird' - - if style == 'bird': - for item in _bird_get_tokens_unprocessed(text, hslexer): - yield item - else: - from pygments.lexers.text import TexLexer - lxlexer = TexLexer(**self.options) - for item in _latex_get_tokens_unprocessed(text, hslexer, lxlexer): - yield item - - class AgdaLexer(RegexLexer): """ For the `Agda <http://wiki.portal.chalmers.se/agda/pmwiki.php>`_ @@ -1167,7 +1086,95 @@ class AgdaLexer(RegexLexer): } -class LiterateAgdaLexer(Lexer): +class LiterateLexer(Lexer): + """ + Base class for lexers of literate file formats based on LaTeX or Bird-style + (prefixing each code line with ">"). + + Additional options accepted: + + `litstyle` + If given, must be ``"bird"`` or ``"latex"``. If not given, the style + is autodetected: if the first non-whitespace character in the source + is a backslash or percent character, LaTeX is assumed, else Bird. + """ + + bird_re = re.compile(r'(>[ \t]*)(.*\n)') + + def __init__(self, baselexer, **options): + self.baselexer = baselexer + Lexer.__init__(self, **options) + + def get_tokens_unprocessed(self, text): + style = self.options.get('litstyle') + if style is None: + style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird' + + code = '' + insertions = [] + if style == 'bird': + # bird-style + for match in line_re.finditer(text): + line = match.group() + m = self.bird_re.match(line) + if m: + insertions.append((len(code), + [(0, Comment.Special, m.group(1))])) + code += m.group(2) + else: + insertions.append((len(code), [(0, Text, line)])) + else: + # latex-style + from pygments.lexers.text import TexLexer + lxlexer = TexLexer(**self.options) + codelines = 0 + latex = '' + for match in line_re.finditer(text): + line = match.group() + if codelines: + if line.lstrip().startswith('\\end{code}'): + codelines = 0 + latex += line + else: + code += line + elif line.lstrip().startswith('\\begin{code}'): + codelines = 1 + latex += line + insertions.append((len(code), + list(lxlexer.get_tokens_unprocessed(latex)))) + latex = '' + else: + latex += line + insertions.append((len(code), + list(lxlexer.get_tokens_unprocessed(latex)))) + for item in do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code)): + yield item + + +class LiterateHaskellLexer(LiterateLexer): + """ + For Literate Haskell (Bird-style or LaTeX) source. + + Additional options accepted: + + `litstyle` + If given, must be ``"bird"`` or ``"latex"``. If not given, the style + is autodetected: if the first non-whitespace character in the source + is a backslash or percent character, LaTeX is assumed, else Bird. + + *New in Pygments 0.9.* + """ + name = 'Literate Haskell' + aliases = ['lhs', 'literate-haskell'] + filenames = ['*.lhs'] + mimetypes = ['text/x-literate-haskell'] + + def __init__(self, **options): + hslexer = HaskellLexer(**options) + LiterateLexer.__init__(self, hslexer, **options) + + +class LiterateAgdaLexer(LiterateLexer): """ For Literate Agda source. """ @@ -1176,13 +1183,9 @@ class LiterateAgdaLexer(Lexer): filenames = ['*.lagda'] mimetypes = ['text/x-literate-agda'] - def get_tokens_unprocessed(self, text): - agdalexer = AgdaLexer(**self.options) - - from pygments.lexers.text import TexLexer - lxlexer = TexLexer(**self.options) - for item in _latex_get_tokens_unprocessed(text, agdalexer, lxlexer): - yield item + def __init__(self, **options): + agdalexer = AgdaLexer(**options) + LiterateLexer.__init__(self, agdalexer, litstyle='latex', **options) class SMLLexer(RegexLexer): diff --git a/tests/test_basic_api.py b/tests/test_basic_api.py index 00dc26f0..18ed8d64 100644 --- a/tests/test_basic_api.py +++ b/tests/test_basic_api.py @@ -92,9 +92,9 @@ def test_lexer_options(): if cls.__name__ not in ( 'PythonConsoleLexer', 'RConsoleLexer', 'RubyConsoleLexer', 'SqliteConsoleLexer', 'MatlabSessionLexer', 'ErlangShellLexer', - 'BashSessionLexer', 'LiterateHaskellLexer', 'PostgresConsoleLexer', - 'ElixirConsoleLexer', 'JuliaConsoleLexer', 'RobotFrameworkLexer', - 'DylanConsoleLexer', 'ShellSessionLexer'): + 'BashSessionLexer', 'LiterateHaskellLexer', 'LiterateAgdaLexer', + 'PostgresConsoleLexer', 'ElixirConsoleLexer', 'JuliaConsoleLexer', + 'RobotFrameworkLexer', 'DylanConsoleLexer', 'ShellSessionLexer'): inst = cls(ensurenl=False) ensure(inst.get_tokens('a\nb'), 'a\nb') inst = cls(ensurenl=False, stripall=True) |