Factored out LiterateLexer as a base class for both LiterateAgdaLexer and LiterateHaskellLexer.
author: Tim Baumann <tim@timbaumann.info> 2013-05-19 22:32:57 +0200
committer: Tim Baumann <tim@timbaumann.info> 2013-05-19 22:32:57 +0200
commit: 057a8da4c453d7507b2f879413c5de64b930391f (patch)
tree: 4911b52eba579a116a0c76534a91b3eb3411e539
parent: 91aeb371752f8c10dda0bbc156452bcb6839bd21 (diff)
download: pygments-057a8da4c453d7507b2f879413c5de64b930391f.tar.gz
2 files changed, 98 insertions, 95 deletions
diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py
index edd139c1..5a5097fa 100644
--- a/pygments/lexers/functional.py
+++ b/pygments/lexers/functional.py
@@ -22,6 +22,9 @@ __all__ = ['RacketLexer', 'SchemeLexer', 'CommonLispLexer', 'HaskellLexer',
            'ElixirConsoleLexer', 'KokaLexer']
 
 
+line_re = re.compile('.*?\n')
+
+
 class RacketLexer(RegexLexer):
     """
     Lexer for `Racket <http://racket-lang.org/>`_ source code (formerly known as
@@ -1012,90 +1015,6 @@ class HaskellLexer(RegexLexer):
     }
 
 
-line_re = re.compile('.*?\n')
-bird_re = re.compile(r'(>[ \t]*)(.*\n)')
-
-# bird-style
-def _bird_get_tokens_unprocessed(text, baselexer):
-    code = ''
-    insertions = []
-    for match in line_re.finditer(text):
-        line = match.group()
-        m = bird_re.match(line)
-        if m:
-            insertions.append((len(code),
-                               [(0, Comment.Special, m.group(1))]))
-            code += m.group(2)
-        else:
-            insertions.append((len(code), [(0, Text, line)]))
-    for item in do_insertions(insertions, baselexer.get_tokens_unprocessed(code)):
-        yield item
-
-
-# latex-style
-def _latex_get_tokens_unprocessed(text, baselexer, lxlexer):
-    code = ''
-    insertions = []
-
-    codelines = 0
-    latex = ''
-    for match in line_re.finditer(text):
-        line = match.group()
-        if codelines:
-            if line.lstrip().startswith('\\end{code}'):
-                codelines = 0
-                latex += line
-            else:
-                code += line
-        elif line.lstrip().startswith('\\begin{code}'):
-            codelines = 1
-            latex += line
-            insertions.append((len(code),
-                               list(lxlexer.get_tokens_unprocessed(latex))))
-            latex = ''
-        else:
-            latex += line
-    insertions.append((len(code),
-                       list(lxlexer.get_tokens_unprocessed(latex))))
-    for item in do_insertions(insertions, baselexer.get_tokens_unprocessed(code)):
-        yield item
-
-
-class LiterateHaskellLexer(Lexer):
-    """
-    For Literate Haskell (Bird-style or LaTeX) source.
-
-    Additional options accepted:
-
-    `litstyle`
-        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
-        is autodetected: if the first non-whitespace character in the source
-        is a backslash or percent character, LaTeX is assumed, else Bird.
-
-    *New in Pygments 0.9.*
-    """
-    name = 'Literate Haskell'
-    aliases = ['lhs', 'literate-haskell']
-    filenames = ['*.lhs']
-    mimetypes = ['text/x-literate-haskell']
-
-    def get_tokens_unprocessed(self, text):
-        hslexer = HaskellLexer(**self.options)
-
-        style = self.options.get('litstyle')
-        if style is None:
-            style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'
-
-        if style == 'bird':
-            for item in _bird_get_tokens_unprocessed(text, hslexer):
-                yield item
-        else:
-            from pygments.lexers.text import TexLexer
-            lxlexer = TexLexer(**self.options)
-            for item in _latex_get_tokens_unprocessed(text, hslexer, lxlexer):
-                yield item
-
-
 class AgdaLexer(RegexLexer):
     """
     For the `Agda <http://wiki.portal.chalmers.se/agda/pmwiki.php>`_
@@ -1167,7 +1086,95 @@ class AgdaLexer(RegexLexer):
     }
 
 
-class LiterateAgdaLexer(Lexer):
+class LiterateLexer(Lexer):
+    """
+    Base class for lexers of literate file formats based on LaTeX or Bird-style
+    (prefixing each code line with ">").
+
+    Additional options accepted:
+
+    `litstyle`
+        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
+        is autodetected: if the first non-whitespace character in the source
+        is a backslash or percent character, LaTeX is assumed, else Bird.
+    """
+
+    bird_re = re.compile(r'(>[ \t]*)(.*\n)')
+
+    def __init__(self, baselexer, **options):
+        self.baselexer = baselexer
+        Lexer.__init__(self, **options)
+
+    def get_tokens_unprocessed(self, text):
+        style = self.options.get('litstyle')
+        if style is None:
+            style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'
+
+        code = ''
+        insertions = []
+        if style == 'bird':
+            # bird-style
+            for match in line_re.finditer(text):
+                line = match.group()
+                m = self.bird_re.match(line)
+                if m:
+                    insertions.append((len(code),
+                                       [(0, Comment.Special, m.group(1))]))
+                    code += m.group(2)
+                else:
+                    insertions.append((len(code), [(0, Text, line)]))
+        else:
+            # latex-style
+            from pygments.lexers.text import TexLexer
+            lxlexer = TexLexer(**self.options)
+            codelines = 0
+            latex = ''
+            for match in line_re.finditer(text):
+                line = match.group()
+                if codelines:
+                    if line.lstrip().startswith('\\end{code}'):
+                        codelines = 0
+                        latex += line
+                    else:
+                        code += line
+                elif line.lstrip().startswith('\\begin{code}'):
+                    codelines = 1
+                    latex += line
+                    insertions.append((len(code),
+                                       list(lxlexer.get_tokens_unprocessed(latex))))
+                    latex = ''
+                else:
+                    latex += line
+            insertions.append((len(code),
+                               list(lxlexer.get_tokens_unprocessed(latex))))
+        for item in do_insertions(insertions, self.baselexer.get_tokens_unprocessed(code)):
+            yield item
+
+
+class LiterateHaskellLexer(LiterateLexer):
+    """
+    For Literate Haskell (Bird-style or LaTeX) source.
+
+    Additional options accepted:
+
+    `litstyle`
+        If given, must be ``"bird"`` or ``"latex"``.  If not given, the style
+        is autodetected: if the first non-whitespace character in the source
+        is a backslash or percent character, LaTeX is assumed, else Bird.
+
+    *New in Pygments 0.9.*
+    """
+    name = 'Literate Haskell'
+    aliases = ['lhs', 'literate-haskell']
+    filenames = ['*.lhs']
+    mimetypes = ['text/x-literate-haskell']
+
+    def __init__(self, **options):
+        hslexer = HaskellLexer(**options)
+        LiterateLexer.__init__(self, hslexer, **options)
+
+
+class LiterateAgdaLexer(LiterateLexer):
     """
     For Literate Agda source.
     """
@@ -1176,13 +1183,9 @@ class LiterateAgdaLexer(Lexer):
     filenames = ['*.lagda']
     mimetypes = ['text/x-literate-agda']
 
-    def get_tokens_unprocessed(self, text):
-        agdalexer = AgdaLexer(**self.options)
-
-        from pygments.lexers.text import TexLexer
-        lxlexer = TexLexer(**self.options)
-        for item in _latex_get_tokens_unprocessed(text, agdalexer, lxlexer):
-            yield item
+    def __init__(self, **options):
+        agdalexer = AgdaLexer(**options)
+        LiterateLexer.__init__(self, agdalexer, litstyle='latex', **options)
 
 
 class SMLLexer(RegexLexer):
diff --git a/tests/test_basic_api.py b/tests/test_basic_api.py
index 00dc26f0..18ed8d64 100644
--- a/tests/test_basic_api.py
+++ b/tests/test_basic_api.py
@@ -92,9 +92,9 @@ def test_lexer_options():
         if cls.__name__ not in (
             'PythonConsoleLexer', 'RConsoleLexer', 'RubyConsoleLexer',
             'SqliteConsoleLexer', 'MatlabSessionLexer', 'ErlangShellLexer',
-            'BashSessionLexer', 'LiterateHaskellLexer', 'PostgresConsoleLexer',
-            'ElixirConsoleLexer', 'JuliaConsoleLexer', 'RobotFrameworkLexer',
-            'DylanConsoleLexer', 'ShellSessionLexer'):
+            'BashSessionLexer', 'LiterateHaskellLexer', 'LiterateAgdaLexer',
+            'PostgresConsoleLexer', 'ElixirConsoleLexer', 'JuliaConsoleLexer',
+            'RobotFrameworkLexer', 'DylanConsoleLexer', 'ShellSessionLexer'):
             inst = cls(ensurenl=False)
             ensure(inst.get_tokens('a\nb'), 'a\nb')
             inst = cls(ensurenl=False, stripall=True)
author	Tim Baumann <tim@timbaumann.info>	2013-05-19 22:32:57 +0200
committer	Tim Baumann <tim@timbaumann.info>	2013-05-19 22:32:57 +0200
commit	057a8da4c453d7507b2f879413c5de64b930391f (patch)
tree	4911b52eba579a116a0c76534a91b3eb3411e539
parent	91aeb371752f8c10dda0bbc156452bcb6839bd21 (diff)
download	pygments-057a8da4c453d7507b2f879413c5de64b930391f.tar.gz