summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorg Brandl <georg@python.org>2010-02-18 16:39:01 +0100
committerGeorg Brandl <georg@python.org>2010-02-18 16:39:01 +0100
commit1fd98861b9d75594516ac0dbbfed6b28b5271c22 (patch)
tree78f148a89ee02cead2ccba52e23b0348819208cb
parente679aa7202885b20703921c0565db8b660e38986 (diff)
downloadpygments-1fd98861b9d75594516ac0dbbfed6b28b5271c22.tar.gz
- Added the ``ensurenl`` lexer option, which can be used to suppress the
  automatic addition of a newline to the lexer input.
- Fixed a bug in `do_insertions()` used for multi-lexer languages.
- Added tests for standard lexer whitespace options.
-rw-r--r--CHANGES5
-rw-r--r--pygments/lexer.py15
-rw-r--r--pygments/lexers/functional.py2
-rw-r--r--tests/test_basic_api.py25
4 files changed, 44 insertions, 3 deletions
diff --git a/CHANGES b/CHANGES
index 51e1d890..6587f883 100644
--- a/CHANGES
+++ b/CHANGES
@@ -7,6 +7,9 @@ Version 1.3
-----------
(in development)
+- Added the ``ensurenl`` lexer option, which can be used to suppress the
+ automatic addition of a newline to the lexer input.
+
- Lexers added:
* Ada
@@ -17,6 +20,8 @@ Version 1.3
* Haml and Sass
* CoffeeScript
+- Fixed a bug in `do_insertions()` used for multi-lexer languages.
+
- Gherkin lexer: Fixed single apostrophe bug and added new i18n keywords.
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 7e86841a..fbcc39a6 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -46,6 +46,10 @@ class Lexer(object):
``stripall``
Strip all leading and trailing whitespace from the input
(default: False).
+ ``ensurenl``
+ Make sure that the input ends with a newline (default: True). This
+ is required for some lexers that consume input linewise.
+ *New in Pygments 1.3.*
``tabsize``
If given and greater than 0, expand tabs in the input (default: 0).
``encoding``
@@ -77,6 +81,7 @@ class Lexer(object):
self.options = options
self.stripnl = get_bool_opt(options, 'stripnl', True)
self.stripall = get_bool_opt(options, 'stripall', False)
+ self.ensurenl = get_bool_opt(options, 'ensurenl', True)
self.tabsize = get_int_opt(options, 'tabsize', 0)
self.encoding = options.get('encoding', 'latin1')
# self.encoding = options.get('inencoding', None) or self.encoding
@@ -150,7 +155,7 @@ class Lexer(object):
text = text.strip('\n')
if self.tabsize > 0:
text = text.expandtabs(self.tabsize)
- if not text.endswith('\n'):
+ if self.ensurenl and not text.endswith('\n'):
text += '\n'
def streamer():
@@ -641,9 +646,15 @@ def do_insertions(insertions, tokens):
realpos += len(v) - oldi
# leftover tokens
- if insleft:
+ while insleft:
# no normal tokens, set realpos to zero
realpos = realpos or 0
for p, t, v in itokens:
yield realpos, t, v
realpos += len(v)
+ try:
+ index, itokens = insertions.next()
+ except StopIteration:
+ insleft = False
+ break # not strictly necessary
+
diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py
index ea449f90..ffbd7538 100644
--- a/pygments/lexers/functional.py
+++ b/pygments/lexers/functional.py
@@ -470,7 +470,7 @@ class LiterateHaskellLexer(Lexer):
style = self.options.get('litstyle')
if style is None:
- style = (text.lstrip()[0] in '%\\') and 'latex' or 'bird'
+ style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'
code = ''
insertions = []
diff --git a/tests/test_basic_api.py b/tests/test_basic_api.py
index 44a656b4..c0de925f 100644
--- a/tests/test_basic_api.py
+++ b/tests/test_basic_api.py
@@ -68,6 +68,31 @@ class LexersTest(unittest.TestCase):
ae(txt, test_content, "%s lexer roundtrip failed: %r != %r" %
(lexer.name, test_content, txt))
+ def test_lexer_options(self):
+ # test that the basic options work
+ def ensure(tokens, output):
+ concatenated = ''.join(token[1] for token in tokens)
+ self.assertEquals(concatenated, output,
+ '%s: %r != %r' % (lexer, concatenated, output))
+ for lexer in lexers._iter_lexerclasses():
+ if lexer.__name__ == 'RawTokenLexer':
+ # this one is special
+ continue
+ inst = lexer(stripnl=False)
+ ensure(inst.get_tokens('a\nb'), 'a\nb\n')
+ ensure(inst.get_tokens('\n\n\n'), '\n\n\n')
+ inst = lexer(stripall=True)
+ ensure(inst.get_tokens(' \n b\n\n\n'), 'b\n')
+ # some lexers require full lines in input
+ if lexer.__name__ not in (
+ 'PythonConsoleLexer', 'RConsoleLexer', 'RubyConsoleLexer',
+ 'SqliteConsoleLexer', 'MatlabSessionLexer', 'ErlangShellLexer',
+ 'BashSessionLexer', 'LiterateHaskellLexer'):
+ inst = lexer(ensurenl=False)
+ ensure(inst.get_tokens('a\nb'), 'a\nb')
+ inst = lexer(ensurenl=False, stripall=True)
+ ensure(inst.get_tokens('a\nb\n\n'), 'a\nb')
+
def test_get_lexers(self):
a = self.assert_
ae = self.assertEquals