summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorg Brandl <georg@python.org>2010-02-18 16:39:01 +0100
committerGeorg Brandl <georg@python.org>2010-02-18 16:39:01 +0100
commit1fd98861b9d75594516ac0dbbfed6b28b5271c22 (patch)
tree78f148a89ee02cead2ccba52e23b0348819208cb
parente679aa7202885b20703921c0565db8b660e38986 (diff)
downloadpygments-1fd98861b9d75594516ac0dbbfed6b28b5271c22.tar.gz
- Added the ``ensurenl`` lexer option, which can be used to suppress the
  automatic addition of a newline to the lexer input.
- Fixed a bug in `do_insertions()` used for multi-lexer languages.
- Added tests for standard lexer whitespace options.
-rw-r--r--CHANGES5
-rw-r--r--pygments/lexer.py15
-rw-r--r--pygments/lexers/functional.py2
-rw-r--r--tests/test_basic_api.py25
4 files changed, 44 insertions, 3 deletions
diff --git a/CHANGES b/CHANGES
index 51e1d890..6587f883 100644
--- a/CHANGES
+++ b/CHANGES
@@ -7,6 +7,9 @@ Version 1.3
-----------
(in development)
+- Added the ``ensurenl`` lexer option, which can be used to suppress the
+ automatic addition of a newline to the lexer input.
+
- Lexers added:
* Ada
@@ -17,6 +20,8 @@ Version 1.3
* Haml and Sass
* CoffeeScript
+- Fixed a bug in `do_insertions()` used for multi-lexer languages.
+
- Gherkin lexer: Fixed single apostrophe bug and added new i18n keywords.
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 7e86841a..fbcc39a6 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -46,6 +46,10 @@ class Lexer(object):
``stripall``
Strip all leading and trailing whitespace from the input
(default: False).
+ ``ensurenl``
+ Make sure that the input ends with a newline (default: True). This
+ is required for some lexers that consume input linewise.
+ *New in Pygments 1.3.*
``tabsize``
If given and greater than 0, expand tabs in the input (default: 0).
``encoding``
@@ -77,6 +81,7 @@ class Lexer(object):
self.options = options
self.stripnl = get_bool_opt(options, 'stripnl', True)
self.stripall = get_bool_opt(options, 'stripall', False)
+ self.ensurenl = get_bool_opt(options, 'ensurenl', True)
self.tabsize = get_int_opt(options, 'tabsize', 0)
self.encoding = options.get('encoding', 'latin1')
# self.encoding = options.get('inencoding', None) or self.encoding
@@ -150,7 +155,7 @@ class Lexer(object):
text = text.strip('\n')
if self.tabsize > 0:
text = text.expandtabs(self.tabsize)
- if not text.endswith('\n'):
+ if self.ensurenl and not text.endswith('\n'):
text += '\n'
def streamer():
@@ -641,9 +646,15 @@ def do_insertions(insertions, tokens):
realpos += len(v) - oldi
# leftover tokens
- if insleft:
+ while insleft:
# no normal tokens, set realpos to zero
realpos = realpos or 0
for p, t, v in itokens:
yield realpos, t, v
realpos += len(v)
+ try:
+ index, itokens = insertions.next()
+ except StopIteration:
+ insleft = False
+ break # not strictly necessary
+
diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py
index ea449f90..ffbd7538 100644
--- a/pygments/lexers/functional.py
+++ b/pygments/lexers/functional.py
@@ -470,7 +470,7 @@ class LiterateHaskellLexer(Lexer):
style = self.options.get('litstyle')
if style is None:
- style = (text.lstrip()[0] in '%\\') and 'latex' or 'bird'
+ style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'
code = ''
insertions = []
diff --git a/tests/test_basic_api.py b/tests/test_basic_api.py
index 44a656b4..c0de925f 100644
--- a/tests/test_basic_api.py
+++ b/tests/test_basic_api.py
@@ -68,6 +68,31 @@ class LexersTest(unittest.TestCase):
ae(txt, test_content, "%s lexer roundtrip failed: %r != %r" %
(lexer.name, test_content, txt))
+ def test_lexer_options(self):
+ # test that the basic options work
+ def ensure(tokens, output):
+ concatenated = ''.join(token[1] for token in tokens)
+ self.assertEquals(concatenated, output,
+ '%s: %r != %r' % (lexer, concatenated, output))
+ for lexer in lexers._iter_lexerclasses():
+ if lexer.__name__ == 'RawTokenLexer':
+ # this one is special
+ continue
+ inst = lexer(stripnl=False)
+ ensure(inst.get_tokens('a\nb'), 'a\nb\n')
+ ensure(inst.get_tokens('\n\n\n'), '\n\n\n')
+ inst = lexer(stripall=True)
+ ensure(inst.get_tokens(' \n b\n\n\n'), 'b\n')
+ # some lexers require full lines in input
+ if lexer.__name__ not in (
+ 'PythonConsoleLexer', 'RConsoleLexer', 'RubyConsoleLexer',
+ 'SqliteConsoleLexer', 'MatlabSessionLexer', 'ErlangShellLexer',
+ 'BashSessionLexer', 'LiterateHaskellLexer'):
+ inst = lexer(ensurenl=False)
+ ensure(inst.get_tokens('a\nb'), 'a\nb')
+ inst = lexer(ensurenl=False, stripall=True)
+ ensure(inst.get_tokens('a\nb\n\n'), 'a\nb')
+
def test_get_lexers(self):
a = self.assert_
ae = self.assertEquals