author      Georg Brandl <georg@python.org>    2010-02-18 16:39:01 +0100
committer   Georg Brandl <georg@python.org>    2010-02-18 16:39:01 +0100
commit      1fd98861b9d75594516ac0dbbfed6b28b5271c22 (patch)
tree        78f148a89ee02cead2ccba52e23b0348819208cb
parent      e679aa7202885b20703921c0565db8b660e38986 (diff)
download    pygments-1fd98861b9d75594516ac0dbbfed6b28b5271c22.tar.gz
- Added the ``ensurenl`` lexer option, which can be used to suppress the
automatic addition of a newline to the lexer input.
- Fixed a bug in `do_insertions()` used for multi-lexer languages.
- Added tests for standard lexer whitespace options.
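For context, a minimal sketch of the new option in use, assuming this patch is applied; ``PythonLexer`` merely stands in for any lexer that does not require full lines of input:

    from pygments.lexers import PythonLexer

    # Default behaviour: a trailing newline is appended before lexing,
    # so the token values concatenate to 'x = 1\n'.
    tokens = list(PythonLexer().get_tokens('x = 1'))
    assert ''.join(value for _, value in tokens) == 'x = 1\n'

    # With ensurenl=False the input is tokenized exactly as given.
    tokens = list(PythonLexer(ensurenl=False).get_tokens('x = 1'))
    assert ''.join(value for _, value in tokens) == 'x = 1'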
-rw-r--r--   CHANGES                        |  5
-rw-r--r--   pygments/lexer.py              | 15
-rw-r--r--   pygments/lexers/functional.py  |  2
-rw-r--r--   tests/test_basic_api.py        | 25
4 files changed, 44 insertions, 3 deletions
diff --git a/CHANGES b/CHANGES
--- a/CHANGES
+++ b/CHANGES
@@ -7,6 +7,9 @@ Version 1.3
 -----------
 (in development)
 
+- Added the ``ensurenl`` lexer option, which can be used to suppress the
+  automatic addition of a newline to the lexer input.
+
 - Lexers added:
 
   * Ada
@@ -17,6 +20,8 @@ Version 1.3
   * Haml and Sass
   * CoffeeScript
 
+- Fixed a bug in `do_insertions()` used for multi-lexer languages.
+
 - Gherkin lexer: Fixed single apostrophe bug and added new
   i18n keywords.
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 7e86841a..fbcc39a6 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -46,6 +46,10 @@ class Lexer(object):
     ``stripall``
         Strip all leading and trailing whitespace from the input
         (default: False).
+    ``ensurenl``
+        Make sure that the input ends with a newline (default: True). This
+        is required for some lexers that consume input linewise.
+        *New in Pygments 1.3.*
     ``tabsize``
         If given and greater than 0, expand tabs in the input (default: 0).
     ``encoding``
@@ -77,6 +81,7 @@ class Lexer(object):
         self.options = options
         self.stripnl = get_bool_opt(options, 'stripnl', True)
         self.stripall = get_bool_opt(options, 'stripall', False)
+        self.ensurenl = get_bool_opt(options, 'ensurenl', True)
         self.tabsize = get_int_opt(options, 'tabsize', 0)
         self.encoding = options.get('encoding', 'latin1')
         # self.encoding = options.get('inencoding', None) or self.encoding
@@ -150,7 +155,7 @@ class Lexer(object):
             text = text.strip('\n')
         if self.tabsize > 0:
             text = text.expandtabs(self.tabsize)
-        if not text.endswith('\n'):
+        if self.ensurenl and not text.endswith('\n'):
             text += '\n'
 
         def streamer():
@@ -641,9 +646,15 @@ def do_insertions(insertions, tokens):
             realpos += len(v) - oldi
 
     # leftover tokens
-    if insleft:
+    while insleft:
         # no normal tokens, set realpos to zero
         realpos = realpos or 0
         for p, t, v in itokens:
            yield realpos, t, v
            realpos += len(v)
+        try:
+            index, itokens = insertions.next()
+        except StopIteration:
+            insleft = False
+            break  # not strictly necessary
+
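The `do_insertions()` hunk above turns the leftover-handling ``if`` into a ``while``: once the normal token stream is exhausted, every remaining insertion is now flushed, not just the first. A contrived sketch of that leftover case, assuming ``do_insertions`` is imported from ``pygments.lexer`` as in the patched module:

    from pygments.lexer import do_insertions
    from pygments.token import Generic

    # Two insertions but no normal tokens at all -- this exercises the
    # "leftover tokens" branch changed above.
    insertions = [
        (0, [(0, Generic.Prompt, '>>> ')]),
        (0, [(0, Generic.Prompt, '... ')]),
    ]
    out = list(do_insertions(iter(insertions), iter([])))

    # With the old `if insleft:` only the first prompt was yielded;
    # the `while insleft:` loop emits both.
    assert [v for _, _, v in out] == ['>>> ', '... ']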
diff --git a/pygments/lexers/functional.py b/pygments/lexers/functional.py
index ea449f90..ffbd7538 100644
--- a/pygments/lexers/functional.py
+++ b/pygments/lexers/functional.py
@@ -470,7 +470,7 @@ class LiterateHaskellLexer(Lexer):
 
         style = self.options.get('litstyle')
         if style is None:
-            style = (text.lstrip()[0] in '%\\') and 'latex' or 'bird'
+            style = (text.lstrip()[0:1] in '%\\') and 'latex' or 'bird'
 
         code = ''
         insertions = []
diff --git a/tests/test_basic_api.py b/tests/test_basic_api.py
index 44a656b4..c0de925f 100644
--- a/tests/test_basic_api.py
+++ b/tests/test_basic_api.py
@@ -68,6 +68,31 @@ class LexersTest(unittest.TestCase):
             ae(txt, test_content, "%s lexer roundtrip failed: %r != %r" %
                (lexer.name, test_content, txt))
 
+    def test_lexer_options(self):
+        # test that the basic options work
+        def ensure(tokens, output):
+            concatenated = ''.join(token[1] for token in tokens)
+            self.assertEquals(concatenated, output,
+                              '%s: %r != %r' % (lexer, concatenated, output))
+        for lexer in lexers._iter_lexerclasses():
+            if lexer.__name__ == 'RawTokenLexer':
+                # this one is special
+                continue
+            inst = lexer(stripnl=False)
+            ensure(inst.get_tokens('a\nb'), 'a\nb\n')
+            ensure(inst.get_tokens('\n\n\n'), '\n\n\n')
+            inst = lexer(stripall=True)
+            ensure(inst.get_tokens(' \n b\n\n\n'), 'b\n')
+            # some lexers require full lines in input
+            if lexer.__name__ not in (
+                'PythonConsoleLexer', 'RConsoleLexer', 'RubyConsoleLexer',
+                'SqliteConsoleLexer', 'MatlabSessionLexer', 'ErlangShellLexer',
+                'BashSessionLexer', 'LiterateHaskellLexer'):
+                inst = lexer(ensurenl=False)
+                ensure(inst.get_tokens('a\nb'), 'a\nb')
+                inst = lexer(ensurenl=False, stripall=True)
+                ensure(inst.get_tokens('a\nb\n\n'), 'a\nb')
+
     def test_get_lexers(self):
         a = self.assert_
         ae = self.assertEquals
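As a closing note, the ``functional.py`` hunk is a one-character robustness fix: ``text.lstrip()[0]`` raises ``IndexError`` when the input is empty or all whitespace, whereas the slice ``[0:1]`` returns ``''`` and lets the style detection proceed. In plain Python:

    text = '   '
    text.lstrip()[0:1]   # == '' -- safe, no exception raised
    # text.lstrip()[0]   # would raise IndexError: string index out of range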