From deea374f51f82b1ea16b27df3dce1749be93f614 Mon Sep 17 00:00:00 2001 From: Georg Brandl Date: Mon, 6 May 2019 07:42:02 +0200 Subject: Never pop all states from the stack, even if a lexer wants to. fixes #1506 --- pygments/lexer.py | 22 ++++++++++++++++------ tests/test_regexlexer.py | 24 ++++++++++++++++++------ 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/pygments/lexer.py b/pygments/lexer.py index 90905ba5..62d66318 100644 --- a/pygments/lexer.py +++ b/pygments/lexer.py @@ -639,14 +639,20 @@ class RegexLexer(Lexer): if isinstance(new_state, tuple): for state in new_state: if state == '#pop': - statestack.pop() + if len(statestack) > 1: + statestack.pop() elif state == '#push': statestack.append(statestack[-1]) else: statestack.append(state) elif isinstance(new_state, int): - # pop - del statestack[new_state:] + # pop, but keep at least one state on the stack + # (random code leading to unexpected pops should + # not allow exceptions) + if abs(new_state) >= len(statestack): + del statestack[1:] + else: + del statestack[new_state:] elif new_state == '#push': statestack.append(statestack[-1]) else: @@ -724,14 +730,18 @@ class ExtendedRegexLexer(RegexLexer): if isinstance(new_state, tuple): for state in new_state: if state == '#pop': - ctx.stack.pop() + if len(ctx.stack) > 1: + ctx.stack.pop() elif state == '#push': ctx.stack.append(ctx.stack[-1]) else: ctx.stack.append(state) elif isinstance(new_state, int): - # pop - del ctx.stack[new_state:] + # see RegexLexer for why this check is made + if abs(new_state) >= len(ctx.stack): + del ctx.state[1:] + else: + del ctx.stack[new_state:] elif new_state == '#push': ctx.stack.append(ctx.stack[-1]) else: diff --git a/tests/test_regexlexer.py b/tests/test_regexlexer.py index d919a950..778f3d03 100644 --- a/tests/test_regexlexer.py +++ b/tests/test_regexlexer.py @@ -11,7 +11,6 @@ import unittest from pygments.token import Text from pygments.lexer import RegexLexer -from pygments.lexer import bygroups from pygments.lexer import default @@ -21,6 +20,8 @@ class TestLexer(RegexLexer): 'root': [ ('a', Text.Root, 'rag'), ('e', Text.Root), + ('#', Text.Root, '#pop'), + ('@', Text.Root, ('#pop', '#pop')), default(('beer', 'beer')) ], 'beer': [ @@ -37,18 +38,29 @@ class TupleTransTest(unittest.TestCase): def test(self): lx = TestLexer() toks = list(lx.get_tokens_unprocessed('abcde')) - self.assertEqual(toks, - [(0, Text.Root, 'a'), (1, Text.Rag, 'b'), (2, Text.Rag, 'c'), + self.assertEqual(toks, [ + (0, Text.Root, 'a'), (1, Text.Rag, 'b'), (2, Text.Rag, 'c'), (3, Text.Beer, 'd'), (4, Text.Root, 'e')]) def test_multiline(self): lx = TestLexer() toks = list(lx.get_tokens_unprocessed('a\ne')) - self.assertEqual(toks, - [(0, Text.Root, 'a'), (1, Text, u'\n'), - (2, Text.Root, 'e')]) + self.assertEqual(toks, [ + (0, Text.Root, 'a'), (1, Text, u'\n'), (2, Text.Root, 'e')]) def test_default(self): lx = TestLexer() toks = list(lx.get_tokens_unprocessed('d')) self.assertEqual(toks, [(0, Text.Beer, 'd')]) + + +class PopEmptyTest(unittest.TestCase): + def test_regular(self): + lx = TestLexer() + toks = list(lx.get_tokens_unprocessed('#e')) + self.assertEqual(toks, [(0, Text.Root, '#'), (1, Text.Root, 'e')]) + + def test_tuple(self): + lx = TestLexer() + toks = list(lx.get_tokens_unprocessed('@e')) + self.assertEqual(toks, [(0, Text.Root, '@'), (1, Text.Root, 'e')]) -- cgit v1.2.1