Never pop all states from the stack, even if a lexer wants to.

fixes #1506
author: Georg Brandl <georg@python.org> 2019-05-06 07:42:02 +0200
committer: Georg Brandl <georg@python.org> 2019-05-06 07:42:02 +0200
commit: deea374f51f82b1ea16b27df3dce1749be93f614 (patch)
tree: 2204585abebf3a19310e4e843df1c8f2f8240511
parent: 249e5feec73189cfff3a4c81d6a6c9fc821286cd (diff)
download: pygments-deea374f51f82b1ea16b27df3dce1749be93f614.tar.gz
2 files changed, 34 insertions, 12 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 90905ba5..62d66318 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -639,14 +639,20 @@ class RegexLexer(Lexer):
                         if isinstance(new_state, tuple):
                             for state in new_state:
                                 if state == '#pop':
-                                    statestack.pop()
+                                    if len(statestack) > 1:
+                                        statestack.pop()
                                 elif state == '#push':
                                     statestack.append(statestack[-1])
                                 else:
                                     statestack.append(state)
                         elif isinstance(new_state, int):
-                            # pop
-                            del statestack[new_state:]
+                            # pop, but keep at least one state on the stack
+                            # (random code leading to unexpected pops should
+                            # not allow exceptions)
+                            if abs(new_state) >= len(statestack):
+                                del statestack[1:]
+                            else:
+                                del statestack[new_state:]
                         elif new_state == '#push':
                             statestack.append(statestack[-1])
                         else:
@@ -724,14 +730,18 @@ class ExtendedRegexLexer(RegexLexer):
                         if isinstance(new_state, tuple):
                             for state in new_state:
                                 if state == '#pop':
-                                    ctx.stack.pop()
+                                    if len(ctx.stack) > 1:
+                                        ctx.stack.pop()
                                 elif state == '#push':
                                     ctx.stack.append(ctx.stack[-1])
                                 else:
                                     ctx.stack.append(state)
                         elif isinstance(new_state, int):
-                            # pop
-                            del ctx.stack[new_state:]
+                            # see RegexLexer for why this check is made
+                            if abs(new_state) >= len(ctx.stack):
+                                del ctx.stack[1:]
+                            else:
+                                del ctx.stack[new_state:]
                         elif new_state == '#push':
                             ctx.stack.append(ctx.stack[-1])
                         else:
diff --git a/tests/test_regexlexer.py b/tests/test_regexlexer.py
index d919a950..778f3d03 100644
--- a/tests/test_regexlexer.py
+++ b/tests/test_regexlexer.py
@@ -11,7 +11,6 @@ import unittest
 
 from pygments.token import Text
 from pygments.lexer import RegexLexer
-from pygments.lexer import bygroups
 from pygments.lexer import default
 
 
@@ -21,6 +20,8 @@ class TestLexer(RegexLexer):
         'root': [
             ('a', Text.Root, 'rag'),
             ('e', Text.Root),
+            ('#', Text.Root, '#pop'),
+            ('@', Text.Root, ('#pop', '#pop')),
             default(('beer', 'beer'))
         ],
         'beer': [
@@ -37,18 +38,29 @@ class TupleTransTest(unittest.TestCase):
     def test(self):
         lx = TestLexer()
         toks = list(lx.get_tokens_unprocessed('abcde'))
-        self.assertEqual(toks,
-           [(0, Text.Root, 'a'), (1, Text.Rag, 'b'), (2, Text.Rag, 'c'),
+        self.assertEqual(toks, [
+            (0, Text.Root, 'a'), (1, Text.Rag, 'b'), (2, Text.Rag, 'c'),
             (3, Text.Beer, 'd'), (4, Text.Root, 'e')])
 
     def test_multiline(self):
         lx = TestLexer()
         toks = list(lx.get_tokens_unprocessed('a\ne'))
-        self.assertEqual(toks,
-           [(0, Text.Root, 'a'), (1, Text, u'\n'),
-            (2, Text.Root, 'e')])
+        self.assertEqual(toks, [
+            (0, Text.Root, 'a'), (1, Text, u'\n'), (2, Text.Root, 'e')])
 
     def test_default(self):
         lx = TestLexer()
         toks = list(lx.get_tokens_unprocessed('d'))
         self.assertEqual(toks, [(0, Text.Beer, 'd')])
+
+
+class PopEmptyTest(unittest.TestCase):
+    def test_regular(self):
+        lx = TestLexer()
+        toks = list(lx.get_tokens_unprocessed('#e'))
+        self.assertEqual(toks, [(0, Text.Root, '#'), (1, Text.Root, 'e')])
+
+    def test_tuple(self):
+        lx = TestLexer()
+        toks = list(lx.get_tokens_unprocessed('@e'))
+        self.assertEqual(toks, [(0, Text.Root, '@'), (1, Text.Root, 'e')])
author	Georg Brandl <georg@python.org>	2019-05-06 07:42:02 +0200
committer	Georg Brandl <georg@python.org>	2019-05-06 07:42:02 +0200
commit	deea374f51f82b1ea16b27df3dce1749be93f614 (patch)
tree	2204585abebf3a19310e4e843df1c8f2f8240511
parent	249e5feec73189cfff3a4c81d6a6c9fc821286cd (diff)
download	pygments-deea374f51f82b1ea16b27df3dce1749be93f614.tar.gz