Merged in fix-1506-2 (pull request #817)

Never pop all states from the stack, even if a lexer wants to.
author: Georg Brandl <georg@python.org> 2019-05-07 15:54:20 +0000
committer: Georg Brandl <georg@python.org> 2019-05-07 15:54:20 +0000
commit: 053c41fa754bb92e0a19f45e008c2cd62446de4d (patch)
tree: 6a339f51bde966224ca01d3735f3c9f491b255ba
parent: c3a5a7023c0a266845c485058241ffed2eb37176 (diff)
parent: 974ce7b50b54fd4fbd381bdec8efc784850d0823 (diff)
download: pygments-053c41fa754bb92e0a19f45e008c2cd62446de4d.tar.gz
3 files changed, 35 insertions, 13 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 90905ba5..62d66318 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -639,14 +639,20 @@ class RegexLexer(Lexer):
                         if isinstance(new_state, tuple):
                             for state in new_state:
                                 if state == '#pop':
-                                    statestack.pop()
+                                    if len(statestack) > 1:
+                                        statestack.pop()
                                 elif state == '#push':
                                     statestack.append(statestack[-1])
                                 else:
                                     statestack.append(state)
                         elif isinstance(new_state, int):
-                            # pop
-                            del statestack[new_state:]
+                            # pop, but keep at least one state on the stack
+                            # (random code leading to unexpected pops should
+                            # not allow exceptions)
+                            if abs(new_state) >= len(statestack):
+                                del statestack[1:]
+                            else:
+                                del statestack[new_state:]
                         elif new_state == '#push':
                             statestack.append(statestack[-1])
                         else:
@@ -724,14 +730,18 @@ class ExtendedRegexLexer(RegexLexer):
                         if isinstance(new_state, tuple):
                             for state in new_state:
                                 if state == '#pop':
-                                    ctx.stack.pop()
+                                    if len(ctx.stack) > 1:
+                                        ctx.stack.pop()
                                 elif state == '#push':
                                     ctx.stack.append(ctx.stack[-1])
                                 else:
                                     ctx.stack.append(state)
                         elif isinstance(new_state, int):
-                            # pop
-                            del ctx.stack[new_state:]
+                            # never pop the last state; see RegexLexer for why
+                            if abs(new_state) >= len(ctx.stack):
+                                del ctx.stack[1:]
+                            else:
+                                del ctx.stack[new_state:]
                         elif new_state == '#push':
                             ctx.stack.append(ctx.stack[-1])
                         else:
diff --git a/pygments/lexers/asm.py b/pygments/lexers/asm.py
index 7100868c..b522450c 100644
--- a/pygments/lexers/asm.py
+++ b/pygments/lexers/asm.py
@@ -690,7 +690,7 @@ class Dasm16Lexer(RegexLexer):
 
     # Regexes yo
     char = r'[a-zA-Z$._0-9@]'
-    identifier = r'(?:[a-zA-Z$_]' + char + '*|\.' + char + '+)'
+    identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
     number = r'[+-]?(?:0[xX][a-zA-Z0-9]+|\d+)'
     binary_number = r'0b[01_]+'
     instruction = r'(?i)(' + '|'.join(INSTRUCTIONS) + ')'
diff --git a/tests/test_regexlexer.py b/tests/test_regexlexer.py
index d919a950..778f3d03 100644
--- a/tests/test_regexlexer.py
+++ b/tests/test_regexlexer.py
@@ -11,7 +11,6 @@ import unittest
 
 from pygments.token import Text
 from pygments.lexer import RegexLexer
-from pygments.lexer import bygroups
 from pygments.lexer import default
 
 
@@ -21,6 +20,8 @@ class TestLexer(RegexLexer):
         'root': [
             ('a', Text.Root, 'rag'),
             ('e', Text.Root),
+            ('#', Text.Root, '#pop'),
+            ('@', Text.Root, ('#pop', '#pop')),
             default(('beer', 'beer'))
         ],
         'beer': [
@@ -37,18 +38,29 @@ class TupleTransTest(unittest.TestCase):
     def test(self):
         lx = TestLexer()
         toks = list(lx.get_tokens_unprocessed('abcde'))
-        self.assertEqual(toks,
-           [(0, Text.Root, 'a'), (1, Text.Rag, 'b'), (2, Text.Rag, 'c'),
+        self.assertEqual(toks, [
+            (0, Text.Root, 'a'), (1, Text.Rag, 'b'), (2, Text.Rag, 'c'),
             (3, Text.Beer, 'd'), (4, Text.Root, 'e')])
 
     def test_multiline(self):
         lx = TestLexer()
         toks = list(lx.get_tokens_unprocessed('a\ne'))
-        self.assertEqual(toks,
-           [(0, Text.Root, 'a'), (1, Text, u'\n'),
-            (2, Text.Root, 'e')])
+        self.assertEqual(toks, [
+            (0, Text.Root, 'a'), (1, Text, u'\n'), (2, Text.Root, 'e')])
 
     def test_default(self):
         lx = TestLexer()
         toks = list(lx.get_tokens_unprocessed('d'))
         self.assertEqual(toks, [(0, Text.Beer, 'd')])
+
+
+class PopEmptyTest(unittest.TestCase):
+    def test_regular(self):
+        lx = TestLexer()
+        toks = list(lx.get_tokens_unprocessed('#e'))
+        self.assertEqual(toks, [(0, Text.Root, '#'), (1, Text.Root, 'e')])
+
+    def test_tuple(self):
+        lx = TestLexer()
+        toks = list(lx.get_tokens_unprocessed('@e'))
+        self.assertEqual(toks, [(0, Text.Root, '@'), (1, Text.Root, 'e')])
author	Georg Brandl <georg@python.org>	2019-05-07 15:54:20 +0000
committer	Georg Brandl <georg@python.org>	2019-05-07 15:54:20 +0000
commit	053c41fa754bb92e0a19f45e008c2cd62446de4d (patch)
tree	6a339f51bde966224ca01d3735f3c9f491b255ba
parent	c3a5a7023c0a266845c485058241ffed2eb37176 (diff)
parent	974ce7b50b54fd4fbd381bdec8efc784850d0823 (diff)
download	pygments-053c41fa754bb92e0a19f45e008c2cd62446de4d.tar.gz