summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Georg Brandl <georg@python.org> 2019-05-07 15:54:20 +0000
committer: Georg Brandl <georg@python.org> 2019-05-07 15:54:20 +0000
commit: 053c41fa754bb92e0a19f45e008c2cd62446de4d (patch)
tree: 6a339f51bde966224ca01d3735f3c9f491b255ba
parent: c3a5a7023c0a266845c485058241ffed2eb37176 (diff)
parent: 974ce7b50b54fd4fbd381bdec8efc784850d0823 (diff)
download: pygments-053c41fa754bb92e0a19f45e008c2cd62446de4d.tar.gz
Merged in fix-1506-2 (pull request #817)
Never pop all states from the stack, even if a lexer wants to.
-rw-r--r-- pygments/lexer.py | 22
-rw-r--r-- pygments/lexers/asm.py | 2
-rw-r--r-- tests/test_regexlexer.py | 24
3 files changed, 35 insertions, 13 deletions
diff --git a/pygments/lexer.py b/pygments/lexer.py
index 90905ba5..62d66318 100644
--- a/pygments/lexer.py
+++ b/pygments/lexer.py
@@ -639,14 +639,20 @@ class RegexLexer(Lexer):
if isinstance(new_state, tuple):
for state in new_state:
if state == '#pop':
- statestack.pop()
+ if len(statestack) > 1:
+ statestack.pop()
elif state == '#push':
statestack.append(statestack[-1])
else:
statestack.append(state)
elif isinstance(new_state, int):
- # pop
- del statestack[new_state:]
+ # pop, but keep at least one state on the stack
+ # (random code leading to unexpected pops should
+ # not allow exceptions)
+ if abs(new_state) >= len(statestack):
+ del statestack[1:]
+ else:
+ del statestack[new_state:]
elif new_state == '#push':
statestack.append(statestack[-1])
else:
@@ -724,14 +730,18 @@ class ExtendedRegexLexer(RegexLexer):
if isinstance(new_state, tuple):
for state in new_state:
if state == '#pop':
- ctx.stack.pop()
+ if len(ctx.stack) > 1:
+ ctx.stack.pop()
elif state == '#push':
ctx.stack.append(ctx.stack[-1])
else:
ctx.stack.append(state)
elif isinstance(new_state, int):
- # pop
- del ctx.stack[new_state:]
+ # see RegexLexer for why this check is made
+ if abs(new_state) >= len(ctx.stack):
+ del ctx.stack[1:]
+ else:
+ del ctx.stack[new_state:]
elif new_state == '#push':
ctx.stack.append(ctx.stack[-1])
else:
diff --git a/pygments/lexers/asm.py b/pygments/lexers/asm.py
index 7100868c..b522450c 100644
--- a/pygments/lexers/asm.py
+++ b/pygments/lexers/asm.py
@@ -690,7 +690,7 @@ class Dasm16Lexer(RegexLexer):
# Regexes yo
char = r'[a-zA-Z$._0-9@]'
- identifier = r'(?:[a-zA-Z$_]' + char + '*|\.' + char + '+)'
+ identifier = r'(?:[a-zA-Z$_]' + char + r'*|\.' + char + '+)'
number = r'[+-]?(?:0[xX][a-zA-Z0-9]+|\d+)'
binary_number = r'0b[01_]+'
instruction = r'(?i)(' + '|'.join(INSTRUCTIONS) + ')'
diff --git a/tests/test_regexlexer.py b/tests/test_regexlexer.py
index d919a950..778f3d03 100644
--- a/tests/test_regexlexer.py
+++ b/tests/test_regexlexer.py
@@ -11,7 +11,6 @@ import unittest
from pygments.token import Text
from pygments.lexer import RegexLexer
-from pygments.lexer import bygroups
from pygments.lexer import default
@@ -21,6 +20,8 @@ class TestLexer(RegexLexer):
'root': [
('a', Text.Root, 'rag'),
('e', Text.Root),
+ ('#', Text.Root, '#pop'),
+ ('@', Text.Root, ('#pop', '#pop')),
default(('beer', 'beer'))
],
'beer': [
@@ -37,18 +38,29 @@ class TupleTransTest(unittest.TestCase):
def test(self):
lx = TestLexer()
toks = list(lx.get_tokens_unprocessed('abcde'))
- self.assertEqual(toks,
- [(0, Text.Root, 'a'), (1, Text.Rag, 'b'), (2, Text.Rag, 'c'),
+ self.assertEqual(toks, [
+ (0, Text.Root, 'a'), (1, Text.Rag, 'b'), (2, Text.Rag, 'c'),
(3, Text.Beer, 'd'), (4, Text.Root, 'e')])
def test_multiline(self):
lx = TestLexer()
toks = list(lx.get_tokens_unprocessed('a\ne'))
- self.assertEqual(toks,
- [(0, Text.Root, 'a'), (1, Text, u'\n'),
- (2, Text.Root, 'e')])
+ self.assertEqual(toks, [
+ (0, Text.Root, 'a'), (1, Text, u'\n'), (2, Text.Root, 'e')])
def test_default(self):
lx = TestLexer()
toks = list(lx.get_tokens_unprocessed('d'))
self.assertEqual(toks, [(0, Text.Beer, 'd')])
+
+
+class PopEmptyTest(unittest.TestCase):
+ def test_regular(self):
+ lx = TestLexer()
+ toks = list(lx.get_tokens_unprocessed('#e'))
+ self.assertEqual(toks, [(0, Text.Root, '#'), (1, Text.Root, 'e')])
+
+ def test_tuple(self):
+ lx = TestLexer()
+ toks = list(lx.get_tokens_unprocessed('@e'))
+ self.assertEqual(toks, [(0, Text.Root, '@'), (1, Text.Root, 'e')])