diff options
author | Matthäus G. Chajdas <dev@anteru.net> | 2019-05-21 18:30:17 +0200 |
---|---|---|
committer | Matthäus G. Chajdas <dev@anteru.net> | 2019-05-21 18:30:17 +0200 |
commit | 8202e648945351366bd0c465d72953fae40f4783 (patch) | |
tree | 404d0eab5b44e68a20c61c2b0b85771007da7223 | |
parent | dcbf0c89d9ff12723d21410104b40739689e7afd (diff) | |
download | pygments-8202e648945351366bd0c465d72953fae40f4783.tar.gz |
Fix directive parsing in NasmLexer (fixes #1517.)
Directives were parsed without regard to the whitespace after them, which
caused the cpuid instruction to be parsed as CPU & id instead of cpuid. We now
require at least one whitespace character after a directive, which seems to
match the Nasm documentation.
-rw-r--r-- | pygments/lexers/asm.py | 6 | ||||
-rw-r--r-- | tests/test_asm.py | 30 |
2 files changed, 34 insertions, 2 deletions
diff --git a/pygments/lexers/asm.py b/pygments/lexers/asm.py index b522450c..3d2933d6 100644 --- a/pygments/lexers/asm.py +++ b/pygments/lexers/asm.py @@ -468,9 +468,11 @@ class NasmLexer(RegexLexer): r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]') wordop = r'seg|wrt|strict' type = r'byte|[dq]?word' - directives = (r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|' + # Directives must be followed by whitespace, otherwise CPU will match + # cpuid for instance. + directives = (r'(?:BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|' r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|' - r'EXPORT|LIBRARY|MODULE') + r'EXPORT|LIBRARY|MODULE)\s+') flags = re.IGNORECASE | re.MULTILINE tokens = { diff --git a/tests/test_asm.py b/tests/test_asm.py new file mode 100644 index 00000000..8eaed248 --- /dev/null +++ b/tests/test_asm.py @@ -0,0 +1,30 @@ +# -*- coding: utf-8 -*- +""" + Basic ColdfusionHtmlLexer Test + ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. +""" + +import unittest +import os + +from pygments.token import Token +from pygments.lexers import NasmLexer + + +class NasmLexerTest(unittest.TestCase): + + def setUp(self): + self.lexer = NasmLexer() + + def testCPUID(self): + # CPU is a valid directive, and we don't want to parse this as + # cpu id, but as a single token. See bug #1517 + fragment = 'cpuid' + expected = [ + (Token.Name.Function, u'cpuid'), + (Token.Text, u'\n'), + ] + self.assertEqual(expected, list(self.lexer.get_tokens(fragment))) |