Fix directive parsing in NasmLexer (fixes #1517)

Directives were matched regardless of what followed them, which caused the cpuid instruction to be lexed as the CPU directive followed by "id" instead of as a single cpuid token. We now require whitespace after a directive, which seems to match the NASM documentation.
author: Matthäus G. Chajdas <dev@anteru.net> 2019-05-21 18:30:17 +0200
committer: Matthäus G. Chajdas <dev@anteru.net> 2019-05-21 18:30:17 +0200
commit: 8202e648945351366bd0c465d72953fae40f4783 (patch)
tree: 404d0eab5b44e68a20c61c2b0b85771007da7223
parent: dcbf0c89d9ff12723d21410104b40739689e7afd (diff)
download: pygments-8202e648945351366bd0c465d72953fae40f4783.tar.gz
2 files changed, 34 insertions, 2 deletions
diff --git a/pygments/lexers/asm.py b/pygments/lexers/asm.py
index b522450c..3d2933d6 100644
--- a/pygments/lexers/asm.py
+++ b/pygments/lexers/asm.py
@@ -468,9 +468,11 @@ class NasmLexer(RegexLexer):
                 r'mm[0-7]|cr[0-4]|dr[0-367]|tr[3-7]')
     wordop = r'seg|wrt|strict'
     type = r'byte|[dq]?word'
-    directives = (r'BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
+    # Directives must be followed by whitespace, otherwise CPU will match
+    # cpuid for instance.
+    directives = (r'(?:BITS|USE16|USE32|SECTION|SEGMENT|ABSOLUTE|EXTERN|GLOBAL|'
                   r'ORG|ALIGN|STRUC|ENDSTRUC|COMMON|CPU|GROUP|UPPERCASE|IMPORT|'
-                  r'EXPORT|LIBRARY|MODULE')
+                  r'EXPORT|LIBRARY|MODULE)\s+')
 
     flags = re.IGNORECASE | re.MULTILINE
     tokens = {
diff --git a/tests/test_asm.py b/tests/test_asm.py
new file mode 100644
index 00000000..8eaed248
--- /dev/null
+++ b/tests/test_asm.py
@@ -0,0 +1,30 @@
+# -*- coding: utf-8 -*-
+"""
+    Basic NasmLexer Test
+    ~~~~~~~~~~~~~~~~~~~~
+
+    :copyright: Copyright 2006-2017 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+import unittest
+import os
+
+from pygments.token import Token
+from pygments.lexers import NasmLexer
+
+
+class NasmLexerTest(unittest.TestCase):
+
+    def setUp(self):
+        self.lexer = NasmLexer()
+
+    def testCPUID(self):
+        # CPU is a valid directive, and we don't want to parse this as
+        # cpu id, but as a single token. See bug #1517
+        fragment = 'cpuid'
+        expected = [
+            (Token.Name.Function, u'cpuid'),
+            (Token.Text, u'\n'),
+        ]
+        self.assertEqual(expected, list(self.lexer.get_tokens(fragment)))
author	Matthäus G. Chajdas <dev@anteru.net>	2019-05-21 18:30:17 +0200
committer	Matthäus G. Chajdas <dev@anteru.net>	2019-05-21 18:30:17 +0200
commit	8202e648945351366bd0c465d72953fae40f4783 (patch)
tree	404d0eab5b44e68a20c61c2b0b85771007da7223
parent	dcbf0c89d9ff12723d21410104b40739689e7afd (diff)
download	pygments-8202e648945351366bd0c465d72953fae40f4783.tar.gz