summaryrefslogtreecommitdiff
path: root/pygments/lexers
diff options
context:
space:
mode:
authorMatthäus G. Chajdas <dev@anteru.net>2021-12-29 16:51:26 +0100
committerMatthäus G. Chajdas <dev@anteru.net>2021-12-29 16:51:26 +0100
commit7b3692f99d5100dd1471ec37e22e10fa37e54c30 (patch)
tree7206e6b9185d03fa7b25c7ad472b359ed8870d12 /pygments/lexers
parenta8dcc0a28a0a8a738fcde542691f232842fd9290 (diff)
parentd07b86e4b7ff25ec6c9007fa3f526b78b092502f (diff)
downloadpygments-git-7b3692f99d5100dd1471ec37e22e10fa37e54c30.tar.gz
Merge branch 'master' of github.com:pygments/pygments; branch 'lexers_gh' of https://github.com/blu-base/pygments into blu-base-lexers_gh
Diffstat (limited to 'pygments/lexers')
-rw-r--r--pygments/lexers/gdscript.py20
-rw-r--r--pygments/lexers/go.py10
-rw-r--r--pygments/lexers/grammar_notation.py22
-rw-r--r--pygments/lexers/graph.py40
-rw-r--r--pygments/lexers/graphics.py46
-rw-r--r--pygments/lexers/haskell.py102
-rw-r--r--pygments/lexers/haxe.py14
-rw-r--r--pygments/lexers/hdl.py63
-rw-r--r--pygments/lexers/hexdump.py4
9 files changed, 169 insertions, 152 deletions
diff --git a/pygments/lexers/gdscript.py b/pygments/lexers/gdscript.py
index bf93721a..1048d1b5 100644
--- a/pygments/lexers/gdscript.py
+++ b/pygments/lexers/gdscript.py
@@ -16,7 +16,7 @@ import re
from pygments.lexer import RegexLexer, include, bygroups, default, words, \
combined
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
- Number, Punctuation
+ Number, Punctuation, Whitespace
__all__ = ["GDScriptLexer"]
@@ -51,19 +51,19 @@ class GDScriptLexer(RegexLexer):
tokens = {
"root": [
- (r"\n", Text),
+ (r"\n", Whitespace),
(
r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
- bygroups(Text, String.Affix, String.Doc),
+ bygroups(Whitespace, String.Affix, String.Doc),
),
(
r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
- bygroups(Text, String.Affix, String.Doc),
+ bygroups(Whitespace, String.Affix, String.Doc),
),
- (r"[^\S\n]+", Text),
+ (r"[^\S\n]+", Whitespace),
(r"#.*$", Comment.Single),
(r"[]{}:(),;[]", Punctuation),
- (r"\\\n", Text),
+ (r"(\\)(\n)", bygroups(Text, Whitespace)),
(r"\\", Text),
(r"(in|and|or|not)\b", Operator.Word),
(
@@ -71,8 +71,8 @@ class GDScriptLexer(RegexLexer):
Operator,
),
include("keywords"),
- (r"(func)((?:\s|\\\s)+)", bygroups(Keyword, Text), "funcname"),
- (r"(class)((?:\s|\\\s)+)", bygroups(Keyword, Text), "classname"),
+ (r"(func)(\s+)", bygroups(Keyword, Whitespace), "funcname"),
+ (r"(class)(\s+)", bygroups(Keyword, Whitespace), "classname"),
include("builtins"),
(
'([rR]|[uUbB][rR]|[rR][uUbB])(""")',
@@ -317,12 +317,12 @@ class GDScriptLexer(RegexLexer):
"tdqs": [
(r'"""', String.Double, "#pop"),
include("strings-double"),
- (r"\n", String.Double),
+ (r"\n", Whitespace),
],
"tsqs": [
(r"'''", String.Single, "#pop"),
include("strings-single"),
- (r"\n", String.Single),
+ (r"\n", Whitespace),
],
}
diff --git a/pygments/lexers/go.py b/pygments/lexers/go.py
index 4e287caf..33a57ce8 100644
--- a/pygments/lexers/go.py
+++ b/pygments/lexers/go.py
@@ -12,7 +12,7 @@ import re
from pygments.lexer import RegexLexer, bygroups, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
- Number, Punctuation
+ Number, Punctuation, Whitespace
__all__ = ['GoLexer']
@@ -32,10 +32,10 @@ class GoLexer(RegexLexer):
tokens = {
'root': [
- (r'\n', Text),
- (r'\s+', Text),
- (r'\\\n', Text), # line continuations
- (r'//(.*?)\n', Comment.Single),
+ (r'\n', Whitespace),
+ (r'\s+', Whitespace),
+ (r'(\\)(\n)', bygroups(Text, Whitespace)), # line continuations
+ (r'//(.*?)$', Comment.Single),
(r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
(r'(import|package)\b', Keyword.Namespace),
(r'(var|func|struct|map|chan|type|interface|const)\b',
diff --git a/pygments/lexers/grammar_notation.py b/pygments/lexers/grammar_notation.py
index 910201cd..ff57c999 100644
--- a/pygments/lexers/grammar_notation.py
+++ b/pygments/lexers/grammar_notation.py
@@ -12,7 +12,7 @@ import re
from pygments.lexer import RegexLexer, bygroups, include, this, using, words
from pygments.token import Comment, Keyword, Literal, Name, Number, \
- Operator, Punctuation, String, Text
+ Operator, Punctuation, String, Text, Whitespace
__all__ = ['BnfLexer', 'AbnfLexer', 'JsgfLexer', 'PegLexer']
@@ -126,7 +126,7 @@ class AbnfLexer(RegexLexer):
(r'[\[\]()]', Punctuation),
# fallback
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'.', Text),
],
}
@@ -154,11 +154,11 @@ class JsgfLexer(RegexLexer):
'comments': [
(r'/\*\*(?!/)', Comment.Multiline, 'documentation comment'),
(r'/\*[\w\W]*?\*/', Comment.Multiline),
- (r'//.*', Comment.Single),
+ (r'//.*$', Comment.Single),
],
'non-comments': [
(r'\A#JSGF[^;]*', Comment.Preproc),
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r';', Punctuation),
(r'[=|()\[\]*+]', Operator),
(r'/[^/]+/', Number.Float),
@@ -183,29 +183,29 @@ class JsgfLexer(RegexLexer):
],
'grammar name': [
(r';', Punctuation, '#pop'),
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'\.', Punctuation),
(r'[^;\s.]+', Name.Namespace),
],
'rulename': [
(r'>', Punctuation, '#pop'),
(r'\*', Punctuation),
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'([^.>]+)(\s*)(\.)', bygroups(Name.Namespace, Text, Punctuation)),
(r'[^.>]+', Name.Constant),
],
'documentation comment': [
(r'\*/', Comment.Multiline, '#pop'),
- (r'(^\s*\*?\s*)(@(?:example|see)\s+)'
+ (r'^(\s*)(\*?)(\s*)(@(?:example|see))(\s+)'
r'([\w\W]*?(?=(?:^\s*\*?\s*@|\*/)))',
- bygroups(Comment.Multiline, Comment.Special,
- using(this, state='example'))),
+ bygroups(Whitespace,Comment.Multiline, Whitespace, Comment.Special,
+ Whitespace, using(this, state='example'))),
(r'(^\s*\*?\s*)(@\S*)',
bygroups(Comment.Multiline, Comment.Special)),
(r'[^*\n@]+|\w|\W', Comment.Multiline),
],
'example': [
- (r'\n\s*\*', Comment.Multiline),
+ (r'(\n\s*)(\*)', bygroups(Whitespace, Comment.Multiline)),
include('non-comments'),
(r'.', Comment.Multiline),
],
@@ -241,7 +241,7 @@ class PegLexer(RegexLexer):
tokens = {
'root': [
# Comments
- (r'#.*', Comment.Single),
+ (r'#.*$', Comment.Single),
# All operators
(r'<-|[←:=/|&!?*+^↑~]', Operator),
diff --git a/pygments/lexers/graph.py b/pygments/lexers/graph.py
index bd82f761..163cf804 100644
--- a/pygments/lexers/graph.py
+++ b/pygments/lexers/graph.py
@@ -36,15 +36,15 @@ class CypherLexer(RegexLexer):
tokens = {
'root': [
include('comment'),
- include('keywords'),
include('clauses'),
+ include('keywords'),
include('relations'),
include('strings'),
include('whitespace'),
include('barewords'),
],
'comment': [
- (r'^.*//.*\n', Comment.Single),
+ (r'^.*//.*$', Comment.Single),
],
'keywords': [
(r'(create|order|match|limit|set|skip|start|return|with|where|'
@@ -52,15 +52,35 @@ class CypherLexer(RegexLexer):
],
'clauses': [
# based on https://neo4j.com/docs/cypher-refcard/3.3/
+ (r'(create)(\s+)(index|unique)\b',
+ bygroups(Keyword, Whitespace, Keyword)),
+ (r'(drop)(\s+)(constraint|index)(\s+)(on)\b',
+ bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword)),
+ (r'(ends)(\s+)(with)\b',
+ bygroups(Keyword, Whitespace, Keyword)),
+ (r'(is)(\s+)(node)(\s+)(key)\b',
+ bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword)),
+ (r'(is)(\s+)(null|unique)\b',
+ bygroups(Keyword, Whitespace, Keyword)),
+ (r'(load)(\s+)(csv)(\s+)(from)\b',
+ bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword)),
+ (r'(on)(\s+)(match|create)\b',
+ bygroups(Keyword, Whitespace, Keyword)),
+ (r'(optional)(\s+)(match)\b',
+ bygroups(Keyword, Whitespace, Keyword)),
+ (r'(order)(\s+)(by)\b',
+ bygroups(Keyword, Whitespace, Keyword)),
+ (r'(starts)(\s+)(with)\b',
+ bygroups(Keyword, Whitespace, Keyword)),
+ (r'(union)(\s+)(all)\b',
+ bygroups(Keyword, Whitespace, Keyword)),
+ (r'(using)(\s+)(periodic)(\s+)(commit)\b',
+ bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword)),
(r'(all|any|as|asc|ascending|assert|call|case|create|'
- r'create\s+index|create\s+unique|delete|desc|descending|'
- r'distinct|drop\s+constraint\s+on|drop\s+index\s+on|end|'
- r'ends\s+with|fieldterminator|foreach|in|is\s+node\s+key|'
- r'is\s+null|is\s+unique|limit|load\s+csv\s+from|match|merge|none|'
- r'not|null|on\s+match|on\s+create|optional\s+match|order\s+by|'
- r'remove|return|set|skip|single|start|starts\s+with|then|union|'
- r'union\s+all|unwind|using\s+periodic\s+commit|yield|where|when|'
- r'with)\b', Keyword),
+ r'delete|desc|descending|distinct|end|fieldterminator|'
+ r'foreach|in|limit|match|merge|none|not|null|'
+ r'remove|return|set|skip|single|start|then|union|'
+ r'unwind|yield|where|when|with)\b', Keyword),
],
'relations': [
(r'(-\[)(.*?)(\]->)', bygroups(Operator, using(this), Operator)),
diff --git a/pygments/lexers/graphics.py b/pygments/lexers/graphics.py
index f0b36b0c..9f3e4a44 100644
--- a/pygments/lexers/graphics.py
+++ b/pygments/lexers/graphics.py
@@ -11,7 +11,7 @@
from pygments.lexer import RegexLexer, words, include, bygroups, using, \
this, default
from pygments.token import Text, Comment, Operator, Keyword, Name, \
- Number, Punctuation, String
+ Number, Punctuation, String, Whitespace
__all__ = ['GLShaderLexer', 'PostScriptLexer', 'AsymptoteLexer', 'GnuplotLexer',
'PovrayLexer', 'HLSLShaderLexer']
@@ -30,8 +30,8 @@ class GLShaderLexer(RegexLexer):
tokens = {
'root': [
- (r'^#.*', Comment.Preproc),
- (r'//.*', Comment.Single),
+ (r'^#.*$', Comment.Preproc),
+ (r'//.*$', Comment.Single),
(r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
(r'\+|-|~|!=?|\*|/|%|<<|>>|<=?|>=?|==?|&&?|\^|\|\|?',
Operator),
@@ -143,7 +143,7 @@ class GLShaderLexer(RegexLexer):
(r'gl_\w*', Name.Builtin),
(r'[a-zA-Z_]\w*', Name),
(r'\.', Punctuation),
- (r'\s+', Text),
+ (r'\s+', Whitespace),
],
}
@@ -161,8 +161,8 @@ class HLSLShaderLexer(RegexLexer):
tokens = {
'root': [
- (r'^#.*', Comment.Preproc),
- (r'//.*', Comment.Single),
+ (r'^#.*$', Comment.Preproc),
+ (r'//.*$', Comment.Single),
(r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
(r'\+|-|~|!=?|\*|/|%|<<|>>|<=?|>=?|==?|&&?|\^|\|\|?',
Operator),
@@ -289,7 +289,7 @@ class HLSLShaderLexer(RegexLexer):
Name.Decorator), # attributes
(r'[a-zA-Z_]\w*', Name),
(r'\\$', Comment.Preproc), # backslash at end of line -- usually macro continuation
- (r'\s+', Text),
+ (r'\s+', Whitespace),
],
'string': [
(r'"', String, '#pop'),
@@ -326,10 +326,10 @@ class PostScriptLexer(RegexLexer):
tokens = {
'root': [
# All comment types
- (r'^%!.+\n', Comment.Preproc),
- (r'%%.*\n', Comment.Special),
+ (r'^%!.+$', Comment.Preproc),
+ (r'%%.*$', Comment.Special),
(r'(^%.*\n){2,}', Comment.Multiline),
- (r'%.*\n', Comment.Single),
+ (r'%.*$', Comment.Single),
# String literals are awkward; enter separate state.
(r'\(', String, 'stringliteral'),
@@ -383,7 +383,7 @@ class PostScriptLexer(RegexLexer):
'undefinedfilename', 'undefinedresult'), suffix=delimiter_end),
Name.Builtin),
- (r'\s+', Text),
+ (r'\s+', Whitespace),
],
'stringliteral': [
@@ -416,9 +416,9 @@ class AsymptoteLexer(RegexLexer):
tokens = {
'whitespace': [
- (r'\n', Text),
- (r'\s+', Text),
- (r'\\\n', Text), # line continuation
+ (r'\n', Whitespace),
+ (r'\s+', Whitespace),
+ (r'(\\)(\n)', bygroups(Text, Whitespace)), # line continuation
(r'//(\n|(.|\n)*?[^\\]\n)', Comment),
(r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment),
],
@@ -562,9 +562,9 @@ class GnuplotLexer(RegexLexer):
'she$ll', 'test$'),
Keyword, 'noargs'),
(r'([a-zA-Z_]\w*)(\s*)(=)',
- bygroups(Name.Variable, Text, Operator), 'genericargs'),
+ bygroups(Name.Variable, Whitespace, Operator), 'genericargs'),
(r'([a-zA-Z_]\w*)(\s*\(.*?\)\s*)(=)',
- bygroups(Name.Function, Text, Operator), 'genericargs'),
+ bygroups(Name.Function, Whitespace, Operator), 'genericargs'),
(r'@[a-zA-Z_]\w*', Name.Constant), # macros
(r';', Keyword),
],
@@ -577,13 +577,13 @@ class GnuplotLexer(RegexLexer):
],
'whitespace': [
('#', Comment, 'comment'),
- (r'[ \t\v\f]+', Text),
+ (r'[ \t\v\f]+', Whitespace),
],
'noargs': [
include('whitespace'),
# semicolon and newline end the argument list
(r';', Punctuation, '#pop'),
- (r'\n', Text, '#pop'),
+ (r'\n', Whitespace, '#pop'),
],
'dqstring': [
(r'"', String, '#pop'),
@@ -591,7 +591,7 @@ class GnuplotLexer(RegexLexer):
(r'[^\\"\n]+', String), # all other characters
(r'\\\n', String), # line continuation
(r'\\', String), # stray backslash
- (r'\n', String, '#pop'), # newline ends the string too
+ (r'\n', Whitespace, '#pop'), # newline ends the string too
],
'sqstring': [
(r"''", String), # escaped single quote
@@ -599,7 +599,7 @@ class GnuplotLexer(RegexLexer):
(r"[^\\'\n]+", String), # all other characters
(r'\\\n', String), # line continuation
(r'\\', String), # normal backslash
- (r'\n', String, '#pop'), # newline ends the string too
+ (r'\n', Whitespace, '#pop'), # newline ends the string too
],
'genericargs': [
include('noargs'),
@@ -615,7 +615,7 @@ class GnuplotLexer(RegexLexer):
bygroups(Name.Function, Text, Punctuation)),
(r'[a-zA-Z_]\w*', Name),
(r'@[a-zA-Z_]\w*', Name.Constant), # macros
- (r'\\\n', Text),
+ (r'(\\)(\n)', bygroups(Text, Whitespace)),
],
'optionarg': [
include('whitespace'),
@@ -700,7 +700,7 @@ class PovrayLexer(RegexLexer):
tokens = {
'root': [
(r'/\*[\w\W]*?\*/', Comment.Multiline),
- (r'//.*\n', Comment.Single),
+ (r'//.*$', Comment.Single),
(r'(?s)"(?:\\.|[^"\\])+"', String.Double),
(words((
'break', 'case', 'debug', 'declare', 'default', 'define', 'else',
@@ -775,7 +775,7 @@ class PovrayLexer(RegexLexer):
(r'\.[0-9]+', Number.Float),
(r'[0-9]+', Number.Integer),
(r'"(\\\\|\\[^\\]|[^"\\])*"', String),
- (r'\s+', Text),
+ (r'\s+', Whitespace),
]
}
diff --git a/pygments/lexers/haskell.py b/pygments/lexers/haskell.py
index 74cd8dcc..6ab0f334 100644
--- a/pygments/lexers/haskell.py
+++ b/pygments/lexers/haskell.py
@@ -13,7 +13,7 @@ import re
from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
default, include, inherit
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
- Number, Punctuation, Generic
+ Number, Punctuation, Generic, Whitespace
from pygments import unistring as uni
__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
@@ -48,7 +48,7 @@ class HaskellLexer(RegexLexer):
tokens = {
'root': [
# Whitespace:
- (r'\s+', Text),
+ (r'\s+', Whitespace),
# (r'--\s*|.*$', Comment.Doc),
(r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
(r'\{-', Comment.Multiline, 'comment'),
@@ -91,31 +91,31 @@ class HaskellLexer(RegexLexer):
],
'import': [
# Import statements
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'"', String, 'string'),
# after "funclist" state
(r'\)', Punctuation, '#pop'),
(r'qualified\b', Keyword),
# import X as Y
(r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
- bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
+ bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
# import X hiding (functions)
(r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
- bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
+ bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
# import X (functions)
(r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
- bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
+ bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
# import X
(r'[\w.]+', Name.Namespace, '#pop'),
],
'module': [
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
- bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
+ bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
(r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
],
'funclist': [
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'[' + uni.Lu + r']\w*', Keyword.Type),
(r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
(r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
@@ -153,7 +153,7 @@ class HaskellLexer(RegexLexer):
(r'o[0-7]+', String.Escape, '#pop'),
(r'x[\da-fA-F]+', String.Escape, '#pop'),
(r'\d+', String.Escape, '#pop'),
- (r'\s+\\', String.Escape, '#pop'),
+ (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
],
}
@@ -172,9 +172,9 @@ class HspecLexer(HaskellLexer):
tokens = {
'root': [
- (r'(it\s*)("[^"]*")', bygroups(Text, String.Doc)),
- (r'(describe\s*)("[^"]*")', bygroups(Text, String.Doc)),
- (r'(context\s*)("[^"]*")', bygroups(Text, String.Doc)),
+ (r'(it)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
+ (r'(describe)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
+ (r'(context)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
inherit,
],
}
@@ -215,16 +215,16 @@ class IdrisLexer(RegexLexer):
'root': [
# Comments
(r'^(\s*)(%%(%s))' % '|'.join(directives),
- bygroups(Text, Keyword.Reserved)),
- (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Text, Comment.Single)),
- (r'(\s*)(\|{3}.*?)$', bygroups(Text, Comment.Single)),
- (r'(\s*)(\{-)', bygroups(Text, Comment.Multiline), 'comment'),
+ bygroups(Whitespace, Keyword.Reserved)),
+ (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Whitespace, Comment.Single)),
+ (r'(\s*)(\|{3}.*?)$', bygroups(Whitespace, Comment.Single)),
+ (r'(\s*)(\{-)', bygroups(Whitespace, Comment.Multiline), 'comment'),
# Declaration
(r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
- bygroups(Text, Name.Function, Text, Operator.Word, Text)),
+ bygroups(Whitespace, Name.Function, Whitespace, Operator.Word, Whitespace)),
# Identifiers
(r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
- (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
+ (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace), 'module'),
(r"('')?[A-Z][\w\']*", Keyword.Type),
(r'[a-z][\w\']*', Text),
# Special Symbols
@@ -239,16 +239,16 @@ class IdrisLexer(RegexLexer):
(r"'", String.Char, 'character'),
(r'"', String, 'string'),
(r'[^\s(){}]+', Text),
- (r'\s+?', Text), # Whitespace
+ (r'\s+?', Whitespace), # Whitespace
],
'module': [
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'([A-Z][\w.]*)(\s+)(\()',
- bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
+ bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
(r'[A-Z][\w.]*', Name.Namespace, '#pop'),
],
'funclist': [
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'[A-Z]\w*', Keyword.Type),
(r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
(r'--.*$', Comment.Single),
@@ -286,7 +286,7 @@ class IdrisLexer(RegexLexer):
(r'o[0-7]+', String.Escape, '#pop'),
(r'x[\da-fA-F]+', String.Escape, '#pop'),
(r'\d+', String.Escape, '#pop'),
- (r'\s+\\', String.Escape, '#pop')
+ (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop')
],
}
@@ -316,7 +316,7 @@ class AgdaLexer(RegexLexer):
'root': [
# Declaration
(r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
- bygroups(Text, Name.Function, Text, Operator.Word, Text)),
+ bygroups(Whitespace, Name.Function, Whitespace, Operator.Word, Whitespace)),
# Comments
(r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
(r'\{-', Comment.Multiline, 'comment'),
@@ -325,7 +325,7 @@ class AgdaLexer(RegexLexer):
# Lexemes:
# Identifiers
(r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
- (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
+ (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace), 'module'),
(r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
# Special Symbols
(r'(\(|\)|\{|\})', Operator),
@@ -339,7 +339,7 @@ class AgdaLexer(RegexLexer):
(r"'", String.Char, 'character'),
(r'"', String, 'string'),
(r'[^\s(){}]+', Text),
- (r'\s+?', Text), # Whitespace
+ (r'\s+?', Whitespace), # Whitespace
],
'hole': [
# Holes
@@ -383,7 +383,7 @@ class CryptolLexer(RegexLexer):
tokens = {
'root': [
# Whitespace:
- (r'\s+', Text),
+ (r'\s+', Whitespace),
# (r'--\s*|.*$', Comment.Doc),
(r'//.*$', Comment.Single),
(r'/\*', Comment.Multiline, 'comment'),
@@ -417,31 +417,31 @@ class CryptolLexer(RegexLexer):
],
'import': [
# Import statements
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'"', String, 'string'),
# after "funclist" state
(r'\)', Punctuation, '#pop'),
(r'qualified\b', Keyword),
# import X as Y
(r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
- bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
+ bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
# import X hiding (functions)
(r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
- bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
+ bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
# import X (functions)
(r'([A-Z][\w.]*)(\s+)(\()',
- bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
+ bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
# import X
(r'[\w.]+', Name.Namespace, '#pop'),
],
'module': [
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'([A-Z][\w.]*)(\s+)(\()',
- bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
+ bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
(r'[A-Z][\w.]*', Name.Namespace, '#pop'),
],
'funclist': [
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'[A-Z]\w*', Keyword.Type),
(r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
# TODO: these don't match the comments in docs, remove.
@@ -478,7 +478,7 @@ class CryptolLexer(RegexLexer):
(r'o[0-7]+', String.Escape, '#pop'),
(r'x[\da-fA-F]+', String.Escape, '#pop'),
(r'\d+', String.Escape, '#pop'),
- (r'\s+\\', String.Escape, '#pop'),
+ (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
],
}
@@ -720,30 +720,30 @@ class KokaLexer(RegexLexer):
# go into type mode
(r'::?' + sboundary, tokenType, 'type'),
- (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
+ (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
'alias-type'),
- (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
+ (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
'struct-type'),
((r'(%s)' % '|'.join(typeStartKeywords)) +
- r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
+ r'(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
'type'),
# special sequences of tokens (we use ?: for non-capturing group as
# required by 'bygroups')
- (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
- bygroups(Keyword, Text, Keyword, Name.Namespace)),
+ (r'(module)(\s+)(interface(?=\s))?(\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
+ bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Namespace)),
(r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
- r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
+ r'(?:(\s*)(=)(\s*)(qualified)?(\s*)'
r'((?:[a-z]\w*/)*[a-z]\w*))?',
- bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
- Keyword, Name.Namespace)),
+ bygroups(Keyword, Whitespace, Name.Namespace, Whitespace, Keyword, Whitespace,
+ Keyword, Whitespace, Name.Namespace)),
- (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
+ (r'^(public|private)?(\s+)?(function|fun|val)'
r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
- bygroups(Keyword, Text, Name.Function)),
- (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
+ bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Function)),
+ (r'^(?:(public|private)(?=\s+external))?((?<!^)\s+)?(external)(\s+)(inline(?=\s))?(\s+)?'
r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
- bygroups(Keyword, Text, Keyword, Name.Function)),
+ bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Name.Function)),
# keywords
(r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
@@ -800,7 +800,7 @@ class KokaLexer(RegexLexer):
(r'[(\[<]', tokenType, 'type-nested'),
(r',', tokenType),
(r'([a-z]\w*)(\s*)(:)(?!:)',
- bygroups(Name, Text, tokenType)), # parameter name
+ bygroups(Name, Whitespace, tokenType)), # parameter name
include('type-content')
],
@@ -833,8 +833,8 @@ class KokaLexer(RegexLexer):
# comments and literals
'whitespace': [
- (r'\n\s*#.*$', Comment.Preproc),
- (r'\s+', Text),
+ (r'(\n\s*)(#.*)$', bygroups(Whitespace, Comment.Preproc)),
+ (r'\s+', Whitespace),
(r'/\*', Comment.Multiline, 'comment'),
(r'//.*$', Comment.Single)
],
diff --git a/pygments/lexers/haxe.py b/pygments/lexers/haxe.py
index 69e35693..ee587e99 100644
--- a/pygments/lexers/haxe.py
+++ b/pygments/lexers/haxe.py
@@ -103,7 +103,7 @@ class HaxeLexer(ExtendedRegexLexer):
# space/tab/comment/preproc
'spaces': [
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'//[^\n\r]*', Comment.Single),
(r'/\*.*?\*/', Comment.Multiline),
(r'(#)(if|elseif|else|end|error)\b', preproc_callback),
@@ -163,14 +163,14 @@ class HaxeLexer(ExtendedRegexLexer):
],
'preproc-error': [
- (r'\s+', Comment.Preproc),
+ (r'\s+', Whitespace),
(r"'", String.Single, ('#pop', 'string-single')),
(r'"', String.Double, ('#pop', 'string-double')),
default('#pop'),
],
'preproc-expr': [
- (r'\s+', Comment.Preproc),
+ (r'\s+', Whitespace),
(r'\!', Comment.Preproc),
(r'\(', Comment.Preproc, ('#pop', 'preproc-parenthesis')),
@@ -193,20 +193,20 @@ class HaxeLexer(ExtendedRegexLexer):
],
'preproc-parenthesis': [
- (r'\s+', Comment.Preproc),
+ (r'\s+', Whitespace),
(r'\)', Comment.Preproc, '#pop'),
default('preproc-expr-in-parenthesis'),
],
'preproc-expr-chain': [
- (r'\s+', Comment.Preproc),
+ (r'\s+', Whitespace),
(binop, Comment.Preproc, ('#pop', 'preproc-expr-in-parenthesis')),
default('#pop'),
],
# same as 'preproc-expr' but able to chain 'preproc-expr-chain'
'preproc-expr-in-parenthesis': [
- (r'\s+', Comment.Preproc),
+ (r'\s+', Whitespace),
(r'\!', Comment.Preproc),
(r'\(', Comment.Preproc,
('#pop', 'preproc-expr-chain', 'preproc-parenthesis')),
@@ -919,7 +919,7 @@ class HxmlLexer(RegexLexer):
bygroups(Punctuation, Keyword, Whitespace, String)),
# Options that take only numerical arguments
(r'(-)(swf-version)( +)(\d+)',
- bygroups(Punctuation, Keyword, Number.Integer)),
+ bygroups(Punctuation, Keyword, Whitespace, Number.Integer)),
# An Option that defines the size, the fps and the background
# color of an flash movie
(r'(-)(swf-header)( +)(\d+)(:)(\d+)(:)(\d+)(:)([A-Fa-f0-9]{6})',
diff --git a/pygments/lexers/hdl.py b/pygments/lexers/hdl.py
index 3b49f537..e96f79a4 100644
--- a/pygments/lexers/hdl.py
+++ b/pygments/lexers/hdl.py
@@ -12,7 +12,7 @@ import re
from pygments.lexer import RegexLexer, bygroups, include, using, this, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
- Number, Punctuation
+ Number, Punctuation, Whitespace
__all__ = ['VerilogLexer', 'SystemVerilogLexer', 'VhdlLexer']
@@ -34,9 +34,8 @@ class VerilogLexer(RegexLexer):
tokens = {
'root': [
(r'^\s*`define', Comment.Preproc, 'macro'),
- (r'\n', Text),
- (r'\s+', Text),
- (r'\\\n', Text), # line continuation
+ (r'\s+', Whitespace),
+ (r'(\\)(\n)', bygroups(String.Escape, Whitespace)), # line continuation
(r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single),
(r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
(r'[{}#@]', Punctuation),
@@ -54,8 +53,8 @@ class VerilogLexer(RegexLexer):
(r'[()\[\],.;\']', Punctuation),
(r'`[a-zA-Z_]\w*', Name.Constant),
- (r'^(\s*)(package)(\s+)', bygroups(Text, Keyword.Namespace, Text)),
- (r'^(\s*)(import)(\s+)', bygroups(Text, Keyword.Namespace, Text),
+ (r'^(\s*)(package)(\s+)', bygroups(Whitespace, Keyword.Namespace, Text)),
+ (r'^(\s*)(import)(\s+)', bygroups(Whitespace, Keyword.Namespace, Text),
'import'),
(words((
@@ -114,7 +113,7 @@ class VerilogLexer(RegexLexer):
(r'"', String, '#pop'),
(r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
(r'[^\\"\n]+', String), # all other characters
- (r'\\\n', String), # line continuation
+ (r'(\\)(\n)', bygroups(String.Escape, Whitespace)), # line continuation
(r'\\', String), # stray backslash
],
'macro': [
@@ -123,7 +122,7 @@ class VerilogLexer(RegexLexer):
(r'//.*?\n', Comment.Single, '#pop'),
(r'/', Comment.Preproc),
(r'(?<=\\)\n', Comment.Preproc),
- (r'\n', Comment.Preproc, '#pop'),
+ (r'\n', Whitespace, '#pop'),
],
'import': [
(r'[\w:]+\*?', Name.Namespace, '#pop')
@@ -161,13 +160,12 @@ class SystemVerilogLexer(RegexLexer):
tokens = {
'root': [
- (r'^\s*`define', Comment.Preproc, 'macro'),
- (r'^(\s*)(package)(\s+)', bygroups(Text, Keyword.Namespace, Text)),
- (r'^(\s*)(import)(\s+)', bygroups(Text, Keyword.Namespace, Text), 'import'),
+ (r'^(\s*)(`define)', bygroups(Whitespace, Comment.Preproc), 'macro'),
+ (r'^(\s*)(package)(\s+)', bygroups(Whitespace, Keyword.Namespace, Whitespace)),
+ (r'^(\s*)(import)(\s+)', bygroups(Whitespace, Keyword.Namespace, Whitespace), 'import'),
- (r'\n', Text),
- (r'\s+', Text),
- (r'\\\n', Text), # line continuation
+ (r'\s+', Whitespace),
+ (r'(\\)(\n)', bygroups(String.Escape, Whitespace)), # line continuation
(r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single),
(r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
(r'[{}#@]', Punctuation),
@@ -240,11 +238,11 @@ class SystemVerilogLexer(RegexLexer):
Keyword),
(r'(class)(\s+)([a-zA-Z_]\w*)',
- bygroups(Keyword.Declaration, Text, Name.Class)),
+ bygroups(Keyword.Declaration, Whitespace, Name.Class)),
(r'(extends)(\s+)([a-zA-Z_]\w*)',
- bygroups(Keyword.Declaration, Text, Name.Class)),
+ bygroups(Keyword.Declaration, Whitespace, Name.Class)),
(r'(endclass\b)(?:(\s*)(:)(\s*)([a-zA-Z_]\w*))?',
- bygroups(Keyword.Declaration, Text, Punctuation, Text, Name.Class)),
+ bygroups(Keyword.Declaration, Whitespace, Punctuation, Whitespace, Name.Class)),
(words((
# Variable types
@@ -355,16 +353,16 @@ class SystemVerilogLexer(RegexLexer):
(r'"', String, '#pop'),
(r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
(r'[^\\"\n]+', String), # all other characters
- (r'\\\n', String), # line continuation
+ (r'(\\)(\n)', bygroups(String.Escape, Whitespace)), # line continuation
(r'\\', String), # stray backslash
],
'macro': [
(r'[^/\n]+', Comment.Preproc),
(r'/[*](.|\n)*?[*]/', Comment.Multiline),
- (r'//.*?\n', Comment.Single, '#pop'),
+ (r'//.*?$', Comment.Single, '#pop'),
(r'/', Comment.Preproc),
(r'(?<=\\)\n', Comment.Preproc),
- (r'\n', Comment.Preproc, '#pop'),
+ (r'\n', Whitespace, '#pop'),
],
'import': [
(r'[\w:]+\*?', Name.Namespace, '#pop')
@@ -386,9 +384,8 @@ class VhdlLexer(RegexLexer):
tokens = {
'root': [
- (r'\n', Text),
- (r'\s+', Text),
- (r'\\\n', Text), # line continuation
+ (r'\s+', Whitespace),
+ (r'(\\)(\n)', bygroups(String.Escape, Whitespace)), # line continuation
(r'--.*?$', Comment.Single),
(r"'(U|X|0|1|Z|W|L|H|-)'", String.Char),
(r'[~!%^&*+=|?:<>/-]', Operator),
@@ -397,25 +394,25 @@ class VhdlLexer(RegexLexer):
(r'"[^\n\\"]*"', String),
(r'(library)(\s+)([a-z_]\w*)',
- bygroups(Keyword, Text, Name.Namespace)),
- (r'(use)(\s+)(entity)', bygroups(Keyword, Text, Keyword)),
+ bygroups(Keyword, Whitespace, Name.Namespace)),
+ (r'(use)(\s+)(entity)', bygroups(Keyword, Whitespace, Keyword)),
(r'(use)(\s+)([a-z_][\w.]*\.)(all)',
- bygroups(Keyword, Text, Name.Namespace, Keyword)),
+ bygroups(Keyword, Whitespace, Name.Namespace, Keyword)),
(r'(use)(\s+)([a-z_][\w.]*)',
- bygroups(Keyword, Text, Name.Namespace)),
+ bygroups(Keyword, Whitespace, Name.Namespace)),
(r'(std|ieee)(\.[a-z_]\w*)',
bygroups(Name.Namespace, Name.Namespace)),
(words(('std', 'ieee', 'work'), suffix=r'\b'),
Name.Namespace),
(r'(entity|component)(\s+)([a-z_]\w*)',
- bygroups(Keyword, Text, Name.Class)),
+ bygroups(Keyword, Whitespace, Name.Class)),
(r'(architecture|configuration)(\s+)([a-z_]\w*)(\s+)'
r'(of)(\s+)([a-z_]\w*)(\s+)(is)',
- bygroups(Keyword, Text, Name.Class, Text, Keyword, Text,
- Name.Class, Text, Keyword)),
+ bygroups(Keyword, Whitespace, Name.Class, Whitespace, Keyword, Whitespace,
+ Name.Class, Whitespace, Keyword)),
(r'([a-z_]\w*)(:)(\s+)(process|for)',
- bygroups(Name.Class, Operator, Text, Keyword)),
- (r'(end)(\s+)', bygroups(using(this), Text), 'endblock'),
+ bygroups(Name.Class, Operator, Whitespace, Keyword)),
+ (r'(end)(\s+)', bygroups(using(this), Whitespace), 'endblock'),
include('types'),
include('keywords'),
@@ -426,7 +423,7 @@ class VhdlLexer(RegexLexer):
'endblock': [
include('keywords'),
(r'[a-z_]\w*', Name.Class),
- (r'(\s+)', Text),
+ (r'\s+', Whitespace),
(r';', Punctuation, '#pop'),
],
'types': [
diff --git a/pygments/lexers/hexdump.py b/pygments/lexers/hexdump.py
index f93bda76..041d7f6c 100644
--- a/pygments/lexers/hexdump.py
+++ b/pygments/lexers/hexdump.py
@@ -56,8 +56,8 @@ class HexdumpLexer(RegexLexer):
bygroups(Whitespace, Punctuation, String, Punctuation)),
(r'(\s{2,3})(\|)(.{1,15})(\|)$',
bygroups(Whitespace, Punctuation, String, Punctuation)),
- (r'(\s{2,3})(.{1,15})$', bygroups(Text, String)),
- (r'(\s{2,3})(.{16}|.{20})$', bygroups(Text, String), 'nonpiped-strings'),
+ (r'(\s{2,3})(.{1,15})$', bygroups(Whitespace, String)),
+ (r'(\s{2,3})(.{16}|.{20})$', bygroups(Whitespace, String), 'nonpiped-strings'),
(r'\s', Whitespace),
(r'^\*', Punctuation),
],