summaryrefslogtreecommitdiff
path: root/pygments/lexers
diff options
context:
space:
mode:
authorMatthäus G. Chajdas <dev@anteru.net>2021-12-29 16:51:26 +0100
committerMatthäus G. Chajdas <dev@anteru.net>2021-12-29 16:51:26 +0100
commit7b3692f99d5100dd1471ec37e22e10fa37e54c30 (patch)
tree7206e6b9185d03fa7b25c7ad472b359ed8870d12 /pygments/lexers
parenta8dcc0a28a0a8a738fcde542691f232842fd9290 (diff)
parentd07b86e4b7ff25ec6c9007fa3f526b78b092502f (diff)
downloadpygments-git-7b3692f99d5100dd1471ec37e22e10fa37e54c30.tar.gz
Merge branch 'master' of github.com:pygments/pygments; branch 'lexers_gh' of https://github.com/blu-base/pygments into blu-base-lexers_gh
Diffstat (limited to 'pygments/lexers')
-rw-r--r--pygments/lexers/gdscript.py20
-rw-r--r--pygments/lexers/go.py10
-rw-r--r--pygments/lexers/grammar_notation.py22
-rw-r--r--pygments/lexers/graph.py40
-rw-r--r--pygments/lexers/graphics.py46
-rw-r--r--pygments/lexers/haskell.py102
-rw-r--r--pygments/lexers/haxe.py14
-rw-r--r--pygments/lexers/hdl.py63
-rw-r--r--pygments/lexers/hexdump.py4
9 files changed, 169 insertions, 152 deletions
diff --git a/pygments/lexers/gdscript.py b/pygments/lexers/gdscript.py
index bf93721a..1048d1b5 100644
--- a/pygments/lexers/gdscript.py
+++ b/pygments/lexers/gdscript.py
@@ -16,7 +16,7 @@ import re
from pygments.lexer import RegexLexer, include, bygroups, default, words, \
combined
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
- Number, Punctuation
+ Number, Punctuation, Whitespace
__all__ = ["GDScriptLexer"]
@@ -51,19 +51,19 @@ class GDScriptLexer(RegexLexer):
tokens = {
"root": [
- (r"\n", Text),
+ (r"\n", Whitespace),
(
r'^(\s*)([rRuUbB]{,2})("""(?:.|\n)*?""")',
- bygroups(Text, String.Affix, String.Doc),
+ bygroups(Whitespace, String.Affix, String.Doc),
),
(
r"^(\s*)([rRuUbB]{,2})('''(?:.|\n)*?''')",
- bygroups(Text, String.Affix, String.Doc),
+ bygroups(Whitespace, String.Affix, String.Doc),
),
- (r"[^\S\n]+", Text),
+ (r"[^\S\n]+", Whitespace),
(r"#.*$", Comment.Single),
(r"[]{}:(),;[]", Punctuation),
- (r"\\\n", Text),
+ (r"(\\)(\n)", bygroups(Text, Whitespace)),
(r"\\", Text),
(r"(in|and|or|not)\b", Operator.Word),
(
@@ -71,8 +71,8 @@ class GDScriptLexer(RegexLexer):
Operator,
),
include("keywords"),
- (r"(func)((?:\s|\\\s)+)", bygroups(Keyword, Text), "funcname"),
- (r"(class)((?:\s|\\\s)+)", bygroups(Keyword, Text), "classname"),
+ (r"(func)(\s+)", bygroups(Keyword, Whitespace), "funcname"),
+ (r"(class)(\s+)", bygroups(Keyword, Whitespace), "classname"),
include("builtins"),
(
'([rR]|[uUbB][rR]|[rR][uUbB])(""")',
@@ -317,12 +317,12 @@ class GDScriptLexer(RegexLexer):
"tdqs": [
(r'"""', String.Double, "#pop"),
include("strings-double"),
- (r"\n", String.Double),
+ (r"\n", Whitespace),
],
"tsqs": [
(r"'''", String.Single, "#pop"),
include("strings-single"),
- (r"\n", String.Single),
+ (r"\n", Whitespace),
],
}
diff --git a/pygments/lexers/go.py b/pygments/lexers/go.py
index 4e287caf..33a57ce8 100644
--- a/pygments/lexers/go.py
+++ b/pygments/lexers/go.py
@@ -12,7 +12,7 @@ import re
from pygments.lexer import RegexLexer, bygroups, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
- Number, Punctuation
+ Number, Punctuation, Whitespace
__all__ = ['GoLexer']
@@ -32,10 +32,10 @@ class GoLexer(RegexLexer):
tokens = {
'root': [
- (r'\n', Text),
- (r'\s+', Text),
- (r'\\\n', Text), # line continuations
- (r'//(.*?)\n', Comment.Single),
+ (r'\n', Whitespace),
+ (r'\s+', Whitespace),
+ (r'(\\)(\n)', bygroups(Text, Whitespace)), # line continuations
+ (r'//(.*?)$', Comment.Single),
(r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
(r'(import|package)\b', Keyword.Namespace),
(r'(var|func|struct|map|chan|type|interface|const)\b',
diff --git a/pygments/lexers/grammar_notation.py b/pygments/lexers/grammar_notation.py
index 910201cd..ff57c999 100644
--- a/pygments/lexers/grammar_notation.py
+++ b/pygments/lexers/grammar_notation.py
@@ -12,7 +12,7 @@ import re
from pygments.lexer import RegexLexer, bygroups, include, this, using, words
from pygments.token import Comment, Keyword, Literal, Name, Number, \
- Operator, Punctuation, String, Text
+ Operator, Punctuation, String, Text, Whitespace
__all__ = ['BnfLexer', 'AbnfLexer', 'JsgfLexer', 'PegLexer']
@@ -126,7 +126,7 @@ class AbnfLexer(RegexLexer):
(r'[\[\]()]', Punctuation),
# fallback
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'.', Text),
],
}
@@ -154,11 +154,11 @@ class JsgfLexer(RegexLexer):
'comments': [
(r'/\*\*(?!/)', Comment.Multiline, 'documentation comment'),
(r'/\*[\w\W]*?\*/', Comment.Multiline),
- (r'//.*', Comment.Single),
+ (r'//.*$', Comment.Single),
],
'non-comments': [
(r'\A#JSGF[^;]*', Comment.Preproc),
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r';', Punctuation),
(r'[=|()\[\]*+]', Operator),
(r'/[^/]+/', Number.Float),
@@ -183,29 +183,29 @@ class JsgfLexer(RegexLexer):
],
'grammar name': [
(r';', Punctuation, '#pop'),
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'\.', Punctuation),
(r'[^;\s.]+', Name.Namespace),
],
'rulename': [
(r'>', Punctuation, '#pop'),
(r'\*', Punctuation),
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'([^.>]+)(\s*)(\.)', bygroups(Name.Namespace, Text, Punctuation)),
(r'[^.>]+', Name.Constant),
],
'documentation comment': [
(r'\*/', Comment.Multiline, '#pop'),
- (r'(^\s*\*?\s*)(@(?:example|see)\s+)'
+ (r'^(\s*)(\*?)(\s*)(@(?:example|see))(\s+)'
r'([\w\W]*?(?=(?:^\s*\*?\s*@|\*/)))',
- bygroups(Comment.Multiline, Comment.Special,
- using(this, state='example'))),
+ bygroups(Whitespace,Comment.Multiline, Whitespace, Comment.Special,
+ Whitespace, using(this, state='example'))),
(r'(^\s*\*?\s*)(@\S*)',
bygroups(Comment.Multiline, Comment.Special)),
(r'[^*\n@]+|\w|\W', Comment.Multiline),
],
'example': [
- (r'\n\s*\*', Comment.Multiline),
+ (r'(\n\s*)(\*)', bygroups(Whitespace, Comment.Multiline)),
include('non-comments'),
(r'.', Comment.Multiline),
],
@@ -241,7 +241,7 @@ class PegLexer(RegexLexer):
tokens = {
'root': [
# Comments
- (r'#.*', Comment.Single),
+ (r'#.*$', Comment.Single),
# All operators
(r'<-|[←:=/|&!?*+^↑~]', Operator),
diff --git a/pygments/lexers/graph.py b/pygments/lexers/graph.py
index bd82f761..163cf804 100644
--- a/pygments/lexers/graph.py
+++ b/pygments/lexers/graph.py
@@ -36,15 +36,15 @@ class CypherLexer(RegexLexer):
tokens = {
'root': [
include('comment'),
- include('keywords'),
include('clauses'),
+ include('keywords'),
include('relations'),
include('strings'),
include('whitespace'),
include('barewords'),
],
'comment': [
- (r'^.*//.*\n', Comment.Single),
+ (r'^.*//.*$', Comment.Single),
],
'keywords': [
(r'(create|order|match|limit|set|skip|start|return|with|where|'
@@ -52,15 +52,35 @@ class CypherLexer(RegexLexer):
],
'clauses': [
# based on https://neo4j.com/docs/cypher-refcard/3.3/
+ (r'(create)(\s+)(index|unique)\b',
+ bygroups(Keyword, Whitespace, Keyword)),
+ (r'(drop)(\s+)(constraint|index)(\s+)(on)\b',
+ bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword)),
+ (r'(ends)(\s+)(with)\b',
+ bygroups(Keyword, Whitespace, Keyword)),
+ (r'(is)(\s+)(node)(\s+)(key)\b',
+ bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword)),
+ (r'(is)(\s+)(null|unique)\b',
+ bygroups(Keyword, Whitespace, Keyword)),
+ (r'(load)(\s+)(csv)(\s+)(from)\b',
+ bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword)),
+ (r'(on)(\s+)(match|create)\b',
+ bygroups(Keyword, Whitespace, Keyword)),
+ (r'(optional)(\s+)(match)\b',
+ bygroups(Keyword, Whitespace, Keyword)),
+ (r'(order)(\s+)(by)\b',
+ bygroups(Keyword, Whitespace, Keyword)),
+ (r'(starts)(\s+)(with)\b',
+ bygroups(Keyword, Whitespace, Keyword)),
+ (r'(union)(\s+)(all)\b',
+ bygroups(Keyword, Whitespace, Keyword)),
+ (r'(using)(\s+)(periodic)(\s+)(commit)\b',
+ bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword)),
(r'(all|any|as|asc|ascending|assert|call|case|create|'
- r'create\s+index|create\s+unique|delete|desc|descending|'
- r'distinct|drop\s+constraint\s+on|drop\s+index\s+on|end|'
- r'ends\s+with|fieldterminator|foreach|in|is\s+node\s+key|'
- r'is\s+null|is\s+unique|limit|load\s+csv\s+from|match|merge|none|'
- r'not|null|on\s+match|on\s+create|optional\s+match|order\s+by|'
- r'remove|return|set|skip|single|start|starts\s+with|then|union|'
- r'union\s+all|unwind|using\s+periodic\s+commit|yield|where|when|'
- r'with)\b', Keyword),
+ r'delete|desc|descending|distinct|end|fieldterminator|'
+ r'foreach|in|limit|match|merge|none|not|null|'
+ r'remove|return|set|skip|single|start|then|union|'
+ r'unwind|yield|where|when|with)\b', Keyword),
],
'relations': [
(r'(-\[)(.*?)(\]->)', bygroups(Operator, using(this), Operator)),
diff --git a/pygments/lexers/graphics.py b/pygments/lexers/graphics.py
index f0b36b0c..9f3e4a44 100644
--- a/pygments/lexers/graphics.py
+++ b/pygments/lexers/graphics.py
@@ -11,7 +11,7 @@
from pygments.lexer import RegexLexer, words, include, bygroups, using, \
this, default
from pygments.token import Text, Comment, Operator, Keyword, Name, \
- Number, Punctuation, String
+ Number, Punctuation, String, Whitespace
__all__ = ['GLShaderLexer', 'PostScriptLexer', 'AsymptoteLexer', 'GnuplotLexer',
'PovrayLexer', 'HLSLShaderLexer']
@@ -30,8 +30,8 @@ class GLShaderLexer(RegexLexer):
tokens = {
'root': [
- (r'^#.*', Comment.Preproc),
- (r'//.*', Comment.Single),
+ (r'^#.*$', Comment.Preproc),
+ (r'//.*$', Comment.Single),
(r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
(r'\+|-|~|!=?|\*|/|%|<<|>>|<=?|>=?|==?|&&?|\^|\|\|?',
Operator),
@@ -143,7 +143,7 @@ class GLShaderLexer(RegexLexer):
(r'gl_\w*', Name.Builtin),
(r'[a-zA-Z_]\w*', Name),
(r'\.', Punctuation),
- (r'\s+', Text),
+ (r'\s+', Whitespace),
],
}
@@ -161,8 +161,8 @@ class HLSLShaderLexer(RegexLexer):
tokens = {
'root': [
- (r'^#.*', Comment.Preproc),
- (r'//.*', Comment.Single),
+ (r'^#.*$', Comment.Preproc),
+ (r'//.*$', Comment.Single),
(r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
(r'\+|-|~|!=?|\*|/|%|<<|>>|<=?|>=?|==?|&&?|\^|\|\|?',
Operator),
@@ -289,7 +289,7 @@ class HLSLShaderLexer(RegexLexer):
Name.Decorator), # attributes
(r'[a-zA-Z_]\w*', Name),
(r'\\$', Comment.Preproc), # backslash at end of line -- usually macro continuation
- (r'\s+', Text),
+ (r'\s+', Whitespace),
],
'string': [
(r'"', String, '#pop'),
@@ -326,10 +326,10 @@ class PostScriptLexer(RegexLexer):
tokens = {
'root': [
# All comment types
- (r'^%!.+\n', Comment.Preproc),
- (r'%%.*\n', Comment.Special),
+ (r'^%!.+$', Comment.Preproc),
+ (r'%%.*$', Comment.Special),
(r'(^%.*\n){2,}', Comment.Multiline),
- (r'%.*\n', Comment.Single),
+ (r'%.*$', Comment.Single),
# String literals are awkward; enter separate state.
(r'\(', String, 'stringliteral'),
@@ -383,7 +383,7 @@ class PostScriptLexer(RegexLexer):
'undefinedfilename', 'undefinedresult'), suffix=delimiter_end),
Name.Builtin),
- (r'\s+', Text),
+ (r'\s+', Whitespace),
],
'stringliteral': [
@@ -416,9 +416,9 @@ class AsymptoteLexer(RegexLexer):
tokens = {
'whitespace': [
- (r'\n', Text),
- (r'\s+', Text),
- (r'\\\n', Text), # line continuation
+ (r'\n', Whitespace),
+ (r'\s+', Whitespace),
+ (r'(\\)(\n)', bygroups(Text, Whitespace)), # line continuation
(r'//(\n|(.|\n)*?[^\\]\n)', Comment),
(r'/(\\\n)?\*(.|\n)*?\*(\\\n)?/', Comment),
],
@@ -562,9 +562,9 @@ class GnuplotLexer(RegexLexer):
'she$ll', 'test$'),
Keyword, 'noargs'),
(r'([a-zA-Z_]\w*)(\s*)(=)',
- bygroups(Name.Variable, Text, Operator), 'genericargs'),
+ bygroups(Name.Variable, Whitespace, Operator), 'genericargs'),
(r'([a-zA-Z_]\w*)(\s*\(.*?\)\s*)(=)',
- bygroups(Name.Function, Text, Operator), 'genericargs'),
+ bygroups(Name.Function, Whitespace, Operator), 'genericargs'),
(r'@[a-zA-Z_]\w*', Name.Constant), # macros
(r';', Keyword),
],
@@ -577,13 +577,13 @@ class GnuplotLexer(RegexLexer):
],
'whitespace': [
('#', Comment, 'comment'),
- (r'[ \t\v\f]+', Text),
+ (r'[ \t\v\f]+', Whitespace),
],
'noargs': [
include('whitespace'),
# semicolon and newline end the argument list
(r';', Punctuation, '#pop'),
- (r'\n', Text, '#pop'),
+ (r'\n', Whitespace, '#pop'),
],
'dqstring': [
(r'"', String, '#pop'),
@@ -591,7 +591,7 @@ class GnuplotLexer(RegexLexer):
(r'[^\\"\n]+', String), # all other characters
(r'\\\n', String), # line continuation
(r'\\', String), # stray backslash
- (r'\n', String, '#pop'), # newline ends the string too
+ (r'\n', Whitespace, '#pop'), # newline ends the string too
],
'sqstring': [
(r"''", String), # escaped single quote
@@ -599,7 +599,7 @@ class GnuplotLexer(RegexLexer):
(r"[^\\'\n]+", String), # all other characters
(r'\\\n', String), # line continuation
(r'\\', String), # normal backslash
- (r'\n', String, '#pop'), # newline ends the string too
+ (r'\n', Whitespace, '#pop'), # newline ends the string too
],
'genericargs': [
include('noargs'),
@@ -615,7 +615,7 @@ class GnuplotLexer(RegexLexer):
bygroups(Name.Function, Text, Punctuation)),
(r'[a-zA-Z_]\w*', Name),
(r'@[a-zA-Z_]\w*', Name.Constant), # macros
- (r'\\\n', Text),
+ (r'(\\)(\n)', bygroups(Text, Whitespace)),
],
'optionarg': [
include('whitespace'),
@@ -700,7 +700,7 @@ class PovrayLexer(RegexLexer):
tokens = {
'root': [
(r'/\*[\w\W]*?\*/', Comment.Multiline),
- (r'//.*\n', Comment.Single),
+ (r'//.*$', Comment.Single),
(r'(?s)"(?:\\.|[^"\\])+"', String.Double),
(words((
'break', 'case', 'debug', 'declare', 'default', 'define', 'else',
@@ -775,7 +775,7 @@ class PovrayLexer(RegexLexer):
(r'\.[0-9]+', Number.Float),
(r'[0-9]+', Number.Integer),
(r'"(\\\\|\\[^\\]|[^"\\])*"', String),
- (r'\s+', Text),
+ (r'\s+', Whitespace),
]
}
diff --git a/pygments/lexers/haskell.py b/pygments/lexers/haskell.py
index 74cd8dcc..6ab0f334 100644
--- a/pygments/lexers/haskell.py
+++ b/pygments/lexers/haskell.py
@@ -13,7 +13,7 @@ import re
from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
default, include, inherit
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
- Number, Punctuation, Generic
+ Number, Punctuation, Generic, Whitespace
from pygments import unistring as uni
__all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
@@ -48,7 +48,7 @@ class HaskellLexer(RegexLexer):
tokens = {
'root': [
# Whitespace:
- (r'\s+', Text),
+ (r'\s+', Whitespace),
# (r'--\s*|.*$', Comment.Doc),
(r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
(r'\{-', Comment.Multiline, 'comment'),
@@ -91,31 +91,31 @@ class HaskellLexer(RegexLexer):
],
'import': [
# Import statements
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'"', String, 'string'),
# after "funclist" state
(r'\)', Punctuation, '#pop'),
(r'qualified\b', Keyword),
# import X as Y
(r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
- bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
+ bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
# import X hiding (functions)
(r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
- bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
+ bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
# import X (functions)
(r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
- bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
+ bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
# import X
(r'[\w.]+', Name.Namespace, '#pop'),
],
'module': [
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
- bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
+ bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
(r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
],
'funclist': [
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'[' + uni.Lu + r']\w*', Keyword.Type),
(r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
(r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
@@ -153,7 +153,7 @@ class HaskellLexer(RegexLexer):
(r'o[0-7]+', String.Escape, '#pop'),
(r'x[\da-fA-F]+', String.Escape, '#pop'),
(r'\d+', String.Escape, '#pop'),
- (r'\s+\\', String.Escape, '#pop'),
+ (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
],
}
@@ -172,9 +172,9 @@ class HspecLexer(HaskellLexer):
tokens = {
'root': [
- (r'(it\s*)("[^"]*")', bygroups(Text, String.Doc)),
- (r'(describe\s*)("[^"]*")', bygroups(Text, String.Doc)),
- (r'(context\s*)("[^"]*")', bygroups(Text, String.Doc)),
+ (r'(it)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
+ (r'(describe)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
+ (r'(context)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
inherit,
],
}
@@ -215,16 +215,16 @@ class IdrisLexer(RegexLexer):
'root': [
# Comments
(r'^(\s*)(%%(%s))' % '|'.join(directives),
- bygroups(Text, Keyword.Reserved)),
- (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Text, Comment.Single)),
- (r'(\s*)(\|{3}.*?)$', bygroups(Text, Comment.Single)),
- (r'(\s*)(\{-)', bygroups(Text, Comment.Multiline), 'comment'),
+ bygroups(Whitespace, Keyword.Reserved)),
+ (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Whitespace, Comment.Single)),
+ (r'(\s*)(\|{3}.*?)$', bygroups(Whitespace, Comment.Single)),
+ (r'(\s*)(\{-)', bygroups(Whitespace, Comment.Multiline), 'comment'),
# Declaration
(r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
- bygroups(Text, Name.Function, Text, Operator.Word, Text)),
+ bygroups(Whitespace, Name.Function, Whitespace, Operator.Word, Whitespace)),
# Identifiers
(r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
- (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
+ (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace), 'module'),
(r"('')?[A-Z][\w\']*", Keyword.Type),
(r'[a-z][\w\']*', Text),
# Special Symbols
@@ -239,16 +239,16 @@ class IdrisLexer(RegexLexer):
(r"'", String.Char, 'character'),
(r'"', String, 'string'),
(r'[^\s(){}]+', Text),
- (r'\s+?', Text), # Whitespace
+ (r'\s+?', Whitespace), # Whitespace
],
'module': [
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'([A-Z][\w.]*)(\s+)(\()',
- bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
+ bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
(r'[A-Z][\w.]*', Name.Namespace, '#pop'),
],
'funclist': [
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'[A-Z]\w*', Keyword.Type),
(r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
(r'--.*$', Comment.Single),
@@ -286,7 +286,7 @@ class IdrisLexer(RegexLexer):
(r'o[0-7]+', String.Escape, '#pop'),
(r'x[\da-fA-F]+', String.Escape, '#pop'),
(r'\d+', String.Escape, '#pop'),
- (r'\s+\\', String.Escape, '#pop')
+ (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop')
],
}
@@ -316,7 +316,7 @@ class AgdaLexer(RegexLexer):
'root': [
# Declaration
(r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
- bygroups(Text, Name.Function, Text, Operator.Word, Text)),
+ bygroups(Whitespace, Name.Function, Whitespace, Operator.Word, Whitespace)),
# Comments
(r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
(r'\{-', Comment.Multiline, 'comment'),
@@ -325,7 +325,7 @@ class AgdaLexer(RegexLexer):
# Lexemes:
# Identifiers
(r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
- (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
+ (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace), 'module'),
(r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
# Special Symbols
(r'(\(|\)|\{|\})', Operator),
@@ -339,7 +339,7 @@ class AgdaLexer(RegexLexer):
(r"'", String.Char, 'character'),
(r'"', String, 'string'),
(r'[^\s(){}]+', Text),
- (r'\s+?', Text), # Whitespace
+ (r'\s+?', Whitespace), # Whitespace
],
'hole': [
# Holes
@@ -383,7 +383,7 @@ class CryptolLexer(RegexLexer):
tokens = {
'root': [
# Whitespace:
- (r'\s+', Text),
+ (r'\s+', Whitespace),
# (r'--\s*|.*$', Comment.Doc),
(r'//.*$', Comment.Single),
(r'/\*', Comment.Multiline, 'comment'),
@@ -417,31 +417,31 @@ class CryptolLexer(RegexLexer):
],
'import': [
# Import statements
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'"', String, 'string'),
# after "funclist" state
(r'\)', Punctuation, '#pop'),
(r'qualified\b', Keyword),
# import X as Y
(r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
- bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
+ bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
# import X hiding (functions)
(r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
- bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
+ bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
# import X (functions)
(r'([A-Z][\w.]*)(\s+)(\()',
- bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
+ bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
# import X
(r'[\w.]+', Name.Namespace, '#pop'),
],
'module': [
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'([A-Z][\w.]*)(\s+)(\()',
- bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
+ bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
(r'[A-Z][\w.]*', Name.Namespace, '#pop'),
],
'funclist': [
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'[A-Z]\w*', Keyword.Type),
(r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
# TODO: these don't match the comments in docs, remove.
@@ -478,7 +478,7 @@ class CryptolLexer(RegexLexer):
(r'o[0-7]+', String.Escape, '#pop'),
(r'x[\da-fA-F]+', String.Escape, '#pop'),
(r'\d+', String.Escape, '#pop'),
- (r'\s+\\', String.Escape, '#pop'),
+ (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
],
}
@@ -720,30 +720,30 @@ class KokaLexer(RegexLexer):
# go into type mode
(r'::?' + sboundary, tokenType, 'type'),
- (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
+ (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
'alias-type'),
- (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
+ (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
'struct-type'),
((r'(%s)' % '|'.join(typeStartKeywords)) +
- r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
+ r'(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
'type'),
# special sequences of tokens (we use ?: for non-capturing group as
# required by 'bygroups')
- (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
- bygroups(Keyword, Text, Keyword, Name.Namespace)),
+ (r'(module)(\s+)(interface(?=\s))?(\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
+ bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Namespace)),
(r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
- r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
+ r'(?:(\s*)(=)(\s*)(qualified)?(\s*)'
r'((?:[a-z]\w*/)*[a-z]\w*))?',
- bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
- Keyword, Name.Namespace)),
+ bygroups(Keyword, Whitespace, Name.Namespace, Whitespace, Keyword, Whitespace,
+ Keyword, Whitespace, Name.Namespace)),
- (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
+ (r'^(public|private)?(\s+)?(function|fun|val)'
r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
- bygroups(Keyword, Text, Name.Function)),
- (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
+ bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Function)),
+ (r'^(?:(public|private)(?=\s+external))?((?<!^)\s+)?(external)(\s+)(inline(?=\s))?(\s+)?'
r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
- bygroups(Keyword, Text, Keyword, Name.Function)),
+ bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Name.Function)),
# keywords
(r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
@@ -800,7 +800,7 @@ class KokaLexer(RegexLexer):
(r'[(\[<]', tokenType, 'type-nested'),
(r',', tokenType),
(r'([a-z]\w*)(\s*)(:)(?!:)',
- bygroups(Name, Text, tokenType)), # parameter name
+ bygroups(Name, Whitespace, tokenType)), # parameter name
include('type-content')
],
@@ -833,8 +833,8 @@ class KokaLexer(RegexLexer):
# comments and literals
'whitespace': [
- (r'\n\s*#.*$', Comment.Preproc),
- (r'\s+', Text),
+ (r'(\n\s*)(#.*)$', bygroups(Whitespace, Comment.Preproc)),
+ (r'\s+', Whitespace),
(r'/\*', Comment.Multiline, 'comment'),
(r'//.*$', Comment.Single)
],
diff --git a/pygments/lexers/haxe.py b/pygments/lexers/haxe.py
index 69e35693..ee587e99 100644
--- a/pygments/lexers/haxe.py
+++ b/pygments/lexers/haxe.py
@@ -103,7 +103,7 @@ class HaxeLexer(ExtendedRegexLexer):
# space/tab/comment/preproc
'spaces': [
- (r'\s+', Text),
+ (r'\s+', Whitespace),
(r'//[^\n\r]*', Comment.Single),
(r'/\*.*?\*/', Comment.Multiline),
(r'(#)(if|elseif|else|end|error)\b', preproc_callback),
@@ -163,14 +163,14 @@ class HaxeLexer(ExtendedRegexLexer):
],
'preproc-error': [
- (r'\s+', Comment.Preproc),
+ (r'\s+', Whitespace),
(r"'", String.Single, ('#pop', 'string-single')),
(r'"', String.Double, ('#pop', 'string-double')),
default('#pop'),
],
'preproc-expr': [
- (r'\s+', Comment.Preproc),
+ (r'\s+', Whitespace),
(r'\!', Comment.Preproc),
(r'\(', Comment.Preproc, ('#pop', 'preproc-parenthesis')),
@@ -193,20 +193,20 @@ class HaxeLexer(ExtendedRegexLexer):
],
'preproc-parenthesis': [
- (r'\s+', Comment.Preproc),
+ (r'\s+', Whitespace),
(r'\)', Comment.Preproc, '#pop'),
default('preproc-expr-in-parenthesis'),
],
'preproc-expr-chain': [
- (r'\s+', Comment.Preproc),
+ (r'\s+', Whitespace),
(binop, Comment.Preproc, ('#pop', 'preproc-expr-in-parenthesis')),
default('#pop'),
],
# same as 'preproc-expr' but able to chain 'preproc-expr-chain'
'preproc-expr-in-parenthesis': [
- (r'\s+', Comment.Preproc),
+ (r'\s+', Whitespace),
(r'\!', Comment.Preproc),
(r'\(', Comment.Preproc,
('#pop', 'preproc-expr-chain', 'preproc-parenthesis')),
@@ -919,7 +919,7 @@ class HxmlLexer(RegexLexer):
bygroups(Punctuation, Keyword, Whitespace, String)),
# Options that take only numerical arguments
(r'(-)(swf-version)( +)(\d+)',
- bygroups(Punctuation, Keyword, Number.Integer)),
+ bygroups(Punctuation, Keyword, Whitespace, Number.Integer)),
# An Option that defines the size, the fps and the background
# color of an flash movie
(r'(-)(swf-header)( +)(\d+)(:)(\d+)(:)(\d+)(:)([A-Fa-f0-9]{6})',
diff --git a/pygments/lexers/hdl.py b/pygments/lexers/hdl.py
index 3b49f537..e96f79a4 100644
--- a/pygments/lexers/hdl.py
+++ b/pygments/lexers/hdl.py
@@ -12,7 +12,7 @@ import re
from pygments.lexer import RegexLexer, bygroups, include, using, this, words
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
- Number, Punctuation
+ Number, Punctuation, Whitespace
__all__ = ['VerilogLexer', 'SystemVerilogLexer', 'VhdlLexer']
@@ -34,9 +34,8 @@ class VerilogLexer(RegexLexer):
tokens = {
'root': [
(r'^\s*`define', Comment.Preproc, 'macro'),
- (r'\n', Text),
- (r'\s+', Text),
- (r'\\\n', Text), # line continuation
+ (r'\s+', Whitespace),
+ (r'(\\)(\n)', bygroups(String.Escape, Whitespace)), # line continuation
(r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single),
(r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
(r'[{}#@]', Punctuation),
@@ -54,8 +53,8 @@ class VerilogLexer(RegexLexer):
(r'[()\[\],.;\']', Punctuation),
(r'`[a-zA-Z_]\w*', Name.Constant),
- (r'^(\s*)(package)(\s+)', bygroups(Text, Keyword.Namespace, Text)),
- (r'^(\s*)(import)(\s+)', bygroups(Text, Keyword.Namespace, Text),
+ (r'^(\s*)(package)(\s+)', bygroups(Whitespace, Keyword.Namespace, Text)),
+ (r'^(\s*)(import)(\s+)', bygroups(Whitespace, Keyword.Namespace, Text),
'import'),
(words((
@@ -114,7 +113,7 @@ class VerilogLexer(RegexLexer):
(r'"', String, '#pop'),
(r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
(r'[^\\"\n]+', String), # all other characters
- (r'\\\n', String), # line continuation
+ (r'(\\)(\n)', bygroups(String.Escape, Whitespace)), # line continuation
(r'\\', String), # stray backslash
],
'macro': [
@@ -123,7 +122,7 @@ class VerilogLexer(RegexLexer):
(r'//.*?\n', Comment.Single, '#pop'),
(r'/', Comment.Preproc),
(r'(?<=\\)\n', Comment.Preproc),
- (r'\n', Comment.Preproc, '#pop'),
+ (r'\n', Whitespace, '#pop'),
],
'import': [
(r'[\w:]+\*?', Name.Namespace, '#pop')
@@ -161,13 +160,12 @@ class SystemVerilogLexer(RegexLexer):
tokens = {
'root': [
- (r'^\s*`define', Comment.Preproc, 'macro'),
- (r'^(\s*)(package)(\s+)', bygroups(Text, Keyword.Namespace, Text)),
- (r'^(\s*)(import)(\s+)', bygroups(Text, Keyword.Namespace, Text), 'import'),
+ (r'^(\s*)(`define)', bygroups(Whitespace, Comment.Preproc), 'macro'),
+ (r'^(\s*)(package)(\s+)', bygroups(Whitespace, Keyword.Namespace, Whitespace)),
+ (r'^(\s*)(import)(\s+)', bygroups(Whitespace, Keyword.Namespace, Whitespace), 'import'),
- (r'\n', Text),
- (r'\s+', Text),
- (r'\\\n', Text), # line continuation
+ (r'\s+', Whitespace),
+ (r'(\\)(\n)', bygroups(String.Escape, Whitespace)), # line continuation
(r'/(\\\n)?/(\n|(.|\n)*?[^\\]\n)', Comment.Single),
(r'/(\\\n)?[*](.|\n)*?[*](\\\n)?/', Comment.Multiline),
(r'[{}#@]', Punctuation),
@@ -240,11 +238,11 @@ class SystemVerilogLexer(RegexLexer):
Keyword),
(r'(class)(\s+)([a-zA-Z_]\w*)',
- bygroups(Keyword.Declaration, Text, Name.Class)),
+ bygroups(Keyword.Declaration, Whitespace, Name.Class)),
(r'(extends)(\s+)([a-zA-Z_]\w*)',
- bygroups(Keyword.Declaration, Text, Name.Class)),
+ bygroups(Keyword.Declaration, Whitespace, Name.Class)),
(r'(endclass\b)(?:(\s*)(:)(\s*)([a-zA-Z_]\w*))?',
- bygroups(Keyword.Declaration, Text, Punctuation, Text, Name.Class)),
+ bygroups(Keyword.Declaration, Whitespace, Punctuation, Whitespace, Name.Class)),
(words((
# Variable types
@@ -355,16 +353,16 @@ class SystemVerilogLexer(RegexLexer):
(r'"', String, '#pop'),
(r'\\([\\abfnrtv"\']|x[a-fA-F0-9]{2,4}|[0-7]{1,3})', String.Escape),
(r'[^\\"\n]+', String), # all other characters
- (r'\\\n', String), # line continuation
+ (r'(\\)(\n)', bygroups(String.Escape, Whitespace)), # line continuation
(r'\\', String), # stray backslash
],
'macro': [
(r'[^/\n]+', Comment.Preproc),
(r'/[*](.|\n)*?[*]/', Comment.Multiline),
- (r'//.*?\n', Comment.Single, '#pop'),
+ (r'//.*?$', Comment.Single, '#pop'),
(r'/', Comment.Preproc),
(r'(?<=\\)\n', Comment.Preproc),
- (r'\n', Comment.Preproc, '#pop'),
+ (r'\n', Whitespace, '#pop'),
],
'import': [
(r'[\w:]+\*?', Name.Namespace, '#pop')
@@ -386,9 +384,8 @@ class VhdlLexer(RegexLexer):
tokens = {
'root': [
- (r'\n', Text),
- (r'\s+', Text),
- (r'\\\n', Text), # line continuation
+ (r'\s+', Whitespace),
+ (r'(\\)(\n)', bygroups(String.Escape, Whitespace)), # line continuation
(r'--.*?$', Comment.Single),
(r"'(U|X|0|1|Z|W|L|H|-)'", String.Char),
(r'[~!%^&*+=|?:<>/-]', Operator),
@@ -397,25 +394,25 @@ class VhdlLexer(RegexLexer):
(r'"[^\n\\"]*"', String),
(r'(library)(\s+)([a-z_]\w*)',
- bygroups(Keyword, Text, Name.Namespace)),
- (r'(use)(\s+)(entity)', bygroups(Keyword, Text, Keyword)),
+ bygroups(Keyword, Whitespace, Name.Namespace)),
+ (r'(use)(\s+)(entity)', bygroups(Keyword, Whitespace, Keyword)),
(r'(use)(\s+)([a-z_][\w.]*\.)(all)',
- bygroups(Keyword, Text, Name.Namespace, Keyword)),
+ bygroups(Keyword, Whitespace, Name.Namespace, Keyword)),
(r'(use)(\s+)([a-z_][\w.]*)',
- bygroups(Keyword, Text, Name.Namespace)),
+ bygroups(Keyword, Whitespace, Name.Namespace)),
(r'(std|ieee)(\.[a-z_]\w*)',
bygroups(Name.Namespace, Name.Namespace)),
(words(('std', 'ieee', 'work'), suffix=r'\b'),
Name.Namespace),
(r'(entity|component)(\s+)([a-z_]\w*)',
- bygroups(Keyword, Text, Name.Class)),
+ bygroups(Keyword, Whitespace, Name.Class)),
(r'(architecture|configuration)(\s+)([a-z_]\w*)(\s+)'
r'(of)(\s+)([a-z_]\w*)(\s+)(is)',
- bygroups(Keyword, Text, Name.Class, Text, Keyword, Text,
- Name.Class, Text, Keyword)),
+ bygroups(Keyword, Whitespace, Name.Class, Whitespace, Keyword, Whitespace,
+ Name.Class, Whitespace, Keyword)),
(r'([a-z_]\w*)(:)(\s+)(process|for)',
- bygroups(Name.Class, Operator, Text, Keyword)),
- (r'(end)(\s+)', bygroups(using(this), Text), 'endblock'),
+ bygroups(Name.Class, Operator, Whitespace, Keyword)),
+ (r'(end)(\s+)', bygroups(using(this), Whitespace), 'endblock'),
include('types'),
include('keywords'),
@@ -426,7 +423,7 @@ class VhdlLexer(RegexLexer):
'endblock': [
include('keywords'),
(r'[a-z_]\w*', Name.Class),
- (r'(\s+)', Text),
+ (r'\s+', Whitespace),
(r';', Punctuation, '#pop'),
],
'types': [
diff --git a/pygments/lexers/hexdump.py b/pygments/lexers/hexdump.py
index f93bda76..041d7f6c 100644
--- a/pygments/lexers/hexdump.py
+++ b/pygments/lexers/hexdump.py
@@ -56,8 +56,8 @@ class HexdumpLexer(RegexLexer):
bygroups(Whitespace, Punctuation, String, Punctuation)),
(r'(\s{2,3})(\|)(.{1,15})(\|)$',
bygroups(Whitespace, Punctuation, String, Punctuation)),
- (r'(\s{2,3})(.{1,15})$', bygroups(Text, String)),
- (r'(\s{2,3})(.{16}|.{20})$', bygroups(Text, String), 'nonpiped-strings'),
+ (r'(\s{2,3})(.{1,15})$', bygroups(Whitespace, String)),
+ (r'(\s{2,3})(.{16}|.{20})$', bygroups(Whitespace, String), 'nonpiped-strings'),
(r'\s', Whitespace),
(r'^\*', Punctuation),
],