Modernize Whitespace token in lexer: haskell

author: Sebastian Engel <dev@sebastianengel.eu> 2021-12-03 15:40:45 +0100
committer: Sebastian Engel <dev@sebastianengel.eu> 2021-12-03 15:40:45 +0100
commit: c1c4f9dd79ce258d6b55b92767e46bd980811165 (patch)
tree: 11dffee5995b2f620ee4bc066c495a65648250cc /pygments/lexers/haskell.py
parent: a6a0da0e079bcc4a9e108d3641f42d78c01b4f48 (diff)
download: pygments-git-c1c4f9dd79ce258d6b55b92767e46bd980811165.tar.gz
1 file changed, 51 insertions, 51 deletions
diff --git a/pygments/lexers/haskell.py b/pygments/lexers/haskell.py
index 74cd8dcc..6ab0f334 100644
--- a/pygments/lexers/haskell.py
+++ b/pygments/lexers/haskell.py
@@ -13,7 +13,7 @@ import re
 from pygments.lexer import Lexer, RegexLexer, bygroups, do_insertions, \
     default, include, inherit
 from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
-    Number, Punctuation, Generic
+    Number, Punctuation, Generic, Whitespace
 from pygments import unistring as uni
 
 __all__ = ['HaskellLexer', 'HspecLexer', 'IdrisLexer', 'AgdaLexer', 'CryptolLexer',
@@ -48,7 +48,7 @@ class HaskellLexer(RegexLexer):
     tokens = {
         'root': [
             # Whitespace:
-            (r'\s+', Text),
+            (r'\s+', Whitespace),
             # (r'--\s*|.*$', Comment.Doc),
             (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
             (r'\{-', Comment.Multiline, 'comment'),
@@ -91,31 +91,31 @@ class HaskellLexer(RegexLexer):
         ],
         'import': [
             # Import statements
-            (r'\s+', Text),
+            (r'\s+', Whitespace),
             (r'"', String, 'string'),
             # after "funclist" state
             (r'\)', Punctuation, '#pop'),
             (r'qualified\b', Keyword),
             # import X as Y
             (r'([' + uni.Lu + r'][\w.]*)(\s+)(as)(\s+)([' + uni.Lu + r'][\w.]*)',
-             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
+             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
             # import X hiding (functions)
             (r'([' + uni.Lu + r'][\w.]*)(\s+)(hiding)(\s+)(\()',
-             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
+             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
             # import X (functions)
             (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
-             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
+             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
             # import X
             (r'[\w.]+', Name.Namespace, '#pop'),
         ],
         'module': [
-            (r'\s+', Text),
+            (r'\s+', Whitespace),
             (r'([' + uni.Lu + r'][\w.]*)(\s+)(\()',
-             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
+             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
             (r'[' + uni.Lu + r'][\w.]*', Name.Namespace, '#pop'),
         ],
         'funclist': [
-            (r'\s+', Text),
+            (r'\s+', Whitespace),
             (r'[' + uni.Lu + r']\w*', Keyword.Type),
             (r'(_[\w\']+|[' + uni.Ll + r'][\w\']*)', Name.Function),
             (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
@@ -153,7 +153,7 @@ class HaskellLexer(RegexLexer):
             (r'o[0-7]+', String.Escape, '#pop'),
             (r'x[\da-fA-F]+', String.Escape, '#pop'),
             (r'\d+', String.Escape, '#pop'),
-            (r'\s+\\', String.Escape, '#pop'),
+            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
         ],
     }
 
@@ -172,9 +172,9 @@ class HspecLexer(HaskellLexer):
 
     tokens = {
         'root': [
-            (r'(it\s*)("[^"]*")', bygroups(Text, String.Doc)),
-            (r'(describe\s*)("[^"]*")', bygroups(Text, String.Doc)),
-            (r'(context\s*)("[^"]*")', bygroups(Text, String.Doc)),
+            (r'(it)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
+            (r'(describe)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
+            (r'(context)(\s*)("[^"]*")', bygroups(Text, Whitespace, String.Doc)),
             inherit,
         ],
     }
@@ -215,16 +215,16 @@ class IdrisLexer(RegexLexer):
         'root': [
             # Comments
             (r'^(\s*)(%%(%s))' % '|'.join(directives),
-             bygroups(Text, Keyword.Reserved)),
-            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Text, Comment.Single)),
-            (r'(\s*)(\|{3}.*?)$', bygroups(Text, Comment.Single)),
-            (r'(\s*)(\{-)', bygroups(Text, Comment.Multiline), 'comment'),
+             bygroups(Whitespace, Keyword.Reserved)),
+            (r'(\s*)(--(?![!#$%&*+./<=>?@^|_~:\\]).*?)$', bygroups(Whitespace, Comment.Single)),
+            (r'(\s*)(\|{3}.*?)$', bygroups(Whitespace, Comment.Single)),
+            (r'(\s*)(\{-)', bygroups(Whitespace, Comment.Multiline), 'comment'),
             # Declaration
             (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
-             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
+             bygroups(Whitespace, Name.Function, Whitespace, Operator.Word, Whitespace)),
             #  Identifiers
             (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
-            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
+            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace), 'module'),
             (r"('')?[A-Z][\w\']*", Keyword.Type),
             (r'[a-z][\w\']*', Text),
             #  Special Symbols
@@ -239,16 +239,16 @@ class IdrisLexer(RegexLexer):
             (r"'", String.Char, 'character'),
             (r'"', String, 'string'),
             (r'[^\s(){}]+', Text),
-            (r'\s+?', Text),  # Whitespace
+            (r'\s+?', Whitespace),  # Whitespace
         ],
         'module': [
-            (r'\s+', Text),
+            (r'\s+', Whitespace),
             (r'([A-Z][\w.]*)(\s+)(\()',
-             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
+             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
             (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
         ],
         'funclist': [
-            (r'\s+', Text),
+            (r'\s+', Whitespace),
             (r'[A-Z]\w*', Keyword.Type),
             (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
             (r'--.*$', Comment.Single),
@@ -286,7 +286,7 @@ class IdrisLexer(RegexLexer):
             (r'o[0-7]+', String.Escape, '#pop'),
             (r'x[\da-fA-F]+', String.Escape, '#pop'),
             (r'\d+', String.Escape, '#pop'),
-            (r'\s+\\', String.Escape, '#pop')
+            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop')
         ],
     }
 
@@ -316,7 +316,7 @@ class AgdaLexer(RegexLexer):
         'root': [
             # Declaration
             (r'^(\s*)([^\s(){}]+)(\s*)(:)(\s*)',
-             bygroups(Text, Name.Function, Text, Operator.Word, Text)),
+             bygroups(Whitespace, Name.Function, Whitespace, Operator.Word, Whitespace)),
             # Comments
             (r'--(?![!#$%&*+./<=>?@^|_~:\\]).*?$', Comment.Single),
             (r'\{-', Comment.Multiline, 'comment'),
@@ -325,7 +325,7 @@ class AgdaLexer(RegexLexer):
             # Lexemes:
             #  Identifiers
             (r'\b(%s)(?!\')\b' % '|'.join(reserved), Keyword.Reserved),
-            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Text), 'module'),
+            (r'(import|module)(\s+)', bygroups(Keyword.Reserved, Whitespace), 'module'),
             (r'\b(Set|Prop)[\u2080-\u2089]*\b', Keyword.Type),
             #  Special Symbols
             (r'(\(|\)|\{|\})', Operator),
@@ -339,7 +339,7 @@ class AgdaLexer(RegexLexer):
             (r"'", String.Char, 'character'),
             (r'"', String, 'string'),
             (r'[^\s(){}]+', Text),
-            (r'\s+?', Text),  # Whitespace
+            (r'\s+?', Whitespace),  # Whitespace
         ],
         'hole': [
             # Holes
@@ -383,7 +383,7 @@ class CryptolLexer(RegexLexer):
     tokens = {
         'root': [
             # Whitespace:
-            (r'\s+', Text),
+            (r'\s+', Whitespace),
             # (r'--\s*|.*$', Comment.Doc),
             (r'//.*$', Comment.Single),
             (r'/\*', Comment.Multiline, 'comment'),
@@ -417,31 +417,31 @@ class CryptolLexer(RegexLexer):
         ],
         'import': [
             # Import statements
-            (r'\s+', Text),
+            (r'\s+', Whitespace),
             (r'"', String, 'string'),
             # after "funclist" state
             (r'\)', Punctuation, '#pop'),
             (r'qualified\b', Keyword),
             # import X as Y
             (r'([A-Z][\w.]*)(\s+)(as)(\s+)([A-Z][\w.]*)',
-             bygroups(Name.Namespace, Text, Keyword, Text, Name), '#pop'),
+             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Name), '#pop'),
             # import X hiding (functions)
             (r'([A-Z][\w.]*)(\s+)(hiding)(\s+)(\()',
-             bygroups(Name.Namespace, Text, Keyword, Text, Punctuation), 'funclist'),
+             bygroups(Name.Namespace, Whitespace, Keyword, Whitespace, Punctuation), 'funclist'),
             # import X (functions)
             (r'([A-Z][\w.]*)(\s+)(\()',
-             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
+             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
             # import X
             (r'[\w.]+', Name.Namespace, '#pop'),
         ],
         'module': [
-            (r'\s+', Text),
+            (r'\s+', Whitespace),
             (r'([A-Z][\w.]*)(\s+)(\()',
-             bygroups(Name.Namespace, Text, Punctuation), 'funclist'),
+             bygroups(Name.Namespace, Whitespace, Punctuation), 'funclist'),
             (r'[A-Z][\w.]*', Name.Namespace, '#pop'),
         ],
         'funclist': [
-            (r'\s+', Text),
+            (r'\s+', Whitespace),
             (r'[A-Z]\w*', Keyword.Type),
             (r'(_[\w\']+|[a-z][\w\']*)', Name.Function),
             # TODO: these don't match the comments in docs, remove.
@@ -478,7 +478,7 @@ class CryptolLexer(RegexLexer):
             (r'o[0-7]+', String.Escape, '#pop'),
             (r'x[\da-fA-F]+', String.Escape, '#pop'),
             (r'\d+', String.Escape, '#pop'),
-            (r'\s+\\', String.Escape, '#pop'),
+            (r'(\s+)(\\)', bygroups(Whitespace, String.Escape), '#pop'),
         ],
     }
 
@@ -720,30 +720,30 @@ class KokaLexer(RegexLexer):
 
             # go into type mode
             (r'::?' + sboundary, tokenType, 'type'),
-            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
+            (r'(alias)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
              'alias-type'),
-            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
+            (r'(struct)(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
              'struct-type'),
             ((r'(%s)' % '|'.join(typeStartKeywords)) +
-             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Text, tokenTypeDef),
+             r'(\s+)([a-z]\w*)?', bygroups(Keyword, Whitespace, tokenTypeDef),
              'type'),
 
             # special sequences of tokens (we use ?: for non-capturing group as
             # required by 'bygroups')
-            (r'(module)(\s+)(interface\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
-             bygroups(Keyword, Text, Keyword, Name.Namespace)),
+            (r'(module)(\s+)(interface(?=\s))?(\s+)?((?:[a-z]\w*/)*[a-z]\w*)',
+             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Namespace)),
             (r'(import)(\s+)((?:[a-z]\w*/)*[a-z]\w*)'
-             r'(?:(\s*)(=)(\s*)((?:qualified\s*)?)'
+             r'(?:(\s*)(=)(\s*)(qualified)?(\s*)'
              r'((?:[a-z]\w*/)*[a-z]\w*))?',
-             bygroups(Keyword, Text, Name.Namespace, Text, Keyword, Text,
-                      Keyword, Name.Namespace)),
+             bygroups(Keyword, Whitespace, Name.Namespace, Whitespace, Keyword, Whitespace,
+                      Keyword, Whitespace, Name.Namespace)),
 
-            (r'(^(?:(?:public|private)\s*)?(?:function|fun|val))'
+            (r'^(public|private)?(\s+)?(function|fun|val)'
              r'(\s+)([a-z]\w*|\((?:' + symbols + r'|/)\))',
-             bygroups(Keyword, Text, Name.Function)),
-            (r'(^(?:(?:public|private)\s*)?external)(\s+)(inline\s+)?'
+             bygroups(Keyword, Whitespace, Keyword, Whitespace, Name.Function)),
+            (r'^(?:(public|private)(?=\s+external))?((?<!^)\s+)?(external)(\s+)(inline(?=\s))?(\s+)?'
              r'([a-z]\w*|\((?:' + symbols + r'|/)\))',
-             bygroups(Keyword, Text, Keyword, Name.Function)),
+             bygroups(Keyword, Whitespace, Keyword, Whitespace, Keyword, Whitespace, Name.Function)),
 
             # keywords
             (r'(%s)' % '|'.join(typekeywords) + boundary, Keyword.Type),
@@ -800,7 +800,7 @@ class KokaLexer(RegexLexer):
             (r'[(\[<]', tokenType, 'type-nested'),
             (r',', tokenType),
             (r'([a-z]\w*)(\s*)(:)(?!:)',
-             bygroups(Name, Text, tokenType)),  # parameter name
+             bygroups(Name, Whitespace, tokenType)),  # parameter name
             include('type-content')
         ],
 
@@ -833,8 +833,8 @@ class KokaLexer(RegexLexer):
 
         # comments and literals
         'whitespace': [
-            (r'\n\s*#.*$', Comment.Preproc),
-            (r'\s+', Text),
+            (r'(\n\s*)(#.*)$', bygroups(Whitespace, Comment.Preproc)),
+            (r'\s+', Whitespace),
             (r'/\*', Comment.Multiline, 'comment'),
             (r'//.*$', Comment.Single)
         ],
author	Sebastian Engel <dev@sebastianengel.eu>	2021-12-03 15:40:45 +0100
committer	Sebastian Engel <dev@sebastianengel.eu>	2021-12-03 15:40:45 +0100
commit	c1c4f9dd79ce258d6b55b92767e46bd980811165 (patch)
tree	11dffee5995b2f620ee4bc066c495a65648250cc /pygments/lexers/haskell.py
parent	a6a0da0e079bcc4a9e108d3641f42d78c01b4f48 (diff)
download	pygments-git-c1c4f9dd79ce258d6b55b92767e46bd980811165.tar.gz